I am converting a JSON payload to XML file using Python ExecuteScript processor in NiFi. The JSON looks like this :
{
"Header": {
"Att1": 1,
"Att2": "value2",
"Att3": "1",
"Att4": "경기00자123"
}
}
The python script to convert this JSON to XML is as below :
import json
import xml.etree.ElementTree as ET
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
class ModJSON(StreamCallback):
def __init__(self):
pass
def process(self, inputStream, outputStream):
text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
data = json.loads(text)
root = ET.Element("headerinfo")
entity = ET.SubElement(root, "headerfile")
ET.SubElement(entity, "Att1").text = str(data["Header"]["Att1"])
ET.SubElement(entity, "Att2").text = str(data["Header"]["Att2"])
ET.SubElement(entity, "Att3").text = str(data["Header"]["Att3"])
ET.SubElement(entity, "Att4").text = data["Header"]["Att4"].encode("utf8")
xmlNew = ET.tostring(root)
outputStream.write(bytearray(xmlNew))
flowFile = session.get()
if flowFile != None:
try :
flowFile = session.write(flowFile, ModJSON())
flowFile = session.putAttribute(flowFile, "filename", 'headerfile.xml')
session.transfer(flowFile, REL_SUCCESS)
session.commit()
except Exception as e:
flowFile = session.putAttribute(flowFile,'python_error', str(e))
session.transfer(flowFile, REL_FAILURE)
No matter how I try to encode the Att4 with Japanese characters, it looks like this in the resulting XML :
京都111を3
How can I change the code to fix this? Tried a lot of different things but nothing seems to work.
seems there is an issue with byte-string in jython - they are automatically converted to str object with incorrect encoding.
however ElementTree has write function that could write to a file-like object and OutputStream (java object) actually implements write function - so, we could make ElementTree write directly to OutputStream
import json
import xml.etree.ElementTree as ET
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
class ModJSON(StreamCallback):
def process(self, inputStream, outputStream):
text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
data = json.loads(text)
root = ET.Element("headerinfo")
entity = ET.SubElement(root, "headerfile")
ET.SubElement(entity, "Att1").text = str(data["Header"]["Att1"])
ET.SubElement(entity, "Att2").text = str(data["Header"]["Att2"])
ET.SubElement(entity, "Att3").text = str(data["Header"]["Att3"])
ET.SubElement(entity, "Att4").text = data["Header"]["Att4"]
ET.ElementTree(root).write(outputStream, encoding='utf-8')
flowFile = session.get()
if flowFile != None:
try :
flowFile = session.write(flowFile, ModJSON())
flowFile = session.putAttribute(flowFile, "filename", 'headerfile.xml')
session.transfer(flowFile, REL_SUCCESS)
session.commit()
except Exception as e:
flowFile = session.putAttribute(flowFile,'python_error', str(e))
session.transfer(flowFile, REL_FAILURE)