Hello guys I'm trying to convert .xls files to .csv in apache NIFI data flow. I tried many solutions with no result I even tried creating a script like this sccript
Thank you in advance
Configure your ExecuteStreamCommand
as
and try using below code in your python script,
import csv
import os
import sys
from io import StringIO, BytesIO
import pandas as pd
import xlrd
from pandas import ExcelFile
wb = xlrd.open_workbook(file_contents=sys.stdin.read(),logfile=open(os.devnull, 'w'))
excel_file_df = pd.read_excel(wb, sheet_name='Sheet1', index=False, index_col=0, encoding='utf-8',engine='xlrd')
#flowfile_content = ExcelFile(BytesIO(sys.stdin.read()))
#excel_file_df = pd.read_excel(flowfile_content, sheet_name='Sheet1', index=False, index_col=0, encoding='utf-8')
csv_data_rows = []
header_list = list(excel_file_df.columns.values)
temp_header_list = []
for field in header_list:
temp = '"' + field + '"'
temp_header_list.append(temp)
header_row = ','.join([str(elem) for elem in temp_header_list])
csv_data_rows.append(header_row)
is_header_row = True
for index, row in excel_file_df.iterrows():
if is_header_row :
is_header_row = False
continue
temp_data_list = []
for item in row :
#item = item.encode('utf-8', 'ignore').decode('utf-8')
if hasattr(item, 'encode'):
item = item.encode('ascii', 'ignore').decode('ascii')
item = str(item)
item = item.replace('\n', '')
item = item.replace('",', '" ')
if item == 'nan':
item=''
temp = '"' + str(item) + '"'
temp_data_list.append(temp)
data_row = ','.join([str(elem) for elem in temp_data_list])
data_row = data_row
csv_data_rows.append(data_row)
for item in csv_data_rows:
sys.stdout.write("%s\r\n" % item)