Search code examples
pythonexcelpython-2.7csvxlsm

Write a list of lists into csv and having UnicodeEncodeError in python


I am converting an xlsm file to csv using the openpyxl library. I have the problem that when I try to pass the list of lists to the csv python returns the following error:

Traceback (most recent call last): File "test.py", line 21, in listIntoCsv(finalList) File "test.py", line 16, in listIntoCsv write.writerows(list) UnicodeEncodeError: 'ascii' codec can't encode character u'\xfa' in position 20: ordinal not in range(128)

These are 2 example of 1 list inside the final list:

[
[u'[email protected]', u'[email protected], [email protected]', 
datetime.datetime(2022, 7, 18, 10, 7, 16),
1L, '0', 1L, 2L, 'NO', 
None, '[email protected]',
'[email protected]', None,
None, False, False, None,None,
False, False, False, None, None,
True, 'SI', 'N/A',3182L, 0L, None,
None, None, '#N/A', 'RE: N< >LWWM',
u'a9e5bbbb497', u'Com: \xd1<GR A', None]...
]

I am executing the code with python2. I have tried to apply different solutions so that the code looks like this:

from openpyxl import load_workbook
import csv
import codec

  excelFile = load_workbook('test.xlsm', data_only=True)
  currentSheet = excelFile.worksheets[0]
  
  def iter_rows(currentSheet):
    for row in currentSheet.iter_rows():
      yield [cell.value for cell in row]

  def listIntoCsv(list):
    with codecs.open('test','w',encoding='UTF-8') as f:
      write = csv.writer(f)
      write.writerows(list)
  
  finalList = list(iter_rows(currentSheet))
  print(finalList)
  listIntoCsv(finalList)

Solution

  • In order to change the type of each element inside the list we have to change them manually by changing their type to string due to python 2.7 does not support unicode format. So the solution is:

    from types import NoneType
    import unicodedata
    from openpyxl import load_workbook
    import csv
    from datetime import datetime
    import os
      
      def dateTimeToUnixTIme(dateTime):
        dataTimeObject = datetime.datetime.strptime(dateTime, "%Y-%m-%d 
        %H:%M:%S.%f")
        unixTime = time.mktime(dataTimeObject.timetuple()) + 
        (dataTimeObject.microsecond / 1000000.0)
        return unixTime
    
      def iterRows(currentSheet):
        for row in currentSheet.iter_rows():
          yield [cell.value for cell in row]
    
      def listIntoCsv(list):
        with open('excel.csv','w') as f:
          write = csv.writer(f)
          write.writerows(list)
      
    
      def decodeList(list):
        for x,y in enumerate(list):
          for j,element in enumerate(y):
            if isinstance(element,unicode):
              element = unicodedata.normalize('NFKD', 
              element).encode('ascii', 'ignore')
              if (", ") in element and '@' in element:
                element = element.replace(' ','')
                element = element.split(",")
                list[x][j] = element
              list[x][j] = element
    
            elif isinstance(element,date):
              element = element.strftime('%Y-%m-%d %H:%M:%S.%f')
              element = dateTimeToUnixTIme(element)
              list[x][j] = str(element)
    
            elif isinstance(element,(long,int,float)):
              element = str(element)
              list[x][j] = element
    
            elif isinstance(element,NoneType):
              element = str(element).replace('None','Nada')
              list[x][j] = element
    
            elif isinstance(element,bool):
              element = str(element)
              list[x][j] = element
    
            elif isinstance(element,str):
              element = str(element)
              list[x][j] = element
            else:
              list[x][j] = element
    
      return list
    
      ubicationExcel = 'bin/mailList/Investigacion_DLP_enmascarado.xlsm'
      excelFile = load_workbook(ubicationExcel, data_only=True)
      currentSheet = excelFile.worksheets[0]
      dirtyList = list(iterRows(currentSheet))
      finalList = encodeList(dirtyList)
      listIntoCsv(finalList)