Search code examples
pythonopenpyxlxlrd

Convert .xls to .xlsx so that Openpyxl can work with it


I wrote a flask application that will be used to process some excel files, however, I wrote it for .xlsx files. The inputted files might be .xls files, which I know Openpyxl can't open. How can I convert the file to .xlsx before processing it with Openpyxl in my application?

I saw something online about using xlrd to write the original .xls to a .xlsx file that Openpyxl can process but I was having trouble tailoring it to fit my specific application.

Thanks in advance!

from openpyxl import load_workbook
from openpyxl.styles import Style, Font
from flask import Flask, request, render_template, redirect, url_for, send_file
import os


app = Flask(__name__)

@app.route('/')
def index():
    return """<center><body bgcolor="#FACC2E">
          <h1><p>Automated TDX Report</h1></p><br>
    <form action="/upload" method=post enctype=multipart/form-data>
      <p><input type=file name=file>
         <input type=submit value=Upload>
    </form></center></body>"""

@app.route('/upload', methods = ['GET', 'POST'])
def upload():
    if request.method == 'POST':
        f = request.files['file']
        f.save(f.filename)
        return process(f.filename)

def process(filename):


    VP = ['ZYJD', 'ZYJC', 'ZYKC', 'ZYKA', 'ZYKB', 'ZYKD', 'ZYKE', 'ZYKF', 'ZYJB', 'ZYJX', 'ZYKG', 'ZYKH', 'ZYJE', 'ZYJA', 
         'ZYKI', 'ZYKX', 'ZYKK', 'ZYKJ', 'ZYJF', 'ZYJK', 'ZYJG', 'ZYJJ', 'ZYKL', 'ZYKM', 'ZYKN']

    VA = ['ZYIC', 'ZYIB', 'ZYHC', 'ZYIA', 'ZYHA', 'ZYHG', 'ZYHB', 'ZYID', 'ZYDA', 'ZYIE', 'ZYHD', 'ZYIG', 'ZYIX', 'ZYHE', 
         'ZYIF', 'ZYHX', 'ZYDE', 'ZYHF', 'ZYLB', 'ZYAC', 'ZYCF', 'ZYDF', 'ZYBG', 'ZYDG', 'ZYDD', 'ZYDH', 'ZYCB', 'ZYCA', 
         'ZYWA', 'ZYWB', 'ZYWC', 'ZYWD', 'ZYWE', 'ZYWF', 'ZYWG', 'ZYWI', 'ZYWJ']

    Gordon = ['ZYDB', 'ZYDX', 'ZYEB', 'ZYED', 'ZYEC', 'ZYEA', 'ZYEX', 'ZYFE', 'ZYFX', 'ZYFD', 'ZYFA', 'ZYFC', 'ZYFB', 
             'ZYGC', 'ZYGA', 'ZYGX', 'ZYGB', 'ZYGF', 'ZYGG', 'ZYGD', 'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD', 'ZYKM', 'ZYKN', 
             'ZYCC', 'ZYCE']

    Pete = ['ZYAD', 'ZYBX', 'ZYAX', 'ZYAB', 'ZYBC', 'ZYBA', 'ZYBB', 'ZYAA', 'ZYBD', 'ZYLE', 'ZYCX', 'ZYAE', 'ZYCC', 'ZYCE', 
           'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD']

    Mike = ['ZYKP', 'ZYAP', 'ZYHP', 'ZYJP', 'ZYFP', 'ZYJR', 'ZYCP', 'ZYIR', 'ZYAR', 'ZYBP', 'ZYKR', 'ZYJS', 'ZYIP', 'ZYHR',
           'ZYEP', 'ZYFF', 'ZYGP', 'ZYKS', 'ZYEE', 'ZYJH', 'ZYII', 'ZYHH', 'ZYJW']

    workbook = load_workbook(filename)
    worksheet = workbook.active

    worksheet.column_dimensions.group('B', hidden=True)
    worksheet.column_dimensions.group('D', hidden=True)
    worksheet.column_dimensions.group('E', hidden=True)
    worksheet.column_dimensions.group('F', hidden=True)
    worksheet.column_dimensions.group('G', hidden=True)
    worksheet.column_dimensions.group('H', hidden=True)
    worksheet.column_dimensions.group('I', hidden=True)
    worksheet.column_dimensions.group('K', hidden=True)
    worksheet.column_dimensions.group('L', hidden=True)
    worksheet.column_dimensions.group('M', hidden=True)
    worksheet.column_dimensions.group('N', hidden=True)
    worksheet.column_dimensions.group('O', hidden=True)
    worksheet.column_dimensions.group('P', hidden=True)
    worksheet.column_dimensions.group('Q', hidden=True)
    worksheet.column_dimensions.group('R', hidden=True)
    worksheet.column_dimensions.group('S', hidden=True)
    worksheet.column_dimensions.group('T', hidden=True)
    worksheet.column_dimensions.group('U', hidden=True)
    worksheet.column_dimensions.group('V', hidden=True)
    worksheet.column_dimensions.group('W', hidden=True)
    worksheet.column_dimensions.group('X', hidden=True)
    worksheet.column_dimensions.group('Y', hidden=True)
    worksheet.column_dimensions.group('Z', hidden=True)
    worksheet.column_dimensions.group('AA', hidden=True)
    worksheet.column_dimensions.group('AB', hidden=True)
    worksheet.column_dimensions.group('AC', hidden=True)
    worksheet.column_dimensions.group('AD', hidden=True)
    worksheet.column_dimensions.group('AE', hidden=True)
    worksheet.column_dimensions.group('AF', hidden=True)
    worksheet.column_dimensions.group('AG', hidden=True)
    worksheet.column_dimensions.group('AH', hidden=True)
    worksheet.column_dimensions.group('AI', hidden=True)
    worksheet.column_dimensions.group('AJ', hidden=True)
    worksheet.column_dimensions.group('AK', hidden=True)
    worksheet.column_dimensions.group('AM', hidden=True)
    worksheet.column_dimensions.group('AN', hidden=True)
    worksheet.column_dimensions.group('AP', hidden=True)
    worksheet.column_dimensions.group('AQ', hidden=True)
    worksheet.column_dimensions.group('AR', hidden=True)
    worksheet.column_dimensions.group('AS', hidden=True)
    worksheet.column_dimensions.group('AT', hidden=True)
    worksheet.column_dimensions.group('AU', hidden=True)
    worksheet.column_dimensions.group('AV', hidden=True)
    worksheet.column_dimensions.group('AW', hidden=True)
    worksheet.column_dimensions.group('AX', hidden=True)
    worksheet.column_dimensions.group('AY', hidden=True)
    worksheet.column_dimensions.group('AZ', hidden=True)
    worksheet.column_dimensions.group('BA', hidden=True)
    worksheet.column_dimensions.group('BB', hidden=True)
    worksheet.column_dimensions.group('BC', hidden=True)
    worksheet.column_dimensions.group('BD', hidden=True)
    worksheet.column_dimensions.group('BE', hidden=True)
    worksheet.column_dimensions.group('BF', hidden=True)
    worksheet.column_dimensions.group('BH', hidden=True)
    worksheet.column_dimensions.group('BI', hidden=True)
    worksheet.column_dimensions.group('BJ', hidden=True)
    worksheet.column_dimensions.group('BK', hidden=True)
    worksheet.column_dimensions.group('BL', hidden=True)
    worksheet.column_dimensions.group('BM', hidden=True)
    worksheet.column_dimensions.group('BN', hidden=True)
    worksheet.column_dimensions.group('BO', hidden=True)
    worksheet.column_dimensions.group('BP', hidden=True)
    worksheet.column_dimensions.group('BQ', hidden=True)
    worksheet.column_dimensions.group('BR', hidden=True)
    worksheet.column_dimensions.group('BS', hidden=True)
    worksheet.column_dimensions.group('BT', hidden=True)
    worksheet.column_dimensions.group('BU', hidden=True)
    worksheet.column_dimensions.group('BV', hidden=True)
    worksheet.column_dimensions.group('BW', hidden=True)
    worksheet.column_dimensions.group('BX', hidden=True)
    worksheet.column_dimensions.group('BY', hidden=True)
    worksheet.column_dimensions.group('BZ', hidden=True)
    worksheet.column_dimensions.group('CA', hidden=True)
    worksheet.column_dimensions.group('CB', hidden=True)
    worksheet.column_dimensions.group('CC', hidden=True)
    worksheet.column_dimensions.group('CD', hidden=True)
    worksheet.column_dimensions.group('CE', hidden=True)
    worksheet.column_dimensions.group('CF', hidden=True)
    worksheet.column_dimensions.group('CG', hidden=True)
    worksheet.column_dimensions.group('CH', hidden=True)
    worksheet.column_dimensions.group('CI', hidden=True)
    worksheet.column_dimensions.group('CJ', hidden=True)
    worksheet.column_dimensions.group('CK', hidden=True)
    worksheet.column_dimensions.group('CL', hidden=True)
    worksheet.column_dimensions.group('CM', hidden=True)
    worksheet.column_dimensions.group('CN', hidden=True)
    worksheet.column_dimensions.group('CO', hidden=True)
    worksheet.column_dimensions.group('CP', hidden=True)
    worksheet.column_dimensions.group('CQ', hidden=True)
    worksheet.column_dimensions.group('CR', hidden=True)
    worksheet.column_dimensions.group('CS', hidden=True)
    worksheet.column_dimensions.group('CU', hidden=True)


    routecolumn = worksheet.columns[58]
    i = 2
    supervisorheader = worksheet.cell("CV1")
    s = Style(font=Font(bold=True))
    supervisorheader.style = s

    worksheet['CV1'] = 'Supervisor'
    for route in routecolumn:
        if route.value == 'Responsible Route':
            continue
        if route.value in Gordon:
            pos = Gordon.index(route.value)
            worksheet['CV' + str(i)].value = 'Gordon'
            i += 1
        elif route.value in VA:
            pos = VA.index(route.value)
            worksheet['CV' + str(i)].value = 'Vinny A'
            i += 1
        elif route.value in VP:
            pos = VP.index(route.value)
            worksheet['CV' + str(i)].value = 'Vinny P'
            i += 1
        elif route.value in Pete:
            pos = Pete.index(route.value)
            worksheet['CV' + str(i)].value = 'Pete'
            i += 1
        elif route.value in Mike:
            pos = Mike.index(route.value)
            worksheet['CV' + str(i)].value = 'Mike'
            i += 1
        elif route.value not in Gordon or route.value not in VA or route.value not in VP or route.value not in Pete \
        or route.value not in Mike:
            worksheet['CV' + str(i)].value = 'Building'
            i += 1


    workbook.save(filename)
    newfilename = filename.strip(".xlsx")
    newfilename = newfilename + ".xls"
    os.rename('/home/MY NAME/'+filename, '/home/MY NAME/'+newfilename)

    return send_file(newfilename, attachment_filename='tdx.xls', as_attachment=True), os.remove ('/home/MY NAME/'+newfilename)



if __name__ == '__main__':
    app.run(debug = True, host = '0.0.0.0')

Solution

  • My recommendation: Use xlrd to read the values you need, and openpyxl to create a new xlsx workbook. You just need to be able to get all of the relevant info using xlrd.

    The only thing you're doing which isn't just reading values from cells is selecting the "active" worksheet. I found this Google Groups post in a thread titled "xlrd: Get active sheet" which says to try using either the .sheet_selected attribute or the .sheet_visible attribute, with the following code for checking which works:

    import xlrd
    b = xlrd.open_workbook("vis.xls") # where vis.xls is your test file
    for i, s in enumerate(b.sheets()):
        print(i, s.name, s.sheet_selected, s.sheet_visible)
    

    It also notes:

    These two attributes are documented in sheet.py in the dict _WINDOW_2 which starts at about line 2. sheet_visible is probably what you want. Ignore the MS docs which show this bit as undefined.