I wrote a flask application that will be used to process some excel files, however, I wrote it for .xlsx files. The inputted files might be .xls files, which I know Openpyxl can't open. How can I convert the file to .xlsx before processing it with Openpyxl in my application?
I saw something online about using xlrd to write the original .xls to a .xlsx file that Openpyxl can process but I was having trouble tailoring it to fit my specific application.
Thanks in advance!
from openpyxl import load_workbook
from openpyxl.styles import Style, Font
from flask import Flask, request, render_template, redirect, url_for, send_file
import os
app = Flask(__name__)
@app.route('/')
def index():
return """<center><body bgcolor="#FACC2E">
<h1><p>Automated TDX Report</h1></p><br>
<form action="/upload" method=post enctype=multipart/form-data>
<p><input type=file name=file>
<input type=submit value=Upload>
</form></center></body>"""
@app.route('/upload', methods = ['GET', 'POST'])
def upload():
if request.method == 'POST':
f = request.files['file']
f.save(f.filename)
return process(f.filename)
def process(filename):
VP = ['ZYJD', 'ZYJC', 'ZYKC', 'ZYKA', 'ZYKB', 'ZYKD', 'ZYKE', 'ZYKF', 'ZYJB', 'ZYJX', 'ZYKG', 'ZYKH', 'ZYJE', 'ZYJA',
'ZYKI', 'ZYKX', 'ZYKK', 'ZYKJ', 'ZYJF', 'ZYJK', 'ZYJG', 'ZYJJ', 'ZYKL', 'ZYKM', 'ZYKN']
VA = ['ZYIC', 'ZYIB', 'ZYHC', 'ZYIA', 'ZYHA', 'ZYHG', 'ZYHB', 'ZYID', 'ZYDA', 'ZYIE', 'ZYHD', 'ZYIG', 'ZYIX', 'ZYHE',
'ZYIF', 'ZYHX', 'ZYDE', 'ZYHF', 'ZYLB', 'ZYAC', 'ZYCF', 'ZYDF', 'ZYBG', 'ZYDG', 'ZYDD', 'ZYDH', 'ZYCB', 'ZYCA',
'ZYWA', 'ZYWB', 'ZYWC', 'ZYWD', 'ZYWE', 'ZYWF', 'ZYWG', 'ZYWI', 'ZYWJ']
Gordon = ['ZYDB', 'ZYDX', 'ZYEB', 'ZYED', 'ZYEC', 'ZYEA', 'ZYEX', 'ZYFE', 'ZYFX', 'ZYFD', 'ZYFA', 'ZYFC', 'ZYFB',
'ZYGC', 'ZYGA', 'ZYGX', 'ZYGB', 'ZYGF', 'ZYGG', 'ZYGD', 'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD', 'ZYKM', 'ZYKN',
'ZYCC', 'ZYCE']
Pete = ['ZYAD', 'ZYBX', 'ZYAX', 'ZYAB', 'ZYBC', 'ZYBA', 'ZYBB', 'ZYAA', 'ZYBD', 'ZYLE', 'ZYCX', 'ZYAE', 'ZYCC', 'ZYCE',
'ZYLA', 'ZYBF', 'ZYBE', 'ZYLD']
Mike = ['ZYKP', 'ZYAP', 'ZYHP', 'ZYJP', 'ZYFP', 'ZYJR', 'ZYCP', 'ZYIR', 'ZYAR', 'ZYBP', 'ZYKR', 'ZYJS', 'ZYIP', 'ZYHR',
'ZYEP', 'ZYFF', 'ZYGP', 'ZYKS', 'ZYEE', 'ZYJH', 'ZYII', 'ZYHH', 'ZYJW']
workbook = load_workbook(filename)
worksheet = workbook.active
worksheet.column_dimensions.group('B', hidden=True)
worksheet.column_dimensions.group('D', hidden=True)
worksheet.column_dimensions.group('E', hidden=True)
worksheet.column_dimensions.group('F', hidden=True)
worksheet.column_dimensions.group('G', hidden=True)
worksheet.column_dimensions.group('H', hidden=True)
worksheet.column_dimensions.group('I', hidden=True)
worksheet.column_dimensions.group('K', hidden=True)
worksheet.column_dimensions.group('L', hidden=True)
worksheet.column_dimensions.group('M', hidden=True)
worksheet.column_dimensions.group('N', hidden=True)
worksheet.column_dimensions.group('O', hidden=True)
worksheet.column_dimensions.group('P', hidden=True)
worksheet.column_dimensions.group('Q', hidden=True)
worksheet.column_dimensions.group('R', hidden=True)
worksheet.column_dimensions.group('S', hidden=True)
worksheet.column_dimensions.group('T', hidden=True)
worksheet.column_dimensions.group('U', hidden=True)
worksheet.column_dimensions.group('V', hidden=True)
worksheet.column_dimensions.group('W', hidden=True)
worksheet.column_dimensions.group('X', hidden=True)
worksheet.column_dimensions.group('Y', hidden=True)
worksheet.column_dimensions.group('Z', hidden=True)
worksheet.column_dimensions.group('AA', hidden=True)
worksheet.column_dimensions.group('AB', hidden=True)
worksheet.column_dimensions.group('AC', hidden=True)
worksheet.column_dimensions.group('AD', hidden=True)
worksheet.column_dimensions.group('AE', hidden=True)
worksheet.column_dimensions.group('AF', hidden=True)
worksheet.column_dimensions.group('AG', hidden=True)
worksheet.column_dimensions.group('AH', hidden=True)
worksheet.column_dimensions.group('AI', hidden=True)
worksheet.column_dimensions.group('AJ', hidden=True)
worksheet.column_dimensions.group('AK', hidden=True)
worksheet.column_dimensions.group('AM', hidden=True)
worksheet.column_dimensions.group('AN', hidden=True)
worksheet.column_dimensions.group('AP', hidden=True)
worksheet.column_dimensions.group('AQ', hidden=True)
worksheet.column_dimensions.group('AR', hidden=True)
worksheet.column_dimensions.group('AS', hidden=True)
worksheet.column_dimensions.group('AT', hidden=True)
worksheet.column_dimensions.group('AU', hidden=True)
worksheet.column_dimensions.group('AV', hidden=True)
worksheet.column_dimensions.group('AW', hidden=True)
worksheet.column_dimensions.group('AX', hidden=True)
worksheet.column_dimensions.group('AY', hidden=True)
worksheet.column_dimensions.group('AZ', hidden=True)
worksheet.column_dimensions.group('BA', hidden=True)
worksheet.column_dimensions.group('BB', hidden=True)
worksheet.column_dimensions.group('BC', hidden=True)
worksheet.column_dimensions.group('BD', hidden=True)
worksheet.column_dimensions.group('BE', hidden=True)
worksheet.column_dimensions.group('BF', hidden=True)
worksheet.column_dimensions.group('BH', hidden=True)
worksheet.column_dimensions.group('BI', hidden=True)
worksheet.column_dimensions.group('BJ', hidden=True)
worksheet.column_dimensions.group('BK', hidden=True)
worksheet.column_dimensions.group('BL', hidden=True)
worksheet.column_dimensions.group('BM', hidden=True)
worksheet.column_dimensions.group('BN', hidden=True)
worksheet.column_dimensions.group('BO', hidden=True)
worksheet.column_dimensions.group('BP', hidden=True)
worksheet.column_dimensions.group('BQ', hidden=True)
worksheet.column_dimensions.group('BR', hidden=True)
worksheet.column_dimensions.group('BS', hidden=True)
worksheet.column_dimensions.group('BT', hidden=True)
worksheet.column_dimensions.group('BU', hidden=True)
worksheet.column_dimensions.group('BV', hidden=True)
worksheet.column_dimensions.group('BW', hidden=True)
worksheet.column_dimensions.group('BX', hidden=True)
worksheet.column_dimensions.group('BY', hidden=True)
worksheet.column_dimensions.group('BZ', hidden=True)
worksheet.column_dimensions.group('CA', hidden=True)
worksheet.column_dimensions.group('CB', hidden=True)
worksheet.column_dimensions.group('CC', hidden=True)
worksheet.column_dimensions.group('CD', hidden=True)
worksheet.column_dimensions.group('CE', hidden=True)
worksheet.column_dimensions.group('CF', hidden=True)
worksheet.column_dimensions.group('CG', hidden=True)
worksheet.column_dimensions.group('CH', hidden=True)
worksheet.column_dimensions.group('CI', hidden=True)
worksheet.column_dimensions.group('CJ', hidden=True)
worksheet.column_dimensions.group('CK', hidden=True)
worksheet.column_dimensions.group('CL', hidden=True)
worksheet.column_dimensions.group('CM', hidden=True)
worksheet.column_dimensions.group('CN', hidden=True)
worksheet.column_dimensions.group('CO', hidden=True)
worksheet.column_dimensions.group('CP', hidden=True)
worksheet.column_dimensions.group('CQ', hidden=True)
worksheet.column_dimensions.group('CR', hidden=True)
worksheet.column_dimensions.group('CS', hidden=True)
worksheet.column_dimensions.group('CU', hidden=True)
routecolumn = worksheet.columns[58]
i = 2
supervisorheader = worksheet.cell("CV1")
s = Style(font=Font(bold=True))
supervisorheader.style = s
worksheet['CV1'] = 'Supervisor'
for route in routecolumn:
if route.value == 'Responsible Route':
continue
if route.value in Gordon:
pos = Gordon.index(route.value)
worksheet['CV' + str(i)].value = 'Gordon'
i += 1
elif route.value in VA:
pos = VA.index(route.value)
worksheet['CV' + str(i)].value = 'Vinny A'
i += 1
elif route.value in VP:
pos = VP.index(route.value)
worksheet['CV' + str(i)].value = 'Vinny P'
i += 1
elif route.value in Pete:
pos = Pete.index(route.value)
worksheet['CV' + str(i)].value = 'Pete'
i += 1
elif route.value in Mike:
pos = Mike.index(route.value)
worksheet['CV' + str(i)].value = 'Mike'
i += 1
elif route.value not in Gordon or route.value not in VA or route.value not in VP or route.value not in Pete \
or route.value not in Mike:
worksheet['CV' + str(i)].value = 'Building'
i += 1
workbook.save(filename)
newfilename = filename.strip(".xlsx")
newfilename = newfilename + ".xls"
os.rename('/home/MY NAME/'+filename, '/home/MY NAME/'+newfilename)
return send_file(newfilename, attachment_filename='tdx.xls', as_attachment=True), os.remove ('/home/MY NAME/'+newfilename)
if __name__ == '__main__':
app.run(debug = True, host = '0.0.0.0')
My recommendation: Use xlrd to read the values you need, and openpyxl to create a new xlsx workbook. You just need to be able to get all of the relevant info using xlrd.
The only thing you're doing which isn't just reading values from cells is selecting the "active" worksheet. I found this Google Groups post in a thread titled "xlrd: Get active sheet" which says to try using either the .sheet_selected
attribute or the .sheet_visible
attribute, with the following code for checking which works:
import xlrd
b = xlrd.open_workbook("vis.xls") # where vis.xls is your test file
for i, s in enumerate(b.sheets()):
print(i, s.name, s.sheet_selected, s.sheet_visible)
It also notes:
These two attributes are documented in
sheet.py
in the dict_WINDOW_2
which starts at about line 2.sheet_visible
is probably what you want. Ignore the MS docs which show this bit as undefined.