Search code examples
pythonpandasdata-extractionpython-camelot

How to provide table areas as an input in camelot-Python


I am making a python script, where user can provide a pdf and the table areas and then it extracts the table and convert it into csv file. But how to take an input here and add it into the command.

import camelot
import pandas as pd
pdf_line = input("Enter pdf path: ")
#print(pdf_line)
#tables = camelot.read_pdf("/Users/kvidushi/Desktop/1.pdf")
tables = camelot.read_pdf(pdf_line)
x1 =float(input("Enter coordinate x1: "))
y1 =input("Enter coordinate y1: ")
x2 = input("Enter coordinate x2: ")
y2 = input("Enter coordinate y2: ")
print(type(x1))
#regions="['"+x1+"','"+y1+"','"+x2+"','"+y2+"']"
#regions = ['50', '499','566','420']
#tables=camelot.read_pdf(pdf_line, table_regions=regions)
tables = camelot.read_pdf(pdf_line, flavor='stream', table_areas=['x1,x2,x3,x4'],Index='false',ignore_index=True)
#tables = camelot.read_pdf(pdf_line, flavor='stream', table_areas=['50,499,566,420'],Index='false',ignore_index=True)
#tables[0].df.columns = tables[0].df.iloc[0]
#tables[0].df =tables[0].df.drop(0)
#tables.export('tables.xls', f='excel')
print(tables[0].df)
tables[0].to_excel("1.xlsx", index=False)

Solution

  • have you tried:

    table_areas=[f'{x1},{y1},{x2},{y2}']