I am trying to bulk-stamp a number of PDF files. I found something on GitHub that does something very similar, but you have to name each file within the script to match the actual PDF file for it to work.
https://github.com/iprapas/pythonpdf
def stamp_pdf(input_path, stamp_path, output_path, add_frame=False):
    """Merge the first page of a stamp PDF onto every page of an input PDF.

    ``create_pdf_stamp`` is called with ``stamp_path`` before that file is
    opened (presumably it writes the stamp PDF there -- confirm against its
    definition). Page 0 of the stamp file is then merged onto each page of
    the input document and the result is written to ``output_path``.

    Args:
        input_path: Path of the PDF whose pages are stamped.
        stamp_path: Path passed to ``create_pdf_stamp`` and then read back
            as the stamp PDF.
        output_path: Path the stamped PDF is written to.
        add_frame: Forwarded unchanged to ``create_pdf_stamp``.
    """
    output = PdfFileWriter()
    create_pdf_stamp(stamp_path, add_frame=add_frame)
    # Context managers guarantee both source files are closed, which matters
    # when stamping many files in one run. They stay open until the output
    # has been written because the reader may load page data lazily.
    with open(input_path, 'rb') as input_file, open(stamp_path, 'rb') as stamp_file:
        pdf_in = PdfFileReader(input_file)
        pdf_stamp = PdfFileReader(stamp_file)
        stamp = pdf_stamp.getPage(0)
        # range() rather than xrange() so the loop also runs on Python 3.
        for i in range(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            page.mergePage(stamp)
            output.addPage(page)
        with open(output_path, 'wb') as f:
            output.write(f)
def main():
    """Stamp the two sample input PDFs, each without and with a frame."""
    stamp_file = '../temp/tmp_stamp.pdf'
    # (input file, output file, extra keyword arguments) for each run.
    jobs = (
        ('../input/input1.pdf', '../output/stamped1.pdf', {}),
        ('../input/input1.pdf', '../output/stamped1_with_frame.pdf', {'add_frame': True}),
        ('../input/input2.pdf', '../output/stamped2.pdf', {}),
        ('../input/input2.pdf', '../output/stamped2_with_frame.pdf', {'add_frame': True}),
    )
    for source, target, options in jobs:
        stamp_pdf(source, stamp_file, target, **options)


if __name__ == "__main__":
    main()
I'm sure there's a way to replace the individual file link so that it points directly to the directory and keeps the file name with it as well. Any pointers to get me started would be much appreciated, as I have been trying out all sorts of code without much luck.
`pathlib` treats paths as objects instead of strings.
`pathlib` objects have methods and attributes that work with them (e.g. `open`, `glob`, `name`, etc.).
from pathlib import Path
p = Path.cwd()
print(p)
>>> WindowsPath('E:/PythonProjects/DataCamp')
pdf_files = list(p.glob('*.pdf'))
print(pdf_files)
>>> [WindowsPath('E:/PythonProjects/DataCamp/aapl.pdf')]
pdf_name = pdf_files[0].name
print(pdf_name)
>>> 'aapl.pdf'
Use the `glob` method to find all the PDF files, including those in subdirectories, with `**` wildcards:
p.glob('**/*.pdf')
Use `name` to get and easily track the filename:
out_dir = p / 'output'
print(out_dir)
>>> WindowsPath('E:/PythonProjects/DataCamp/output')
out_pdf = out_dir / f'stamped_{pdf_name}'
print(out_pdf)
>>> WindowsPath('E:/PythonProjects/DataCamp/output/stamped_aapl.pdf')
The `pythonpdf` library might not work with `pathlib` objects; if so, convert the `pathlib` objects back to `str`:
print(type(stamp_path))
>>> pathlib.WindowsPath
print(type(str(stamp_path))
>>> str
create_pdf_stamp(str(stamp_path), add_frame=add_frame)
A note on `.glob`: the object it returns is a generator, so loop over it to get each matching path:
p = Path('e:/PythonProjects')
# Recursively match every PDF below p; the paths are produced one at a time.
files = p.glob('**/*.pdf')
for file in files:
    print(file)
    ...
    # do other stuff