Search code examples
pythonpyfpdf

FPDF Python Package to Write PDF Files - Inserting extra line breaks. Does this for Multi_cell and cell. Want to remove extra line breaks


I have a python application that traverses directories on the file path passed in as a parameter. As each file is found, the file is opened and data is inserted into multi_cells.

I have checked the TXT values provided to multi_cell and there are no extra line breaks. I have played around with different setting to no avail. When inspecting the PDF output, extra line breaks are inserted. Any help would be appreciated. Look at the end of the script --- the final multicell.

    import os,sys
from fpdf import FPDF
for i in range(1, len(sys.argv)):
    print('argument:', i, 'value:', sys.argv[i])
    mypath = sys.argv[i];

mypath = "D:\\test\\test\\CrossingTrades\\DataDictionary\\testLib\\forms"

class TOC(FPDF):
    def __init__(this, orientation='P',unit='mm',format='A4'):
        this._toc=[]
        this._numbering=0
        this._numberingFooter=0
        this._numPageNum=1
        FPDF.__init__(this,orientation,unit,format)
    def header(self):
                self.set_font('Courier', 'B', 15);
                self.cell(0, 5, "Tradeflow Format and Rule Documentation", 0, 0, 'C')
                self.ln(20)

    def footer(self):
                self.set_y(-10)
                self.set_font('Arial', 'I', 8)
        
        # Add a page number
                page = 'Page ' + str(self.page_no()) + '/{nb}'
                self.cell(0, 10, page, 0, 0, 'C')

    def startPageNums(this):
        this._numbering=1
        this._numberingFooter=1

    def stopPageNums(this):
        this._numbering=0

    def numPageNo(this):
        return this._numPageNum

    def TOC_Entry(this,txt,level,pageNumber):
        this._numPageNum=pageNumber
        this._toc+=[{'t':txt,'l':level,'p':this.numPageNo()}]

        #print("PAGE NO IS ",this.numPageNo())

    def insertTOC(this,location=1,labelSize=20,entrySize=10,tocfont='Times',label='Table of Contents'):
        #make toc at end
        this.stopPageNums()
        #this.AddPage()
        this.add_page();
        this._numPageNum+=1
        tocstart=this.page

        this.set_font(tocfont,'B',labelSize)
        this.cell(0,5,label,0,1,'C')
        this.ln(10)

        for t in this._toc:
            #Offset
            level=t['l']
            if(level>0):
                this.cell(level*8)
            weight=''
            if(level==0):
                weight='B'
            Str=t['t']
            this.set_font(tocfont,weight,entrySize)
            strsize=this.get_string_width(Str)
            this.cell(strsize+2,this.font_size+2,Str)

            #Filling dots
            this.set_font(tocfont,'',entrySize)
            PageCellSize=this.get_string_width(str(t['p']))+2
            w=this.w-this.l_margin-this.r_margin-PageCellSize-(level*8)-(strsize+2)
            #nb=w/this.get_string_width('.')
            dots = "........................................................................................................"
            #dots.replace('.', '.'*60, 1)
            this.cell(w,this.font_size+2,dots,0,0,'R')

            #Page number
            this.cell(PageCellSize,this.font_size+2,str(t['p']),0,1,'R')

        #grab it and move to selected location
        n=this.page
        n_toc = n - tocstart + 1
        last = []

        #store toc pages
        for i in range(tocstart,n+1):
            last+=[this.pages[i]]

        #move pages
        for i in range(tocstart-1,location-1,-1):
        #~ for(i=tocstart - 1;i>=location-1;i--)
            this.pages[i+n_toc]=this.pages[i]

        #Put toc pages at insert point
        for i in range(0,n_toc):
            this.pages[location + i]=last[i]

    #def Footer(this):
    #   if(this._numberingFooter==0):
    #       return
    #   #Go to 1.5 cm from bottom
    #   this.SetY(-15)
    #   #Select Arial italic 8
    #   this.SetFont('Arial','I',8)
    #   this.Cell(0,7,str(this.numPageNo()),0,0,'C');
    #   if(this._numbering==0):
    #       this._numberingFooter=0

class DPLFormat: 
    def __init__(self, name,arguments,contentsList,callerList,callingList): 
        self.name = name 
        self.arguments = arguments
        self.contentsList = contentsList
        self.callerList = callerList
        self.callingList = callingList

class DPLRule: 
    def __init__(self, name,arguments,contentsList,callerList,callingList): 
        self.name = name 
        self.arguments = arguments
        self.contentsList = contentsList
        self.callerList = callerList
        self.callingList = callingList

def get_filepaths(directory):
    """
    This function will generate the file names in a directory 
    tree by walking the tree either top-down or bottom-up. For each 
    directory in the tree rooted at directory top (including top itself), 
    it yields a 3-tuple (dirpath, dirnames, filenames).
    """
    file_paths = []  # List which will store all of the full filepaths.
    #print ("Opening:", directory)
    # Walk the tree.
    for root, directories, files in os.walk(directory):
        for filename in files:
            if ".cfformat" in filename and ".cfformatprop" not in filename: 
                # Join the two strings in order to form the full filepath.
                filepath = os.path.join(root, filename)
                file_paths.append(filepath)  # Add it to the list.
                #file_paths.append(filename)  # Add it to the list.
    return file_paths  # Self-explanatory.

# PDF Handling
# save FPDF() class into a
# variable pdf

pdf = TOC('P', 'mm', 'A4')
pdf.alias_nb_pages()
# Run the above function and store its results in a variable.   
full_file_paths = get_filepaths(mypath)
formatList = []
tocList = []
print ("Beginning Processing ",mypath)
for index, item in enumerate(full_file_paths):
    formatName = item.replace(".cfformat","")
    #print(index,"",formatName)
    formatArgs = ""
    ruleFlag = 1
    contentsList = []
    callerList = []
    callingList = []

    #Find format in files
    full_file_paths2 = get_filepaths(mypath)
    cnt = 0
    for index2, item2 in enumerate(full_file_paths2):
        formatName2 = item2.replace(".cfformat","")
        #if cnt == 0:
      #      print("Searching ",os.path.basename(formatName)," IN ",os.path.basename(formatName2))

        with open(item2,'r') as fp2:
            line2 = fp2.readline()
            #print ("Opening ",os.path.basename(formatName2))
       
            while line2:
                line2 = fp2.readline()
                if cnt == 0 :
                    if os.path.basename(formatName)in line2 and os.path.basename(formatName) != os.path.basename(formatName2) :
                        callerList.append(os.path.basename(formatName2))
                        cnt += 1

    #---------------------END SEARCH FOR FORMAT IN OTHER FILES------------------
        
    with open(item,'r') as fp:
        line = fp.readline()
        
        if "[" in line and "SQL" or ";" not in line:
            formatArgs = line
            ruleFlag = 0
        cnt = 1
        while line:
            line = fp.readline()
            #print("Line {}: {}".format(cnt, line.strip()))
            #if "!" in line:
            line = line.replace("–", "-")
            #res = bytes(line,'UTF-8')
            contentsList.append(line)
            if "@" in line:
                callingList.append(line)
            
            cnt += 1
    if formatArgs != "":
        formatList.append( DPLFormat(os.path.basename(formatName),formatArgs,contentsList,callerList,callingList) )

#Now go through format files
pdf.set_font("Courier", size = 8)
formatList.sort(key=lambda x: x.name)
pdf.startPageNums()
for obj in formatList:
  #print( obj.name, obj.arguments,sep =' ' )
  # Add a page
  pdf.add_page()
  pdf.TOC_Entry(obj.name,1,pdf.page_no())

# caller list
  pdf.set_font('Courier', 'B', size=13);
  pdf.cell(200, 10, txt = obj.name ,
         ln = 1, align = 'C')
  ii = 0
  pdf.set_font('Courier', 'I', size=10);
  pdf.cell(200, 10, txt = "Called By: " ,
         ln = 1, align = 'L')
  callerStr=""
  #print ("Caller list length is ", len(obj.callerList))
  
  while ii != len (obj.callerList):
      callerStr=callerStr+obj.callerList[ii]
      ii += 1
 
  pdf.multi_cell(0, 8, callerStr, 1, 'J')

  #calling list

  pdf.set_font('Courier', 'I', size=10);
  ii = 0
  pdf.cell(200, 10, txt = "Calling: " ,
         ln = 1, align = 'L')
  callingStr=""
  #print ("Caller list length is ", len(obj.callerList))
  
  while ii != len (obj.callingList):
      callingStr=callingStr+obj.callingList[ii]
      ii += 1
  pdf.set_font('Courier',size=8);    
  pdf.multi_cell(0, 8, callingStr, 1, 'J')

  #contents
  pdf.set_font('Courier',size=8);   
  
  i = 0
  codeStr = ""
  while i != len (obj.contentsList):
      codeStr=codeStr+obj.contentsList[i]
      i += 1
  pdf.multi_cell(0, 8, codeStr, 1, 'J')

 
# save the pdf with name .pdf
print ("\nWriting PDF Documentation")
pdf.insertTOC()
pdf.output("D:\\DPLAutoDoc.pdf")
print ("\nFinished")

Solution

  • The width in multi cell call, the first parameter is cell size or width. In portrait set that to something like 190. The second value actually controls spacing between lines so set to a value like 1 or 2. Set justification to left.