Search code examples
python-3.xpython-docx

Change a Single word color in Paragraph, python-docx


I'm reading a docx file with python-docx and I'm making some changes in paragraph text, so that, everytime I change the text, I lost the color of some words:

Here is my code:

def get_paragraphs(self, doc, paragraphs = []):
    for p in doc.paragraphs:
        if p.text:
            if p.text[0] == r'{':
                continue 
            if p.text.isspace():
                continue

            p.text = p.text.replace("Before", "After")
            paragraphs.append(p.text)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                self.get_paragraphs(cell, paragraphs)

    if not doc._parent:
        return paragraphs

I was thinking I could add a tag in the text like "\<red\>Red\<red\>", and pass this in the paragraph style, but I don't know how can I do that?


Solution

  • Using the "runs" list of paragraph objects, I can check each attribute, like color, if the text is bold or italic, and after that, make an "html tag" where I can get these parameters using BeatifullSoup:

    def get_paragraphs(doc, paragraphs = []):
    for p in doc.paragraphs:
        if p.text:
            if p.text[0] == r'{':
                continue 
            if p.text.isspace():
                continue
            
            
            runs_text = []
            for r in p.runs:
                text_tag = f'<text color="{r.font.color.rgb}">{r.text}</text>'.replace('Before', 'After')                
                runs_text.append(text_tag)
            
            p.text = ''
    
            # turn html to paragraphs
            for text_tag in runs_text:
                tag = BeautifulSoup(text_tag).find('text')
                text = tag.text
    
                run = p.add_run(text)
                color = tuple(int(tag.get('color')[i:i+2], 16)  for i in (0, 2, 4))
                run.font.color.rgb = RGBColor(*color)
    
            paragraphs.append(p.text)
            
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                get_paragraphs(cell, paragraphs)
    
    if not doc._parent:
        return paragraphs