I'm reading a docx file with python-docx
and making some changes to the paragraph text, but every time I change the text, I lose the color of some words.
Here is my code:
def get_paragraphs(self, doc, paragraphs=None):
    """Replace "Before" with "After" in the "{"-prefixed paragraphs of *doc*.

    Walks ``doc.paragraphs`` first, then recurses into every cell of every
    table in ``doc.tables`` so nested content is processed the same way.

    The replacement is done by assigning ``p.text``, which replaces the
    paragraph's runs; run-level formatting such as colour is therefore
    lost. That loss is the problem this snippet demonstrates.

    Args:
        doc: Object exposing ``paragraphs``, ``tables`` and ``_parent``
            (used here with a document and with table cells).
        paragraphs: List passed through the recursion and handed back to
            the caller. Nothing in this function appends to it. A fresh
            list is created when omitted.

    Returns:
        ``paragraphs`` when ``doc._parent`` is falsy, otherwise ``None``.
    """
    # A ``[]`` default would be one list shared by every call.
    if paragraphs is None:
        paragraphs = []

    for p in doc.paragraphs:
        text = p.text
        # Only text beginning with "{" is rewritten; startswith() also
        # covers the empty-text case without indexing.
        if not text.startswith("{"):
            continue
        # NOTE(review): the posted code additionally required
        # ``p.text.isspace()`` here. That can never hold for text that
        # starts with "{", so the replacement was unreachable. The check
        # is dropped -- confirm the intended filter.
        p.text = text.replace("Before", "After")

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                self.get_paragraphs(cell, paragraphs)

    if not doc._parent:
        return paragraphs
    return None
I was thinking I could add a tag in the text like "\<red\>Red\<red\>", and pass this to the paragraph style, but I don't know how to do that.
Using the "runs" list of each paragraph object, I can check each run's attributes, such as its color or whether the text is bold or italic, and then build an "html tag" for each run from which I can read these parameters back using BeautifulSoup:
def get_paragraphs(doc, paragraphs=None):
    """Replace "Before" with "After" in "{"-prefixed paragraphs, keeping colour.

    Each run of a matching paragraph is serialised to a small
    ``<text color="RRGGBB">...</text>`` tag, the paragraph is emptied, and
    one new run is added per tag with the colour restored. Table cells in
    ``doc.tables`` are processed recursively.

    Only the run's RGB colour is carried over; other run formatting is not
    restored. The replacement is applied to each run's text separately, so
    an occurrence of "Before" split across two runs is not replaced.

    Args:
        doc: Object exposing ``paragraphs``, ``tables`` and ``_parent``
            (used here with a document and with table cells).
        paragraphs: List passed through the recursion and handed back to
            the caller. Nothing in this function appends to it. A fresh
            list is created when omitted.

    Returns:
        ``paragraphs`` when ``doc._parent`` is falsy, otherwise ``None``.
    """
    from html import escape

    # A ``[]`` default would be one list shared by every call.
    if paragraphs is None:
        paragraphs = []

    for p in doc.paragraphs:
        # Only text beginning with "{" is rewritten; startswith() also
        # covers the empty-text case without indexing.
        if not p.text.startswith("{"):
            continue
        # NOTE(review): the posted code additionally required
        # ``p.text.isspace()`` here. That can never hold for text that
        # starts with "{", so the rewrite was unreachable. The check is
        # dropped -- confirm the intended filter.

        runs_text = []
        for r in p.runs:
            rgb = r.font.color.rgb
            # Omit the attribute when the run has no RGB colour, rather
            # than writing the string "None" into the tag.
            color_attr = f' color="{rgb}"' if rgb is not None else ""
            # Replace in the run text only (not in the tag markup) and
            # escape it so "<" or "&" in the document survive parsing.
            body = escape(r.text.replace("Before", "After"), quote=False)
            runs_text.append(f"<text{color_attr}>{body}</text>")

        p.text = ''
        # turn html to paragraphs
        for text_tag in runs_text:
            tag = BeautifulSoup(text_tag, "html.parser").find("text")
            run = p.add_run(tag.text)
            color = tag.get("color")
            if color:
                run.font.color.rgb = RGBColor.from_string(color)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                get_paragraphs(cell, paragraphs)

    if not doc._parent:
        return paragraphs
    return None