I have created a TypeScript script that takes in a JSON string and converts the information it contains into a sort of poster as a Word document, using docx. Some parts of the JSON could contain HTML tags (not tables or CSS classes, but possibly <br/>, <i>, <p>, etc.), and I want to be able to pass such a string in and get back one of the docx objects, like a Paragraph or a TextRun.
I have found the package html-to-docx, but from the look of the docs it wants to take one whole HTML string and return a file, whereas I want more control over what goes in and how it is displayed in the Word document. I also don't want to have to change all my code over to a different library like OpenXML.
Is there another library, or something I'm missing in docx, that could achieve this? Or would I have to parse my own strings to find where the <br/>, <i>, <p> tags are and convert them separately into docx objects?
Any help would be appreciated, thanks.
I've created this basic HTML tag converter. It won't work for all use cases, but it works for mine; if someone wants to add support for more tags or improve the code, they can.
const htmlTagMatch =
content.match(/<\/?[^>]+(>|$)|[^<]+/g) || [];
const paragraphStack = [];
let currentParagraph = [];
let currentBullet = null;
let pCount = 0;
let formattingStack = [];
htmlTagMatch.forEach((item, index) =>
{
if (item.startsWith('<') && item.endsWith('>')) {
// Handle HTML tags
var tag = item.toLowerCase();
if (pCount % 2 == 0 && tag == '<p>') {
tag = '</p>';
}
switch (tag) {
case '<i>':
case '<em>':
formattingStack.push('italics');
break;
case '<b>':
case '<strong>':
formattingStack.push('bold');
break;
case '<ul>':
if (currentParagraph.length > 0) {
paragraphStack.push(
createParagraph(currentParagraph, currentBullet)
);
}
currentParagraph = [];
break;
case '<li>':
if (currentParagraph.length > 0) {
paragraphStack.push(
createParagraph(currentParagraph, currentBullet)
);
}
currentBullet = { level: 0 };
currentParagraph = [];
break;
case '</i>':
case '</em>':
case '</b>':
case '</strong>':
formattingStack.pop();
break;
case '</ul>':
if (currentParagraph.length > 0) {
paragraphStack.push(
createParagraph(currentParagraph, currentBullet)
);
}
currentBullet = null;
currentParagraph = [];
break;
case '<br/>':
case '<br>':
currentParagraph.push(new TextRun({ break: 1 }));
break;
case '</p>':
case '<p>':
pCount++;
if (htmlTagMatch[index + 1] == '<p>' && htmlTagMatch[index] == '</p>') {
break;
}
// Add a line break
if (pCount == 1 && currentParagraph.length > 0) {
currentParagraph.push(new TextRun({ break: 2 }));
}
if (pCount != 1) {
currentParagraph.push(new TextRun({ break: 2 }));
}
break;
}
} else {
// Handle plain text
let textRun = new TextRun({ text: item });
if (formattingStack.includes('italics')) {
textRun = new TextRun({ text: item, italics: true });
}
if (formattingStack.includes('bold')) {
textRun = new TextRun({ text: item, bold: true });
}
currentParagraph.push(textRun);
}
});
// Add the last paragraph to the paragraphStack
if (currentParagraph.length > 0) {
paragraphStack.push(createParagraph(currentParagraph, currentBullet));
}
return paragraphStack;