I'm exporting a database query as Excel and I am getting rows with RTF formatting.
How can I convert these fields into plain text? I've found answers that are pretty old, so I was wondering if anyone knows a way.
I'm revisiting this question to provide 2 javascript solutions, rather than a .NET
one.
const parseRTF = require("rtf-parser");
let rtf = `{\\rtf1\\ansi\\deff0\\nouicompat{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}{\\f1\\fnil\\fcharset204 Calibri;}{\\f2\\fnil Calibri;}} {\\colortbl ;\\red0\\green0\\blue0;} {\\*\\generator Riched20 10.0.19041}\\viewkind4\\uc1 \\pard\\cf1\\f0\\fs18\\lang1033 WEB1616 \\f1\\lang1071\\'ef\\'eb\\'e0\\'f2\\'e5\\'ed\\'ee \\'f1\\'ee \\'ea\\'e0\\'f0\\'f2\\'e8\\'f7\\'ea\\'e0\\par \\'ca\\'f0\\'e8\\'f1\\'f2\\'e8\\'ed\\'e0 \\'c3\\'ee\\'eb\\'e0\\'e1\\'ee\\'f1\\'ea\\'e0 077640615\\par \\'c2\\'e0\\'f0\\'f8\\'e0\\'e2\\'f1\\'ea\\'e0 6\\'e0\\par 1000 \\'d1\\'ea\\'ee\\'ef\\'bc\\'e5\\f2\\lang1033\\par } `;
function convertRTFtoPlainText(rtf) {
return new Promise((resolve, reject) => {
parseRTF.string(rtf, (err, doc) => {
if (err) {
reject(err);
}
let string = "";
doc.content.forEach((item) => {
if (item.content) {
item.content.forEach((span) => {
string += span.value;
});
} else {
string += item.value;
}
});
resolve(string.trim());
});
});
}
(async () => {
let value = await convertRTFtoPlainText(rtf);
console.log(value);
})();
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
function stringToArrayBuffer(string) {
if (string == null) return;
let buffer = new ArrayBuffer(string.length);
let bufferView = new Uint8Array(buffer);
for (let i = 0; i < string.length; i++) {
bufferView[i] = string.charCodeAt(i);
}
return buffer;
}
// callback = function to run after the DOM has rendered, defined when calling runRtfjs
function runRtfjs(rtf, callback, errorCallback) {
const virtualConsole = new jsdom.VirtualConsole();
virtualConsole.sendTo(console);
let dom = new JSDOM(
`
<script src="./node_modules/rtf.js/dist/RTFJS.bundle.js"></script>
<script>
RTFJS.loggingEnabled(false);
try {
const doc = new RTFJS.Document(rtfFile);
const meta = doc.metadata();
doc
.render()
.then(function(htmlElements) {
const div = document.createElement("div");
div.append(...htmlElements);
// window.done(meta, div.innerHTML);
// window.done(meta, div.innerText);
window.done(meta, div.textContent); // pass the data to the callback
}).catch(error => window.onerror(error))
} catch (error){
window.onerror(error)
}
</script>
`,
{
resources: "usable",
runScripts: "dangerously",
url: "file://" + __dirname + "/",
virtualConsole,
beforeParse(window) {
window.rtfFile = stringToArrayBuffer(rtf);
window.done = function (meta, html) {
callback(meta, html); // call the callback
};
window.onerror = function (error) {
errorCallback(error);
};
},
}
);
}
let rtf = `{\\rtf1\\ansi\\deff0\\nouicompat{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}{\\f1\\fnil\\fcharset204 Calibri;}{\\f2\\fnil Calibri;}} {\\colortbl ;\\red0\\green0\\blue0;} {\\*\\generator Riched20 10.0.19041}\\viewkind4\\uc1 \\pard\\cf1\\f0\\fs18\\lang1033 WEB1616 \\f1\\lang1071\\'ef\\'eb\\'e0\\'f2\\'e5\\'ed\\'ee \\'f1\\'ee \\'ea\\'e0\\'f0\\'f2\\'e8\\'f7\\'ea\\'e0\\par \\'ca\\'f0\\'e8\\'f1\\'f2\\'e8\\'ed\\'e0 \\'c3\\'ee\\'eb\\'e0\\'e1\\'ee\\'f1\\'ea\\'e0 077640615\\par \\'c2\\'e0\\'f0\\'f8\\'e0\\'e2\\'f1\\'ea\\'e0 6\\'e0\\par 1000 \\'d1\\'ea\\'ee\\'ef\\'bc\\'e5\\f2\\lang1033\\par } `;
runRtfjs(
rtf,
(meta, html) => {
console.log(html);
},
(error) => console.error(error)
);