Search code examples
vbaexcel64-bitrtf

Convert RTF (Rich Text Format) code into plain text in Excel


I'm exporting a database query as Excel and I am getting rows with RTF formatting.

Here is a screenshot of the Excel

How can I convert these fields into plain text? I've found answers that are pretty old, so I was wondering if anyone knows a way.


Solution

  • I'm revisiting this question to provide 2 javascript solutions, rather than a .NET one.

    Approach 1

    const parseRTF = require("rtf-parser");
    
    let rtf = `{\\rtf1\\ansi\\deff0\\nouicompat{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}{\\f1\\fnil\\fcharset204 Calibri;}{\\f2\\fnil Calibri;}}  {\\colortbl ;\\red0\\green0\\blue0;}  {\\*\\generator Riched20 10.0.19041}\\viewkind4\\uc1   \\pard\\cf1\\f0\\fs18\\lang1033 WEB1616 \\f1\\lang1071\\'ef\\'eb\\'e0\\'f2\\'e5\\'ed\\'ee \\'f1\\'ee \\'ea\\'e0\\'f0\\'f2\\'e8\\'f7\\'ea\\'e0\\par  \\'ca\\'f0\\'e8\\'f1\\'f2\\'e8\\'ed\\'e0 \\'c3\\'ee\\'eb\\'e0\\'e1\\'ee\\'f1\\'ea\\'e0 077640615\\par  \\'c2\\'e0\\'f0\\'f8\\'e0\\'e2\\'f1\\'ea\\'e0 6\\'e0\\par  1000 \\'d1\\'ea\\'ee\\'ef\\'bc\\'e5\\f2\\lang1033\\par  }  `;
    
    function convertRTFtoPlainText(rtf) {
        return new Promise((resolve, reject) => {
            parseRTF.string(rtf, (err, doc) => {
                if (err) {
                    reject(err);
                }
    
                let string = "";
    
                doc.content.forEach((item) => {
                    if (item.content) {
                        item.content.forEach((span) => {
                            string += span.value;
                        });
                    } else {
                        string += item.value;
                    }
                });
    
                resolve(string.trim());
            });
        });
    }
    
    (async () => {
        let value = await convertRTFtoPlainText(rtf);
    
        console.log(value);
    })();
    

    Approach 2

    const jsdom = require("jsdom");
    const { JSDOM } = jsdom;
    
    function stringToArrayBuffer(string) {
        if (string == null) return;
        let buffer = new ArrayBuffer(string.length);
        let bufferView = new Uint8Array(buffer);
        for (let i = 0; i < string.length; i++) {
            bufferView[i] = string.charCodeAt(i);
        }
        return buffer;
    }
    
    // callback = function to run after the DOM has rendered, defined when calling runRtfjs
    function runRtfjs(rtf, callback, errorCallback) {
        const virtualConsole = new jsdom.VirtualConsole();
        virtualConsole.sendTo(console);
    
        let dom = new JSDOM(
            `
                <script src="./node_modules/rtf.js/dist/RTFJS.bundle.js"></script>
    
                <script>
    
                    RTFJS.loggingEnabled(false);
    
                    try {
                        const doc = new RTFJS.Document(rtfFile);
    
                        const meta = doc.metadata();
                        doc
                            .render()
                            .then(function(htmlElements) {
    
                                const div = document.createElement("div");
                                div.append(...htmlElements);
    
                                // window.done(meta, div.innerHTML);
                                // window.done(meta, div.innerText);
                                window.done(meta, div.textContent); // pass the data to the callback
    
                        }).catch(error => window.onerror(error))
                    } catch (error){
                        window.onerror(error)
                    }
                </script>
            `,
            {
                resources: "usable",
                runScripts: "dangerously",
                url: "file://" + __dirname + "/",
                virtualConsole,
                beforeParse(window) {
                    window.rtfFile = stringToArrayBuffer(rtf);
                    window.done = function (meta, html) {
                        callback(meta, html); // call the callback
                    };
                    window.onerror = function (error) {
                        errorCallback(error);
                    };
                },
            }
        );
    }
    
    let rtf = `{\\rtf1\\ansi\\deff0\\nouicompat{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}{\\f1\\fnil\\fcharset204 Calibri;}{\\f2\\fnil Calibri;}}  {\\colortbl ;\\red0\\green0\\blue0;}  {\\*\\generator Riched20 10.0.19041}\\viewkind4\\uc1   \\pard\\cf1\\f0\\fs18\\lang1033 WEB1616 \\f1\\lang1071\\'ef\\'eb\\'e0\\'f2\\'e5\\'ed\\'ee \\'f1\\'ee \\'ea\\'e0\\'f0\\'f2\\'e8\\'f7\\'ea\\'e0\\par  \\'ca\\'f0\\'e8\\'f1\\'f2\\'e8\\'ed\\'e0 \\'c3\\'ee\\'eb\\'e0\\'e1\\'ee\\'f1\\'ea\\'e0 077640615\\par  \\'c2\\'e0\\'f0\\'f8\\'e0\\'e2\\'f1\\'ea\\'e0 6\\'e0\\par  1000 \\'d1\\'ea\\'ee\\'ef\\'bc\\'e5\\f2\\lang1033\\par  }  `;
    
    runRtfjs(
        rtf,
        (meta, html) => {
            console.log(html);
        },
        (error) => console.error(error)
    );