Search code examples
javascriptmysqlexpresscsvmysql2

Fast-CSV modify before loading CSV to MySQL


I am trying to load a CSV file to my MYSQL database, however before I do so I need to modify it slightly. The CSV file is Pipe delimitated (|) I have a column in the CSV file called Party:Identification. This column has results such as "a:hello, b:hi c:151 ......" This can go on infinitely. I only need to get the value for c. I have come up with a method that works for this, however I am stuck on how to modify the value before the file is inserted into the database.

I tried replacing all the ":" in the headers with "" and then using .transform to modify the values, however this doesn't appear to change the values in the column, only the header. Code is attached below.

 csv.parseFile(req.file.path, {
        headers: headers => headers.map(function (header) {
            const newHeaders = header.replaceAll(" ", "").replaceAll(":", "")
            console.log(newHeaders)
            return newHeaders
        }),
        delimiter: '|'
    })
        .transform(function(data) {
            console.log(data)
            PartyIdentification: getPartyID(data.partyIdentification)
        })
        .on("error", (err) => console.error(err))
        .on("finish", function () {
            query("LOAD DATA LOCAL INFILE '" +
                file +
                "' INTO TABLE table " +
                " FIELDS TERMINATED BY '|'" +
                " LINES TERMINATED BY  '\n'" +
                " IGNORE 1 ROWS;").then(r =>
                console.log(file)
            )
        })



function getPartyID(str) {
    if (str === undefined) return ""
    const split = str.split(",")
    const value = split.find(val => {
        return val.includes("c")
    })
    if(value === undefined) return ""
    return (value.split(":")[1].trim())
}

Solution

  • You can use a regex to parse the value of c:123 in a string:

    function getPartyID(str) {
      if (str === undefined) return "";
      const m = str.match(/\bc:([^ ]*)/); 
      return m ? m[1] : null;
    }
    
    [
      "a:hello, b:hi c:151 d:foo",
      "a:hello, b:no_c",
    ].forEach(str => {
      console.log(str, '==>', getPartyID(str));
    });

    Output:

    a:hello, b:hi c:151 d:foo ==> 151
    a:hello, b:no_c ==> null
    

    Explanation of regex:

    • \b -- word boundary
    • c: -- literal text
    • ([^ ]*) -- capture group 1 with value, up to and excluding space

    UPDATE 1: Based on additional question on how to insert modified data into MySQL, here is a solution that does not use INFILE, but instead loads the file into memory (here simulated with const input), modifies the data as needed, and constructs a SQL statement that inserts all the data. IMPORTANT: You likely want to add escapes against SQL injections.

    const input = `Name|Party:Identification|Date
    Foo|a:hello, b:hi c:151 d:foo|2022-01-11
    Bar|a:hola, b:hey c:99 d:bar|2022-01-12
    Nix|a:nix, b:ni d:nix|2022-01-13`;
    const partIdFieldName = 'Party:Identification';
    
    function getPartyID(str) {
      if (str === undefined) return "";
      const m = str.match(/\bc:([^ ]*)/); 
      return m ? m[1] : 0;
    }
    
    let partIdIdx = 0;
    let data = input.split(/[\r\n]+/).map((row, idx) => {
      let cells = row.split('|');
      if(idx === 0) {
        partIdIdx = cells.indexOf(partIdFieldName);
      } else {
        cells[partIdIdx] = getPartyID(cells[partIdIdx]);
      }
      return cells;
    });
    //console.log('data', '==>', data);
    
    let sql = 'INSERT INTO tbl_name\n' +
     '  (' + data[0].map(val => '"' + val + '"').join(',') + ')\n' +
     'VALUES\n' +
     data.slice(1).map(row => {
       return '  (' + row.map(val => /^[\d+\.]+$/.test(val)
         ? val
         : '"' + val + '"'
       ).join(',') + ')'
     }).join('\n') + ';';
    console.log(sql);

    Output:

    INSERT INTO tbl_name
      ("Name","Party:Identification","Date")
    VALUES
      ("Foo",151,"2022-01-11")
      ("Bar",99,"2022-01-12")
      ("Nix",0,"2022-01-13");