Search code examples
javascriptparsingbinaryfiles

How can I get started parsing binary files in JavaScript?


I have some binary files I'd like to be able to parse within the browser. I have found some Python code that (I think) does exactly what I need, but I don't understand Python well enough to interpret what I'm seeing.

I have some sample files in my own code repository, and below is my attempt at parsing these files. You can drag a file into the snippet window to parse it.

// Drag-and-drop wiring: highlights the page body while a file is dragged over
// it and hands every dropped file to processFile() as an ArrayBuffer.
const elBody = document.body;
const dragClass = "drag-over";

/**
 * Reads one dropped file into an ArrayBuffer and passes it to processFile().
 *
 * A fresh FileReader is created per file: a reader that is still loading
 * rejects a second read call with an InvalidStateError, so sharing a single
 * reader breaks as soon as more than one file is dropped at once.
 *
 * @param {File} file - The file taken from the drop event.
 */
function readDroppedFile(file) {
  const reader = new FileReader();
  reader.onload = (loadEvent) => {
    processFile(loadEvent.target.result);
  };
  reader.onerror = () => {
    console.error(`Could not read "${file.name}":`, reader.error);
  };
  // readAsArrayBuffer takes only the blob; there is no encoding argument.
  reader.readAsArrayBuffer(file);
}

elBody.addEventListener("dragover", (dragEvent) => {
  // Cancelling dragover is what marks the element as a valid drop target.
  dragEvent.preventDefault();
  // classList.add is a no-op when the class is already present.
  elBody.classList.add(dragClass);
});

elBody.addEventListener("dragleave", () => {
  elBody.classList.remove(dragClass);
});

elBody.addEventListener("drop", (dropEvent) => {
  // Stop the browser from navigating to / opening the dropped file.
  dropEvent.preventDefault();
  elBody.classList.remove(dragClass);

  for (const item of [...dropEvent.dataTransfer.items]) {
    if (item.kind !== "file") {
      continue;
    }
    const file = item.getAsFile();
    // getAsFile() returns null when the item cannot be exposed as a file.
    if (file !== null) {
      readDroppedFile(file);
    }
  }
});

/**
 * Logs the first 32-bit signed integer stored in the file.
 *
 * The value is read through a DataView as little-endian, which is what the
 * previous typed-array read produced on little-endian hardware, but now
 * independent of the host's byte order.
 * NOTE(review): the file format's actual byte order is not shown here -
 * confirm against the format description.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the dropped file.
 * @returns {void} Nothing; the integer is written to the console. Buffers
 *   shorter than four bytes produce a warning instead of throwing.
 */
function processFile(arrayBuffer) {
    const headerBytes = 4;

    // A file shorter than one int32 cannot be read; building an Int32Array
    // over 1-3 bytes would throw a RangeError, so bail out explicitly.
    if (arrayBuffer.byteLength < headerBytes) {
        console.warn(
            `File has only ${arrayBuffer.byteLength} byte(s); ${headerBytes} are needed to read the leading integer.`
        );
        return;
    }

    // DataView reads in place (no slice/copy) with an explicit byte order.
    const firstInt = new DataView(arrayBuffer).getInt32(0, true);
    console.log(firstInt);
}
 html,body {height: 100vh;margin: 0;}
.drag-over {background-color: #ccc;}
<h1>Drop a file here</h1>

I am just not sure whether I am on the right track. My only source of information about how these types of files are structured is the comments in the Python file.

How do I take that information about the file format and convert that into a way to perform the same actions in JavaScript?


Solution

  • This worked for me on your first example file.

    Less so on the Spanish one with wingdings in it

    If you need to parse the text it can be implemented fairly easily.

    A `DataView` is useful here:

    /**
     * Parses a dropped file with a DataView and renders its readable text.
     *
     * Layout assumed by this parser: a 4-byte little-endian signed integer
     * followed by a byte stream. Printable ASCII bytes (32-126) and line
     * feeds (10) are kept; every other byte is skipped. A "%" followed by
     * word characters is rendered as an <h2> heading.
     *
     * The decoded text is HTML-escaped before it is assigned to innerHTML, so
     * "<", ">" and "&" coming from the file are shown literally instead of
     * being parsed as markup.
     *
     * @param {ArrayBuffer} arrayBuffer - Raw bytes of the dropped file.
     * @returns {void} Nothing; the result is written into the #output element.
     */
    const processFile = (arrayBuffer) => {
      const HEADER_BYTES = 4;
      const HTML_ESCAPES = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
      const escapeHtml = (text) =>
        text.replace(/[&<>]/g, (ch) => HTML_ESCAPES[ch]);

      const outputEl = document.getElementById("output");

      // getInt32 throws a RangeError when fewer than four bytes are available.
      if (arrayBuffer.byteLength < HEADER_BYTES) {
        console.warn("File is too short to contain the 4-byte header.");
        outputEl.innerHTML = "";
        return;
      }

      const dataView = new DataView(arrayBuffer);
      // Read the first 4 bytes as a 32-bit integer; true denotes little-endian
      const firstInteger = dataView.getInt32(0, true);
      console.log("First integer:", firstInteger);

      // Walk the remaining bytes one at a time, keeping only readable ones.
      const pieces = [];
      for (let offset = HEADER_BYTES; offset < arrayBuffer.byteLength; offset += 1) {
        const code = dataView.getUint8(offset);
        if (code >= 32 && code <= 126) {
          pieces.push(String.fromCharCode(code));
        } else if (code === 10) {
          pieces.push("\n");
        }
        // Other byte values (e.g. paragraph markers) are ignored for now;
        // handle them here based on your specific needs.
      }

      // Escape first, then add the heading markup. \w never matches the
      // characters introduced by escaping right after "%", so the headings
      // found are the same as on the raw text.
      outputEl.innerHTML = escapeHtml(pieces.join(""))
        .replace(/%(\w+)/g, "<h2>$1</h2>");
    };
    html,
    body {
      height: 100vh;
      margin: 0;
    }
    
    .drag-over {
      background-color: #ccc;
    }
    pre { margin-bottom: 120px; }
    <h1>Drop a file here</h1>
    <pre id="output"></pre>
    <hr/>
    <script>
      // File reader code - same flow as in the question, with three fixes:
      // the drop target is declared with const instead of being an implicit
      // global, each file gets its own FileReader, and the non-existent
      // encoding argument to readAsArrayBuffer is gone.

      const dragClass = "drag-over";
      // Despite the name (kept from the question), the drop target here is
      // the <h1>, not the document body.
      const elBody = document.querySelector("h1");

      // Reads one file and forwards its bytes to processFile(). A FileReader
      // that is still loading throws InvalidStateError on a second read call,
      // so a new reader is created for every dropped file.
      const readDroppedFile = (file) => {
        const reader = new FileReader();
        reader.onload = (loadEvent) => {
          processFile(loadEvent.target.result);
        };
        reader.onerror = () => {
          console.error("Could not read dropped file:", reader.error);
        };
        reader.readAsArrayBuffer(file);
      };

      elBody.addEventListener("dragover", (dragEvent) => {
        // Cancelling dragover is what allows the subsequent drop.
        dragEvent.preventDefault();
        // classList.add does nothing if the class is already set.
        elBody.classList.add(dragClass);
      });

      elBody.addEventListener("dragleave", () => {
        elBody.classList.remove(dragClass);
      });

      elBody.addEventListener("drop", (dropEvent) => {
        // Keep the browser from opening the dropped file itself.
        dropEvent.preventDefault();
        elBody.classList.remove(dragClass);

        for (const item of [...dropEvent.dataTransfer.items]) {
          if (item.kind !== "file") {
            continue;
          }
          const file = item.getAsFile();
          // getAsFile() may return null for items that are not real files.
          if (file !== null) {
            readDroppedFile(file);
          }
        }
      });
    </script>

    An alternative to `DataView`, using typed arrays:

    /**
     * Parses a dropped file with typed arrays and renders its readable text.
     *
     * The first four bytes are read as one 32-bit signed integer through an
     * Int32Array (which uses the host's byte order). The remaining bytes are
     * scanned one by one: printable ASCII (32-126) and line feeds (10) are
     * kept, everything else is dropped. A "%" followed by word characters is
     * rendered as an <h2> heading.
     *
     * The text is HTML-escaped before being assigned to innerHTML so that
     * "<", ">" and "&" from the file cannot be interpreted as markup.
     *
     * @param {ArrayBuffer} arrayBuffer - Raw bytes of the dropped file.
     * @returns {void} Nothing; the result is written into the #output element.
     */
    const processFile = (arrayBuffer) => {
      const HEADER_BYTES = 4;
      const target = document.getElementById("output");

      // An Int32Array view needs four whole bytes; shorter files would throw.
      if (arrayBuffer.byteLength < HEADER_BYTES) {
        console.warn("File is too short to contain the 4-byte header.");
        target.innerHTML = "";
        return;
      }

      // View the header in place (offset 0, one element) instead of copying
      // it out with slice().
      const [firstInteger] = new Int32Array(arrayBuffer, 0, 1);
      console.log("First integer:", firstInteger);

      // Unsigned view over everything after the header.
      const payload = new Uint8Array(arrayBuffer, HEADER_BYTES);
      let text = "";
      for (const code of payload) {
        const isPrintable = code >= 32 && code <= 126;
        if (isPrintable || code === 10) {
          // String.fromCharCode(10) is "\n", so both cases share one path.
          text += String.fromCharCode(code);
        }
      }

      const escaped = text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");

      // Heading markup is added after escaping; the escape sequences never
      // start with a word character directly after "%", so matches are the
      // same as on the raw text.
      target.innerHTML = escaped.replace(/%(\w+)/g, "<h2>$1</h2>");
    };
    html,
    body {
      height: 100vh;
      margin: 0;
    }
    
    .drag-over {
      background-color: #ccc;
    }
    
    pre {
      margin-bottom: 120px;
    }
    <h1>Drop a file here</h1>
    <pre id="output"></pre>
    <hr/>
    <script>
      // File reader code - same flow as in the question, with three fixes:
      // the drop target is declared with const instead of being an implicit
      // global, each file gets its own FileReader, and the non-existent
      // encoding argument to readAsArrayBuffer is gone.

      const dragClass = "drag-over";
      // Despite the name (kept from the question), the drop target here is
      // the <h1>, not the document body.
      const elBody = document.querySelector("h1");

      /**
       * Reads a single dropped file and forwards its bytes to processFile().
       * One FileReader per file: a reader that is still loading throws
       * InvalidStateError if asked to start another read.
       */
      function readDroppedFile(file) {
        const reader = new FileReader();
        reader.addEventListener("load", () => {
          processFile(reader.result);
        });
        reader.addEventListener("error", () => {
          console.error("Could not read dropped file:", reader.error);
        });
        reader.readAsArrayBuffer(file);
      }

      elBody.addEventListener("dragover", (dragEvent) => {
        // Cancelling dragover is what allows the subsequent drop.
        dragEvent.preventDefault();
        // classList.add does nothing if the class is already set.
        elBody.classList.add(dragClass);
      });

      elBody.addEventListener("dragleave", () => {
        elBody.classList.remove(dragClass);
      });

      elBody.addEventListener("drop", (dropEvent) => {
        // Keep the browser from opening the dropped file itself.
        dropEvent.preventDefault();
        elBody.classList.remove(dragClass);

        [...dropEvent.dataTransfer.items]
          .filter((item) => item.kind === "file")
          .map((item) => item.getAsFile())
          // getAsFile() may return null for items that are not real files.
          .filter((file) => file !== null)
          .forEach((file) => readDroppedFile(file));
      });
    </script>