I have some binary files I'd like to be able to parse within the browser. I have found some Python code that (I think) does exactly what I need, but I don't understand Python well enough to interpret what I'm seeing.
I have some sample files in my own code repository, and below is my attempt at parsing these files. You can drag files into the snippet window to parse them.
// Drag-and-drop wiring: the whole <body> is the drop target, and every
// dropped file is read into an ArrayBuffer and passed to processFile().
const elBody = document.body;
const dragClass = "drag-over";

/**
 * Read one dropped file as an ArrayBuffer and hand the result to processFile.
 *
 * A fresh FileReader is created for every file because a FileReader can only
 * run one read at a time: starting a second read while the first is still
 * loading throws an InvalidStateError, which is what happened when several
 * files were dropped at once on a single shared reader.
 *
 * @param {File} file - The file taken from the drop event.
 */
function readDroppedFile(file) {
  const reader = new FileReader();
  reader.onload = (loadEvent) => {
    processFile(loadEvent.target.result);
  };
  reader.onerror = () => {
    console.error(`Could not read "${file.name}":`, reader.error);
  };
  // readAsArrayBuffer takes only the blob; there is no encoding argument
  // because the bytes are delivered undecoded.
  reader.readAsArrayBuffer(file);
}

elBody.addEventListener("dragover", (dragEvent) => {
  // Cancelling dragover is what marks the element as a valid drop target.
  dragEvent.preventDefault();
  // classList.add is a no-op when the class is already present.
  elBody.classList.add(dragClass);
});

elBody.addEventListener("dragleave", () => {
  elBody.classList.remove(dragClass);
});

elBody.addEventListener("drop", (dropEvent) => {
  // Stop the browser from navigating to / opening the dropped file.
  dropEvent.preventDefault();
  elBody.classList.remove(dragClass);

  for (const item of dropEvent.dataTransfer.items) {
    if (item.kind !== "file") {
      continue;
    }
    const file = item.getAsFile();
    if (file) {
      readDroppedFile(file);
    }
  }
});
/**
 * Decode and log the first 32-bit integer of a dropped file.
 *
 * The value is read through a DataView with an explicit little-endian flag,
 * so the result no longer depends on the byte order of the machine running
 * the browser (an Int32Array view always uses the platform's byte order).
 * NOTE(review): little-endian matches what the typed-array read produced on
 * little-endian machines; confirm it against the file format description.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the file.
 * @returns {number|undefined} The signed 32-bit integer stored in the first
 *   four bytes, or undefined when the file is shorter than four bytes.
 */
function processFile(arrayBuffer) {
  const byteLength = 4;

  // A 1-3 byte file used to throw a RangeError when wrapped in an Int32Array.
  if (arrayBuffer.byteLength < byteLength) {
    console.warn(
      `File has only ${arrayBuffer.byteLength} byte(s); need at least ${byteLength}.`
    );
    return undefined;
  }

  const b1 = new DataView(arrayBuffer).getInt32(0, true);
  console.log(b1);
  return b1;
}
/* Page fills the viewport height with no default margin
   (presumably so the whole body can receive drops; confirm intent). */
html,body {height: 100vh;margin: 0;}
/* Grey highlight; the script toggles this class on the drop target while a drag is over it. */
.drag-over {background-color: #ccc;}
<h1>Drop a file here</h1>
I am just not sure whether I am on the right track. My only source of information about how this type of file is structured is the comments in the Python file.
How do I take that information about the file format and convert that into a way to perform the same actions in JavaScript?
This worked for me on your first example file.
It worked less well on the Spanish one, which has wingdings in it.
If you need to parse the text, it can be implemented fairly easily.
A DataView is useful here.
/**
 * Parse a dropped binary file with a DataView and render its readable text
 * into the element with id "output".
 *
 * Layout handled here: a 4-byte little-endian signed integer (logged to the
 * console) followed by bytes, of which only printable ASCII (32-126) and
 * line feeds (10) are kept. Every "%word" marker in the kept text is turned
 * into an <h2> heading.
 *
 * The text comes from an arbitrary dropped file, so it is HTML-escaped
 * before being written to innerHTML; only the <h2> tags added here are
 * interpreted as markup.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the file.
 * @returns {void}
 */
const processFile = (arrayBuffer) => {
  const headerSize = 4;
  const outputElement = document.getElementById("output");

  // getInt32 throws a RangeError when fewer than four bytes are available.
  if (arrayBuffer.byteLength < headerSize) {
    console.warn("File is too short to contain the 4-byte header.");
    outputElement.innerHTML = "";
    return;
  }

  const dataView = new DataView(arrayBuffer);

  // `true` selects little-endian byte order.
  const firstInteger = dataView.getInt32(0, true);
  console.log("First integer:", firstInteger);

  const pieces = [];
  for (let offset = headerSize; offset < arrayBuffer.byteLength; offset += 1) {
    // Unsigned read: bytes above 126 simply fall outside the printable range.
    const code = dataView.getUint8(offset);
    if (code === 10) {
      pieces.push("\n");
    } else if (code >= 32 && code <= 126) {
      pieces.push(String.fromCharCode(code));
    }
    // Other byte values (e.g. paragraph markers) are skipped; handle them
    // here if the format needs them.
  }

  const htmlEscapes = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  const safeText = pieces.join("").replace(/[&<>]/g, (ch) => htmlEscapes[ch]);

  outputElement.innerHTML = safeText.replace(/%(\w+)/g, "<h2>$1</h2>");
};
/* Page fills the viewport height with no default margin. */
html,
body {
height: 100vh;
margin: 0;
}
/* Grey highlight; the script toggles this class on the drop target while a drag is over it. */
.drag-over {
background-color: #ccc;
}
/* Extra space below the parsed-text output block. */
pre { margin-bottom: 120px; }
<h1>Drop a file here</h1>
<pre id="output"></pre>
<hr/>
<script>
// File-reading and drag-and-drop wiring. Parsing itself is delegated to
// processFile(), which is defined in the separate script.
const dragClass = "drag-over";
// The <h1> heading acts as the drop target. Declared with const so the
// script does not rely on creating an implicit global.
const elBody = document.querySelector("h1");

/**
 * Read one dropped file as an ArrayBuffer and hand the result to processFile.
 *
 * A fresh FileReader is created for every file because a FileReader can only
 * run one read at a time: starting a second read while the first is still
 * loading throws an InvalidStateError.
 *
 * @param {File} file - The file taken from the drop event.
 */
function readDroppedFile(file) {
  const reader = new FileReader();
  reader.onload = (loadEvent) => {
    processFile(loadEvent.target.result);
  };
  reader.onerror = () => {
    console.error(`Could not read "${file.name}":`, reader.error);
  };
  // readAsArrayBuffer takes only the blob; there is no encoding argument.
  reader.readAsArrayBuffer(file);
}

elBody.addEventListener("dragover", (dragEvent) => {
  // Cancelling dragover is what marks the element as a valid drop target.
  dragEvent.preventDefault();
  elBody.classList.add(dragClass);
});

elBody.addEventListener("dragleave", () => {
  elBody.classList.remove(dragClass);
});

elBody.addEventListener("drop", (dropEvent) => {
  // Stop the browser from navigating to / opening the dropped file.
  dropEvent.preventDefault();
  elBody.classList.remove(dragClass);

  for (const item of dropEvent.dataTransfer.items) {
    if (item.kind !== "file") {
      continue;
    }
    const file = item.getAsFile();
    if (file) {
      readDroppedFile(file);
    }
  }
});
</script>
An alternative to DataView, using typed arrays:
/**
 * Parse a dropped binary file with typed arrays (no DataView) and render its
 * readable text into the element with id "output".
 *
 * Layout handled here: a 4-byte little-endian signed integer (logged to the
 * console) followed by bytes, of which only printable ASCII (32-126) and
 * line feeds (10) are kept. Every "%word" marker in the kept text is turned
 * into an <h2> heading.
 *
 * The text comes from an arbitrary dropped file, so it is HTML-escaped
 * before being written to innerHTML; only the <h2> tags added here are
 * interpreted as markup.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the file.
 * @returns {void}
 */
const processFile = (arrayBuffer) => {
  const headerSize = 4;
  const outputElement = document.getElementById("output");
  const bytes = new Uint8Array(arrayBuffer);

  // Wrapping a 1-3 byte slice in an Int32Array throws a RangeError, so short
  // files are rejected up front.
  if (bytes.length < headerSize) {
    console.warn("File is too short to contain the 4-byte header.");
    outputElement.innerHTML = "";
    return;
  }

  // Assemble the integer byte by byte so the result is little-endian on every
  // platform (an Int32Array view uses the machine's own byte order). The
  // bitwise operators yield a signed 32-bit value.
  const firstInteger =
    bytes[0] | (bytes[1] << 8) | (bytes[2] << 16) | (bytes[3] << 24);
  console.log("First integer:", firstInteger);

  const outputArr = [];
  for (let offset = headerSize; offset < bytes.length; offset += 1) {
    const code = bytes[offset];
    if (code === 10) {
      outputArr.push("\n");
    } else if (code >= 32 && code <= 126) {
      outputArr.push(String.fromCharCode(code));
    }
  }

  const htmlEscapes = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  const safeText = outputArr
    .join("")
    .replace(/[&<>]/g, (ch) => htmlEscapes[ch]);

  outputElement.innerHTML = safeText.replace(/%(\w+)/g, "<h2>$1</h2>");
};
/* Page fills the viewport height with no default margin. */
html,
body {
height: 100vh;
margin: 0;
}
/* Grey highlight; the script toggles this class on the drop target while a drag is over it. */
.drag-over {
background-color: #ccc;
}
/* Extra space below the parsed-text output block. */
pre {
margin-bottom: 120px;
}
<h1>Drop a file here</h1>
<pre id="output"></pre>
<hr/>
<script>
// File-reading and drag-and-drop wiring. Parsing itself is delegated to
// processFile(), which is defined in the separate script.
const dragClass = "drag-over";
// The <h1> heading acts as the drop target. Declared with const so the
// script does not rely on creating an implicit global.
const elBody = document.querySelector("h1");

/**
 * Read one dropped file as an ArrayBuffer and hand the result to processFile.
 *
 * A fresh FileReader is created for every file because a FileReader can only
 * run one read at a time: starting a second read while the first is still
 * loading throws an InvalidStateError.
 *
 * @param {File} file - The file taken from the drop event.
 */
function readDroppedFile(file) {
  const reader = new FileReader();
  reader.onload = (loadEvent) => {
    processFile(loadEvent.target.result);
  };
  reader.onerror = () => {
    console.error(`Could not read "${file.name}":`, reader.error);
  };
  // readAsArrayBuffer takes only the blob; there is no encoding argument.
  reader.readAsArrayBuffer(file);
}

elBody.addEventListener("dragover", (dragEvent) => {
  // Cancelling dragover is what marks the element as a valid drop target.
  dragEvent.preventDefault();
  elBody.classList.add(dragClass);
});

elBody.addEventListener("dragleave", () => {
  elBody.classList.remove(dragClass);
});

elBody.addEventListener("drop", (dropEvent) => {
  // Stop the browser from navigating to / opening the dropped file.
  dropEvent.preventDefault();
  elBody.classList.remove(dragClass);

  for (const item of dropEvent.dataTransfer.items) {
    if (item.kind !== "file") {
      continue;
    }
    const file = item.getAsFile();
    if (file) {
      readDroppedFile(file);
    }
  }
});
</script>