The JavaScript method String.fromCharCode() behaves equivalently to Python's unichr() in the following sense:
print unichr(213) # prints Õ on the console
console.log(String.fromCharCode(213)); // prints Õ on the console as well
For my purposes, however, I need a JavaScript equivalent to the Python function chr(). Is there such a JavaScript function, or a way to make String.fromCharCode() behave like chr()?
That is, I need something in JavaScript that mimics
print chr(213) # prints � on the console
So it turns out you just want to work with raw bytes; in Node.js there's a module for that: Buffer. If you are a real wizard, you can get this to work with JavaScript strings alone, but it's harder and far less efficient.
var b = new Buffer(1);     // a single raw byte
b[0] = 213;
console.log(b.toString()); // toString() decodes as UTF-8 by default → �

var b = new Buffer(3);     // the three raw bytes of the UTF-8 encoding of U+2605
b[0] = 0xE2;
b[1] = 0x98;
b[2] = 0x85;
console.log(b.toString()); // ★
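As a side note, new Buffer(size) is deprecated in current Node.js releases; here is a minimal sketch of the same two examples with the newer factory methods, assuming a reasonably recent Node.js that has Buffer.alloc and Buffer.from:

var single = Buffer.alloc(1);               // one zero-filled raw byte
single[0] = 213;
console.log(single.toString());             // �

var star = Buffer.from([0xE2, 0x98, 0x85]); // raw bytes straight from an array
console.log(star.toString());               // ★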
print chr(213) # prints � on the console
So this prints the raw byte 0xD5, which the terminal then (most likely) interprets as UTF-8. A lone 0xD5 is not a valid UTF-8 byte sequence, so it is displayed as the replacement character (�). The interpretation as UTF-8 is not really relevant here, though; you most likely just want the raw bytes.
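To make that concrete, here is a small sketch (assuming Node.js and its Buffer built-in, as above) showing the same single byte under two interpretations: as UTF-8 it is invalid, while as Latin-1 it happens to be Õ, which is exactly what unichr(213) gives you:

var lone = Buffer.from([0xD5]);       // the single raw byte that chr(213) produces
console.log(lone.toString('utf8'));   // � (0xD5 on its own is not valid UTF-8)
console.log(lone.toString('latin1')); // Õ (the same byte read as Latin-1)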
To create raw bytes in JavaScript you could use Uint8Array.
var a = new Uint8Array(1); // one raw byte
a[0] = 213;
You could optionally then interpret the raw bytes as utf-8:
console.log(utf8decode(a)); // "�"
// Not recommended for production use ;D
// Doesn't handle > BMP to keep the answer shorter
function utf8decode(uint8array) {
    var codePoints = [],
        i = 0,
        byte, codePoint,
        len = uint8array.length;

    for (i = 0; i < len; ++i) {
        byte = uint8array[i];

        if ((byte & 0xF8) === 0xF0 && len > i + 3) {
            // 11110xxx: lead byte of a 4-byte sequence
            codePoint = ((byte & 0x7) << 18) |
                        ((uint8array[++i] & 0x3F) << 12) |
                        ((uint8array[++i] & 0x3F) << 6) |
                        (uint8array[++i] & 0x3F);
            if (!(0xFFFF < codePoint && codePoint <= 0x10FFFF)) {
                // Overlong or out-of-range: one replacement character per byte
                codePoints.push(0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD);
            } else {
                codePoints.push(codePoint);
            }
        } else if ((byte & 0xF0) === 0xE0 && len > i + 2) {
            // 1110xxxx: lead byte of a 3-byte sequence
            codePoint = ((byte & 0xF) << 12) |
                        ((uint8array[++i] & 0x3F) << 6) |
                        (uint8array[++i] & 0x3F);
            if (!(0x7FF < codePoint && codePoint <= 0xFFFF)) {
                codePoints.push(0xFFFD, 0xFFFD, 0xFFFD);
            } else {
                codePoints.push(codePoint);
            }
        } else if ((byte & 0xE0) === 0xC0 && len > i + 1) {
            // 110xxxxx: lead byte of a 2-byte sequence
            codePoint = ((byte & 0x1F) << 6) | (uint8array[++i] & 0x3F);
            if (!(0x7F < codePoint && codePoint <= 0x7FF)) {
                codePoints.push(0xFFFD, 0xFFFD);
            } else {
                codePoints.push(codePoint);
            }
        } else if ((byte & 0x80) === 0x00) {
            // 0xxxxxxx: plain ASCII
            codePoints.push(byte & 0x7F);
        } else {
            // Stray continuation byte or truncated sequence
            codePoints.push(0xFFFD);
        }
    }
    return String.fromCharCode.apply(String, codePoints);
}
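If you actually need a correct, full UTF-8 decode rather than the toy function above, modern environments already ship one. A minimal sketch, assuming TextDecoder is available (current browsers and recent Node.js):

var a = new Uint8Array([0xE2, 0x98, 0x85]);
console.log(new TextDecoder('utf-8').decode(a)); // ★

Unlike the hand-rolled version, TextDecoder also handles code points above the BMP.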
Whatever you are actually trying to do, though, most likely has nothing to do with interpreting the bytes as UTF-8 at all.
Another example:
// UTF-8 bytes for the black star U+2605 ★:
var a = new Uint8Array(3);
a[0] = 0xE2;
a[1] = 0x98;
a[2] = 0x85;
utf8decode(a) === String.fromCharCode(0x2605) //True
utf8decode(a) // ★
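Going the other way (string → raw UTF-8 bytes), the standard TextEncoder produces those same three bytes; again just a sketch, assuming TextEncoder is available in your environment:

var bytes = new TextEncoder().encode('★'); // Uint8Array [0xE2, 0x98, 0x85]
console.log(bytes.length);                 // 3
console.log(bytes[0].toString(16));        // "e2"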
In Python 2.7 (on Ubuntu):
print chr(0xE2) + chr(0x98) + chr(0x85)
#prints ★