Search code examples
javascriptandroidencodingwifissid

Correcting incorrectly encoded string (ASCII characters back to UTF-8)


Here is a sample WiFi ssid I have extracted from an Android "wifi config file" (wpa_supplicant.conf).

I'm trying to display all the SSIDs in the file. Most are fine, as they are normal strings wrapped in quotes, for example,

network={
    ssid="Linksys"
    ...
}

However, some entries just wanted to be different and special, for example,

network={
    ssid=e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab
    ...
}

Now, the question is, how do I convert it back to a readable string (preferably in JS)? I suspect the encoding was wrong (it displays correctly on a native device though.)


Solution

  • Apparently the string is the SSID's raw UTF-8 bytes written out as hex digits. By turning the hex back into bytes and then decoding those bytes as UTF-8, I am able to convert it back to its readable form.

    /**
     * Decode a string of hex digits (two per byte) whose bytes are UTF-8
     * into a regular JavaScript string.
     *
     * Works in any JavaScript environment (no dependency on `window`) and
     * supports the full Unicode range, including code points above U+FFFF.
     * Malformed UTF-8 (stray continuation bytes, invalid lead bytes,
     * truncated or overlong sequences, encoded surrogates, values above
     * U+10FFFF) is replaced with U+FFFD instead of producing garbage.
     *
     * @param {string} txt - Hex digits, upper or lower case, even length.
     * @returns {string} The decoded text; "" for an empty input.
     * @throws {TypeError} If `txt` is not a string, has an odd length, or
     *     contains a character that is not a hex digit.
     */
    function HextoUTF8(txt) {
        var REPLACEMENT_CHARACTER = "\uFFFD";
        // Smallest code point that genuinely needs a sequence of the given
        // length (index = sequence length); anything smaller is overlong.
        var MIN_CODE_POINT = [0, 0, 0x80, 0x800, 0x10000];

        // Convert the hex digits into an array of byte values (0-255).
        function hexStringToBytes(str) {
            if (typeof str !== "string") throw new TypeError("A string is required");
            if (str.length % 2) throw new TypeError("Not a valid length");
            if (!/^[0-9a-fA-F]*$/.test(str)) throw new TypeError("Not a valid hex string");

            var bytes = [];
            for (var i = 0; i < str.length; i += 2) {
                bytes.push(parseInt(str.slice(i, i + 2), 16));
            }
            return bytes;
        }

        // Number of bytes in the sequence started by `lead`, or 0 when
        // `lead` cannot start a sequence (continuation byte or 0xF8-0xFF).
        function determineSequenceLength(lead) {
            if (lead < 0x80) return 1;
            if ((lead & 0xE0) === 0xC0) return 2;
            if ((lead & 0xF0) === 0xE0) return 3;
            if ((lead & 0xF8) === 0xF0) return 4;
            return 0;
        }

        // True when `codePoint` is a legal result for a sequence of `length` bytes.
        function isValidCodePoint(codePoint, length) {
            var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
            return codePoint >= MIN_CODE_POINT[length] && codePoint <= 0x10FFFF && !isSurrogate;
        }

        // Build the string for a code point; values above U+FFFF need a
        // surrogate pair because String.fromCharCode only keeps 16 bits.
        function codePointToString(codePoint) {
            if (codePoint <= 0xFFFF) return String.fromCharCode(codePoint);

            var offset = codePoint - 0x10000;
            return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        }

        // Decode an array of byte values as UTF-8.
        function bytesToUTF8(bytes) {
            var result = "";
            var i = 0;

            while (i < bytes.length) {
                var lead = bytes[i];
                var length = determineSequenceLength(lead);

                if (length === 0) {
                    result += REPLACEMENT_CHARACTER;
                    i += 1;
                    continue;
                }

                // Payload bits of the lead byte: 7, 5, 4 or 3 of them.
                var codePoint = length === 1 ? lead : lead & (0xFF >> (length + 1));
                var consumed = 1;

                for (; consumed < length; consumed++) {
                    var next = bytes[i + consumed];
                    // Stop at end of input or at a byte that is not 10xxxxxx.
                    if (next === undefined || (next & 0xC0) !== 0x80) break;
                    codePoint = (codePoint << 6) | (next & 0x3F);
                }

                if (consumed < length) {
                    // Truncated sequence: drop what was read and resume at
                    // the offending byte so it can start its own sequence.
                    result += REPLACEMENT_CHARACTER;
                    i += consumed;
                    continue;
                }

                result += isValidCodePoint(codePoint, length)
                    ? codePointToString(codePoint)
                    : REPLACEMENT_CHARACTER;
                i += length;
            }

            return result;
        }

        return bytesToUTF8(hexStringToBytes(txt));
    }
    

    Optimally I should be doing bit manipulation instead, but whatever, it works,

    > HextoUTF8("e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab");
    > "♪ 淡定有錢剩 ♫"