Derek 朕會功夫 Derek 朕會功夫 - 27 days ago 18
Android Question

Correcting incorrectly encoded string (ASCII characters back to UTF-8)

Here is a sample WiFi SSID I have extracted from an Android "wifi config file" (wpa_supplicant.conf).

I'm trying to display all the SSIDs in the file. Most are fine, since they are normal strings wrapped in quotes, for example:

network={
ssid="Linksys"
...
}


However, some entries just wanted to be different and special, for example,

network={
ssid=e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab
...
}


Now, the question is, how do I convert it back to a readable string (preferably in JS)? I suspect the encoding was wrong (it displays correctly on a native device though.)

Answer

Apparently the string is the hexadecimal representation of the SSID's UTF-8 bytes. By turning it back into binary, followed by some string manipulation, I am able to decode it back into readable form.

/**
 * Decodes a string of hexadecimal digits, interpreted as UTF-8 encoded
 * bytes, into a regular JavaScript string.
 *
 * Example: HextoUTF8("e299aa") returns "\u266A".
 *
 * Surrogate code points (U+D800..U+DFFF) encoded as 3-byte sequences are
 * not rejected; they are emitted as the corresponding UTF-16 code unit.
 *
 * @param {string} txt - An even-length string of hex digits (either case).
 * @returns {string} The decoded text; "" when txt is empty.
 * @throws {TypeError} If txt is not a string, has odd length, contains a
 *     non-hex character, or does not decode as UTF-8 (invalid lead byte,
 *     bad or missing continuation byte, overlong form, or a code point
 *     above U+10FFFF).
 */
function HextoUTF8(txt) {
    // Smallest code point that genuinely needs a sequence of the given
    // length (index = length in bytes); anything lower is an overlong form.
    var MIN_CODE_POINT = [0, 0x0, 0x80, 0x800, 0x10000];

    // Parses pairs of hex digits into an array of byte values (0..255).
    function hexStringToBytes(str) {
        if (typeof str !== "string") throw new TypeError("A string is required");
        if (str.length % 2) throw new TypeError("Not a valid length");
        if (/[^0-9a-fA-F]/.test(str)) throw new TypeError("Not a valid hex string");

        var bytes = [];
        for (var i = 0; i < str.length; i += 2) {
            bytes.push(parseInt(str.slice(i, i + 2), 16));
        }
        return bytes;
    }

    // Returns the total sequence length announced by a lead byte,
    // or 0 when the byte cannot start a sequence.
    function sequenceLength(lead) {
        if (lead < 0x80) return 1;              // 0xxxxxxx
        if ((lead & 0xE0) === 0xC0) return 2;   // 110xxxxx
        if ((lead & 0xF0) === 0xE0) return 3;   // 1110xxxx
        if ((lead & 0xF8) === 0xF0) return 4;   // 11110xxx
        return 0;
    }

    // Converts one code point to a string, splitting code points above
    // U+FFFF into a UTF-16 surrogate pair (String.fromCharCode alone
    // would truncate them to 16 bits).
    function codePointToString(codePoint) {
        if (codePoint < 0x10000) return String.fromCharCode(codePoint);

        var offset = codePoint - 0x10000;
        return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    }

    // Decodes an array of byte values as UTF-8.
    function bytesToString(bytes) {
        var result = "";
        var i = 0;

        while (i < bytes.length) {
            var lead = bytes[i];
            var length = sequenceLength(lead);

            if (!length) {
                throw new TypeError("Invalid UTF-8 lead byte at offset " + i);
            }
            if (i + length > bytes.length) {
                throw new TypeError("Truncated UTF-8 sequence at offset " + i);
            }

            // Payload bits carried by the lead byte: 7, 5, 4 or 3.
            var codePoint = length === 1 ? lead : lead & (0xFF >> (length + 1));

            for (var j = 1; j < length; j++) {
                var next = bytes[i + j];
                // Every continuation byte must look like 10xxxxxx.
                if ((next & 0xC0) !== 0x80) {
                    throw new TypeError("Invalid UTF-8 continuation byte at offset " + (i + j));
                }
                codePoint = (codePoint << 6) | (next & 0x3F);
            }

            if (codePoint < MIN_CODE_POINT[length] || codePoint > 0x10FFFF) {
                throw new TypeError("Invalid UTF-8 sequence at offset " + i);
            }

            result += codePointToString(codePoint);
            i += length;
        }

        return result;
    }

    return bytesToString(hexStringToBytes(txt));
}

Optimally I should be doing bit manipulation instead, but whatever, it works,

> HextoUTF8("e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab");
> "♪ 淡定有錢剩 ♫"