Vadorequest Vadorequest - 1 month ago 8
Javascript Question

Python urllib.unquote_plus node.js equivalent

I'm trying to decode some data passed through an HTTP GET parameter. The data was URL-encoded by Python, and I'm trying to decode it using Node.

Python:

# Percent-encoded payload as received through HTTP GET.
data = "0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3"
# Python 2 print statement: unquote_plus turns '+' into a space and decodes each %XX escape.
print urllib.unquote_plus(data)


Output:

0 -�͘9+�� (��Ɇr0�� ��v
�u�8�z�ێ-�TP��!�� �����[�=7#�S��`���]̕��ۣ�oVzC#�B���N9���x
|�XFB��





Node:

// Same percent-encoded payload as in the Python snippet.
var token = '0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3';
// NOTE(review): with a string pattern, _.replace presumably swaps only the FIRST '+'
// (the second '+' is still visible in the output below), and unescape() returns a
// JS string of characters rather than raw bytes -- confirm both against the docs.
console.log(unescape(_.replace(token, '+', '%20')))



`_` is the Lodash library. See https://lodash.com/docs/4.16.6#replace


Output:

0 -ûÍ9+Àå¢ (ÙÉr0+ñãv
ÙuÄ8ö­£z¾Û-ÄTPÕÁ!ûë ãúÕ[ó=7#çS·`Ûã]̸ÚÛ£oVzC#üBN9¾®x
|ùXFBó


The two decoded outputs look similar but are slightly different, and I can't figure out why. Once decrypted (through another algorithm), they give a similar result, but the JS version has unexpected characters like
{"duration": 600m�B�}PO�UQ��:...}'
and I can't figure out what's wrong with the JS implementation.

Answer

I'm not sure if this has been done before (it probably has, maybe I did not search properly).

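What you seem to need is a function that decodes a string with percent encoding into a buffer of bytes, instead of into a string, as regular URL-decoding functions would try to do. (`unescape` turns each `%XX` escape into a character, so any byte above 0x7F is typically mangled as soon as that string is converted back to bytes, e.g. as UTF-8.)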

Maybe this is a start:

/**
 * Decodes a percent-encoded (URL-encoded) string into raw bytes.
 *
 * Unlike `unescape`/`decodeURIComponent`, the result is a Buffer, so arbitrary
 * binary payloads (bytes >= 0x80) survive untouched.
 *
 * Rules:
 *   - '+' becomes a space (0x20);
 *   - '%XX' (two hex digits, either case) becomes the byte 0xXX;
 *   - a '%' that is NOT followed by two hex digits (including a '%' at the
 *     very end of the input) is copied through literally instead of being
 *     dropped or turned into a bogus byte;
 *   - every other character is copied through as its low byte.
 *
 * @param {string} data - The percent-encoded input.
 * @returns {Buffer} The decoded bytes.
 */
function bufferUrlDecode(data) {
    const PLUS = 43;    // '+'
    const PERCENT = 37; // '%'
    const SPACE = 32;   // ' '

    // Numeric value of an ASCII hex digit byte, or -1 if it is not one.
    const hexValue = (byte) => {
        if (byte >= 48 && byte <= 57) {
            return byte - 48; // '0'..'9'
        }
        const lower = byte | 0x20; // fold 'A'..'F' onto 'a'..'f'
        if (lower >= 97 && lower <= 102) {
            return lower - 87; // 'a'..'f' -> 10..15
        }
        return -1;
    };

    // Buffer.from / Buffer.alloc replace the deprecated `new Buffer(...)`;
    // Buffer.alloc also zero-fills, so no explicit fill(0) is required.
    const input = Buffer.from(data, 'ascii');
    // The decoded output can never be longer than the encoded input.
    const decoded = Buffer.alloc(input.length);
    let pos = 0;

    for (let i = 0; i < input.length; i++) {
        const b = input[i];

        if (b === PLUS) {
            decoded[pos++] = SPACE;
            continue;
        }

        // Only treat '%' as an escape when two hex digits really follow it.
        if (b === PERCENT && i + 2 < input.length) {
            const hi = hexValue(input[i + 1]);
            const lo = hexValue(input[i + 2]);
            if (hi >= 0 && lo >= 0) {
                decoded[pos++] = (hi << 4) | lo;
                i += 2; // skip the two hex digits just consumed
                continue;
            }
        }

        // Plain byte, or a malformed '%' kept literally.
        decoded[pos++] = b;
    }

    // Trim the unused tail of the pre-allocated buffer.
    return decoded.subarray(0, pos);
}

usage:

// Percent-encoded sample payload (binary data, so it must be decoded to bytes).
const encodedSample = "0%0E%09-%FB%CD%989%2B%C0%E5%A2+%28%93%D9%C9%86r0%9C%8D+%F1%E3v%0C%9E%19%91%E3%06%01%FA%D5%5B%F3%3D7%10%23%E7%95S%B7%93%60%DB%1F%1F%8A%E3%5D%CC%95%B8%DA%DB%A3%18%9FoVzC%23%FCB%8D%86%86N9%BE%AE%98%13x%0D%D9u%C48%F6%AD%A3%19z%BE%DB%8E-%C4T%02P%06%D5%C1%21%1F%FB%9C%EB%0A%7C%F9XFB%9D%F3";
const decodedBytes = bufferUrlDecode(encodedSample);
// First the Buffer itself, then the same bytes rendered as a hex string.
console.log(decodedBytes);
console.log(decodedBytes.toString('hex'));

output:

<Buffer 30 0e 09 2d fb cd 98 39 2b c0 e5 a2 20 28 ... >
300e092dfbcd98392bc0e5a22028 etc...