Search code examples
javascriptstringsortinglexicographic

Invert string's lexicographic order value


Given that there is a lexicographically-sorted list of UTF-8 string s1, s2, s3, ... of unknown length, is it possible to invert each string value, such that when the list is sorted lexicographically again using the inverted value, the reverse order is now produced?

function invert(s) {
    // TODO: what's here?
    return s;
}

const sample = ['', ' ', 'a', 'A', '@', '한','자', '한자', '자한'];

const original = [...sample].sort((a, b) => {
    return a.localeCompare(b);
});

const inverted = [...sample].sort((a, b) => {
    return invert(a).localeCompare(invert(b));
});

// Both should print the same.
console.log('original', original);
console.log('inverted.reverse', inverted.reverse());

Solution

  • I mean to compare the string by individual charcodes

    Then you'll need to use a normal comparison instead of .localeCompare. But yes, it's possible then to invert the string by inverting every individual charcode. This won't result in readable strings and not in valid UTF-8 either, but can be used for the comparison.

    function invert(s) {
        return String.fromCharCode(...s.split('').map(c =>
            0xFFFF - c.charCodeAt(0)
        ), 0xFFFF);
    }
    
    const sample = ['', ' ', 'a', 'A', '@', '한','자', '한자', '자한'];
    console.log(sample.map(invert));
    
    const original = [...sample].sort((a, b) => {
        return +(a>b)-(a<b);
    });
    
    const inverted = [...sample].sort((a, b) => {
        return +(invert(a)>invert(b))-(invert(a)<invert(b));
    });
    
    // Both should print the same.
    console.log('original', original);
    console.log('inverted.reverse', inverted.reverse());

    To avoid shorter strings still being sorted first, we append \uFFFF. In theory this would have to be an infinite string - see An analogue for (-/+)Infinity for characters in JavaScript. For simplicity, I just assume that no strings end in \u0000 :-)

    To do it properly, you could double the size of each string so that there is enough coding space to construct an "end of string" mark that is larger than any "normal" character:

    function invert(s) {
        return s.split('').map(c =>
            ' ' + String.fromCharCode(0xFFFF - c.charCodeAt(0))
        ).join('') + '$';
    }
    
    const sample = ['', '\u0000', '\u0000\u0000', '.', '.\u0000', '.\uFFFF', 'a', 'A', '@', '한','자', '한자', '자한'];
    console.log(sample.map(invert));
    
    const original = [...sample].sort((a, b) => {
        return +(a>b)-(a<b);
    });
    
    const inverted = [...sample].sort((a, b) => {
        return +(invert(a)>invert(b))-(invert(a)<invert(b));
    });
    
    // Both should print the same.
    console.log('original', original);
    console.log('inverted.reverse', inverted.reverse());