Search code examples
c#textnumberstransfer

Size efficient way to transfer array of numbers as text


Let's say I have the following array of numbers:
1,5,500,994,6950,54,54,845,101,54046506452,5980,960406,55,680,68045,66540,321032

What is the most size efficient way to transfer it with http post to a website which decodes it back to numbers?

If I transfer it as the text "1,5,500,994,6950,54,54,845,101,54046506452,5980,960406,55,680,68045,66540,321032", then every digit and every separator takes 1 byte. That is a waste of data, since I could use any of the following characters, which are allowed in HTTP without percent-encoding: a-z, A-Z, 0-9, -, ., _, ~.

I could convert the numbers from base 10 to base 65, using abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._ as the digits and ~ as the separator, which would make it look like b~f~hT~pt~bP8~2~2~na~bK~ULUhIQ~bBa~dGuF~3~kE~qg3~pWT~bk.-. But in my program most numbers are 3-5 characters long, so around 1/5 of the data would be nothing but separators. Isn't there a better solution?

This question is asked very generally on purpose since I am open to any creative solution.


Solution

  • This is complete code for compressing and decompressing positive long integers, based on h3n's idea of packing 2 characters into 1 byte, thanks! It uses base 15: the digits are the 4-bit values 0000 to 1110 binary, and 1111 (0xF) serves as the separator.

    compression:

    /// <summary>
    /// Packs a list of non-negative numbers into bytes, two 4-bit nibbles per byte.
    /// Each number is written as its base-15 digits (nibble values 0x0-0xE, most
    /// significant digit first) and numbers are separated by the nibble 0xF.
    /// </summary>
    /// <remarks>
    /// A zero that is not the last number is written as an empty field (no digits,
    /// only its separator). A zero in the last position is written as an explicit
    /// 0 digit, because an empty field at the very end of the stream could not be
    /// told apart from "no more numbers". If the stream ends in the middle of a
    /// byte, the unused low nibble is filled with 0xF.
    /// The <paramref name="input"/> list is only read, never modified.
    /// </remarks>
    /// <param name="input">The numbers to encode; every element must be &gt;= 0.</param>
    /// <returns>The packed bytes; an empty list when <paramref name="input"/> is empty.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">An element of <paramref name="input"/> is negative.</exception>
    public List<byte> EncodeNumbers(List<long> input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        List<byte> bytes = new List<byte>();
        int nibbleCount = 0;

        // long.MaxValue has 17 digits in base 15, so 17 slots are always enough.
        byte[] digits = new byte[17];

        for (int a = 0; a < input.Count; a++)
        {
            // Work on a copy so the caller's list keeps its values.
            long value = input[a];
            if (value < 0)
            {
                throw new ArgumentOutOfRangeException("input", "Only non-negative numbers can be encoded.");
            }

            bool isLast = a + 1 == input.Count;

            // Fill the scratch buffer from the right: least significant digit first.
            int first = digits.Length;
            while (value > 0)
            {
                digits[--first] = (byte)(value % 15);
                value /= 15;
            }

            // A trailing zero needs a real digit, otherwise it would leave no trace.
            if (isLast && first == digits.Length)
            {
                digits[--first] = 0;
            }

            for (int d = first; d < digits.Length; d++)
            {
                AppendNibble(bytes, ref nibbleCount, digits[d]);
            }

            // Separator between numbers, or padding to complete the final byte.
            if (!isLast || nibbleCount % 2 != 0)
            {
                AppendNibble(bytes, ref nibbleCount, 0xF);
            }
        }
        return bytes;
    }

    /// <summary>
    /// Appends one 4-bit value to the packed stream: even positions open a new
    /// byte (high nibble), odd positions fill the low nibble of the last byte.
    /// </summary>
    private static void AppendNibble(List<byte> bytes, ref int nibbleCount, byte nibble)
    {
        if (nibbleCount % 2 == 0)
        {
            bytes.Add((byte)(nibble << 4));
        }
        else
        {
            bytes[bytes.Count - 1] |= nibble;
        }
        nibbleCount++;
    }
    

    decompression:

    /// <summary>
    /// Unpacks bytes into numbers. Every byte holds two 4-bit nibbles (high nibble
    /// first); nibble values 0x0-0xE are base-15 digits, most significant first,
    /// and the nibble 0xF ends the current number.
    /// </summary>
    /// <remarks>
    /// A separator that is not preceded by any digit yields the number 0.
    /// Digits left over at the end of the input (no closing 0xF) form the last number.
    /// The value is accumulated in exact integer arithmetic, so every long
    /// round-trips without loss of precision.
    /// </remarks>
    /// <param name="input">The packed bytes to decode.</param>
    /// <returns>The decoded numbers in stream order; an empty list for empty input.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="OverflowException">A run of digits encodes a value larger than long.MaxValue.</exception>
    public List<long> DecodeNumbers(List<byte> input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        List<long> numbers = new List<long>();
        long number = 0;
        bool hasDigits = false; // true while a number is started but not yet closed by 0xF

        for (int a = 0; a < input.Count; a++)
        {
            for (int i = 0; i < 2; i++)
            {
                int nibble = (i == 0) ? input[a] >> 4 : input[a] & 0xF;
                if (nibble != 0xF)
                {
                    // Horner's scheme: shift what we have by one base-15 place and
                    // add the new digit. checked turns overflow on malformed input
                    // into an exception instead of a silently wrapped value.
                    number = checked(number * 15 + nibble);
                    hasDigits = true;
                }
                else
                {
                    numbers.Add(number);
                    number = 0;
                    hasDigits = false;
                }
            }
        }

        // The last number has no closing separator when the stream ended on a full byte.
        if (hasDigits)
        {
            numbers.Add(number);
        }
        return numbers;
    }
    

    usage:

    // Sample data for a round trip through EncodeNumbers / DecodeNumbers.
    List<long> numbers = new List<long>{4,10,14,51,5990,922337203685477,64,4685746,56545674,94,1,65454677,665555,1234567890,55555,22,2,3,2,0,99999,99955500099955577,1,2,666,654154,654,58,56,69,7,55,5647,321,25,0,697,9,9,9,9,9,96,5,546,4,645545,64564564,5465498654,6476854,85849865,6478596743,6,6,1,2,3,3,3,548745,6647};

    // Render the input as '|'-terminated plain text, then encode it.
    string plainText = "";
    for (int i = 0; i < numbers.Count; i++)
    {
        plainText = plainText + numbers[i] + "|";
    }

    // Base64 is used only to show the packed bytes as text.
    List<byte> encoded = EncodeNumbers(numbers);
    string base64 = Convert.ToBase64String(encoded.ToArray());

    // Decode again and render the result the same way as the input.
    List<long> decompressed = DecodeNumbers(encoded);
    string roundTrip = "";
    for (int i = 0; i < decompressed.Count; i++)
    {
        roundTrip = roundTrip + decompressed[i] + "|";
    }

    // Assemble the three-section report and put it on the clipboard.
    string report =
        "plain text:\r\n" + plainText + "\r\n" + plainText.Length + " bytes\r\n\r\n" +
        "compressed base64:\r\n" + base64 + "\r\n" + base64.Length + " bytes\r\n\r\n" +
        "decompressed:\r\n" + roundTrip + "\r\n" + roundTrip.Length + " bytes";

    Clipboard.SetText(report);
    

    output:

    plain text:
    4|10|14|51|5990|922337203685477|64|4685746|56545674|94|1|65454677|665555|1234567890|55555|22|2|3|2|0|99999|99955500099955577|1|2|666|654154|654|58|56|69|7|55|5647|321|25|0|697|9|9|9|9|9|96|5|546|4|645545|64564564|5465498654|6476854|85849865|6478596743|6|6|1|2|3|3|3|548745|6647|
    278 bytes
    
    compressed base64:
    T6/vNvG5X3GXGSLIRS9E9ihYH05uQZ9k8fWy3qL9IwX3NbfWDxFtrxfy8/L/HpafNlXR4iHg2i8fLy5vzcVPLZ89879J9/OvGhfxZvGv8xf5+fn5+fZvXyZvT8tBX1oFOU8h7FcB74fhBPeAvuXyfbdDSPb28fLz8/P6yNDx6C8=
    172 bytes
    
    decompressed:
    4|10|14|51|5990|922337203685477|64|4685746|56545674|94|1|65454677|665555|1234567890|55555|22|2|3|2|0|99999|99955500099955574|1|2|666|654154|654|58|56|69|7|55|5647|321|25|0|697|9|9|9|9|9|96|5|546|4|645545|64564564|5465498654|6476854|85849865|6478596743|6|6|1|2|3|3|3|548745|6647|
    278 bytes
    

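    There is a small loss of data for very large numbers, which can be seen above in 99955500099955577 vs 99955500099955574. It is caused by a data type conversion: computing the place values with Math.Pow goes through double, which cannot represent every long exactly. Accumulating the value in integer arithmetic instead (number = number * 15 + digit) avoids the loss.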