Search code examples
c++postgresqllibpqlibpqxxseal

Updating PostgreSQL table with binary data


I have a std::stringstream strs variable with this serialized binary data:

�G�{SSF��>%�����hgRQ;Tjh A "ʐk�R3 1[Z�yA _�Kx O��� ���f��' ����t %��+>, ���~� 삾�+/ Tb�Ҷ�7 �(���� �Q1�5m& ��( G#�bm 3O�AN ) �DP߇g �0=ʆ�0 ���j�u E�3�� �G�#�" \��!�o% L.�� �WMG?B- 3����}& �.�S� (�B� �j&� �@��%&, 65��0 !G�5R ��N��0 ��b�� h񅷇v) �8�� x�%7 5e��: w|7ώJ 8���� ����X/ v�c�h2 3��i� o^���
�A��� �oG��0 +���Ȑ" n�� ���4 F#�>b .��m�=; � �X ��< � c(= ���7Y: �� �� Q��O� w�k�q! �D��G�8 O���l�1 j��DH ��rhJ v͑UF� �P���; �| ���h �U��z�* 0 Ԏ��6 @I� ��,K�� �R�B� ��ﲒi ��o�H�0 �"�� ���B,� $6QP�@ YŽ�05 �8�s�� �>���:> ���*� Kp1>�~< ����� x�5 05 S?�V�" �m�7 )����z$ �Ye��- �nKPz ~8�쩳 dF �̄5 �ɼ��% v�x�O�# 9�B�/�6 �5��[. ��P%:� ���V�tG'�O ��#bQ�9 �����) �0%�[0 f�(? e( �5 rt �O
�[�۠ �ɴKG� �'$�_s ��g:] Aߞ,�Q

This is the same data after using GDB:

"\374G\371{SSF\301\251*>\177%\225\201\253i\b\344\206\341hgRQ\016\022\001;Tjh\002\000\000\000A\000\000\000\001\000\000\000\"ʐk\323R3\000\061[Z\272yA\001\000_\340\016K\004x\005\000O\226\f\262\a\375\020\000\346\302\341f\230\235'\000\026\275\367\371\215t\020\000%\356\033\372+>,\000\307\016\361\327~\223\033\000삾\351\254+/\000Tb\335Ҷ\363\067\000\342(\357\336\346\326\017\000\327Q1\320\065m&\000\034\216\377\035\005(\a\000\020G#\345bm\017\000\063O\324AN )\000\225DP߇g\a\000\355\060=ʆ\260\060\000\232\377\272j\234u\006\000E\304\063\372\355\v\f\000\223G\353\024#\307\"\000\\317\327!\270o%\000\005L\035.\345\306\002\000\257WMG?B-\000\063\275\342\373\304}&\000\365\017.\367S\264\031\000(\236\033B\354\255\n\000\316\034j&\321\021\n\000\266@\226\314%&,\000\066\065\236\024\271\033\060\000!G\332\v5R\030\000\372\f\353N\225\201\060\000\331\377\035b\244\263\033\000h\361\205\267\207v)\000\322\027\070\246\212w\b\000x<\001\026n}\034\000Җp{\362F\t\000\370\352m\026ŵ8\000(\366\366ۡ:\017\000\364\026\210b=\235\"\000\251\214[)\262\342\022\000 m\316fd\345\060\000[VAl\206\233\006\000\035\354o\021'9#\000\363\032\327\372\357\322\034\000|'\256\306K{\021\000B\266\330v\257\332+\000x\346\023\300\315\306!\000qO\016\005wz*\000T\236*\021\272H;\000\ba\006$\376\214\027\000\213\377\a\330\372\023\003\000\361,m\274\300\321\004\000DË\277\272T \000\357\231Ţ\355\270\067\000;:\263\303\070\246\022\000~\267\374\060\272\261\r\000\264>\374\021\027%7\000\065\034e\216\305:\006\000w|7ώJ\006\000\070\242\210\310\343\206\n\000\365\263\370\377\005X/\000v\335c\200h\035\062\000\063\225\363\244i\362\r\000o^\371\353\302\n\027\000\000\000\000\000\000\000\000\000\t\224A\207\331\374\024\000\254oG\352\364\177\060\000+\254\332\330Ȑ\"\000n\024\310\360\265?\b\000\004\234\231\006\250\064\016\000F#\225>b\020 \000.\271\224m\246=;\000\305\t\017\372X\024\024\000\232\023\005\360\277<\017\000\036\242 c\005(=\000\211\200\212\067Y:\000\273\206\017\n\204\233\027\000\002Q\364\376O\277\033\000w\212\033k\273q!\000\333D\204\241G\331\070\000O\242\306\346l\327\061\000j\224\205DH\033\032\000\270\025\375rhJ\n\000v͑UF\227 \000\230P\272\211\247\005;\000\372\177\017\310\b|\032\000\367\357\255\352h\n\036\000\226U\230\255z\247*\000\060\000Ԏ\241\301\066\000\315\b@\024I\227\032\000\275\322,K\275\304\034\000\332R\375B\023\376\001\000\331\324ﲒi\r\000\256\353o\237H\205\060\000\353\"\026\025\360\320\n\000\203\306\344B,\200\006\000$6QP\325@\017\000YŽ\036\203\060\065\000\366\070\251s\264\252\017\000\360>\251\310\340:>\000\257\310\326\005*\216\036\000Kp1>\232~<\000\225\002\253\302\365\350\021\000x\341\065\000\060\065\021\000S?\331V\311\"\000\000\356m\307\003\030\067\004\000)\212\265\351\331z$\000\336Ye\217\323\033-\000\215nK\026Pz\v\000~8\273쩳\r\000dF\r\261̄5\000\206\aɼ\303\365%\000v\365x\304O\346#\000\071\210B\373/\264\066\000\324\065\216\003\274[.\000\363\343P%:\311\033\000\244\301\370V\367t\006\000G\005'\213O\017\000\252\220#bQ\324\071\000\376\272\377\347\016\237)\000\374\060%\311[0\000\177\rf\357\233(?\000e(\r\2045\r\000rt\000\005\230O*\000\345[\204۠x\b\000\207ɴKG\224\v\000\273'$\261_s\036\000\215\240\fg:]\002\000Aߞ,\303Q\t\000\000\000\000\000\000\000\000"

I'm trying to upload this data (stored in std::stringstream strs) to a PostgreSQL database encoded with UTF8 and into the byte_info column excepting bytea using the libpqxx library:

 pqxx::connection Con("My con information");
 pqxx::work W(Con);
 W.exec("UPDATE " + tableName + " SET byte_info =" + strs.str() + " WHERE id = 1;");
 W.commit();

But all I get is this error:

ERROR: invalid byte sequence for encoding "UTF8": 0xfc

What am I missing here or doing wrong?


Solution

  • According to manual of PostgreSQL bytea, there are 2 ways for writing a statement containing binary stream.

    For a string "\x4A\xC3\xA1\xF2\x18"

    1. hex bytea : E'\\x4AC3A1F218'
    2. escaped bytea : E'J\\303\\241\\362\\030'::bytea -- escape \ as \\\\, escape ' as \' and escape non-printable as \\three-digit-octal

    So you can come up with functions like these.

    std::string ascii_to_hex_bytea(std::string_view sv) {
      std::ostringstream os;
      os << R"(E'\\x)" << std::hex << std::uppercase;
      for (unsigned char ch : sv) {
        os << std::setfill('0') << std::setw(2) << static_cast<uint16_t>(ch);
      }
      os << "'";
      return os.str();
    
    }
    
    std::string ascii_to_escaped_bytea(std::string_view sv) {
      std::ostringstream os;
      os << "E'" << std::oct;
      for (unsigned char ch : sv) {
        if (isprint(ch))
          switch (ch) {
            case('\\') : os << R"(\\\\)"; break; // escape back slash
            case('\'') : os << R"(\')"; break;   // escape single quote
            default    : os << ch;               // simply put printable char
          }
        else // escape the rest as an octal with back slash leading
          os << R"(\\)" << std::setfill('0') << std::setw(3) << static_cast<uint16_t>(ch);
      }
      os << "'::bytea";
      return os.str();
    
    }
    

    Suppose you have ss as stringstream with some data (for demo, we just ramdom it here)

      std::stringstream ss;
      { // random bits for length of 1024
        std::string str(1024,'\0');
        for (char* addr = str.data(); addr < str.data() + str.size(); ++addr )
          *addr = static_cast<char>(std::experimental::randint<uint16_t>(0,255) );
        ss.str(str);
      }
    

    You can write statement using those functions

      auto hex_str = ascii_to_hex_bytea(ss.str() );
      std::cout << hex_str << "\n";
    
      std::string tableName{"table_name"};
      std::string statement1 = "UPDATE " + tableName + " SET byte_info = " + hex_str + " WHERE id = 1;";
      std::cout << statement1 << "\n\n";
    
      auto escaped_str = ascii_to_escaped_bytea(ss.str() );
      std::cout << escaped_str << "\n";
    
      std::string statement2 = "UPDATE " + tableName + " SET byte_info = " + escaped_str + " WHERE id = 1;";
      std::cout << statement2 << "\n";
    

    Print

    E'\\x4AC3A1F218E1ED92AB0B3966C3E99CC5BD8419B4A91D504F85AE7621525F305A...'
    UPDATE table_name SET byte_info = E'\\x4AC3A1F218E1ED92AB0B3966C3E99C...' WHERE id = 1;
    
    E'J\\303\\241\\362\\030\\341\\355\\222\\253\\0139f\\303\\351\\234\\30...'::bytea
    UPDATE table_name SET byte_info = E'J\\303\\241\\362\\030\\341\\355\\...'::bytea WHERE id = 1;
    

    godbolt.org/g/8Ctgcu

    wandbox.org/permlink/eaaAWz7pCbGTLcbC