Search code examples
postgresqlhstore

Move column values to hstore in Postgres cannot finish on 15 million rows


I'm trying this query to move some metadata in a hstore attribute

UPDATE media_files
SET metadata = hstore (bb)
FROM
  (
    SELECT
    video_bitrate,
    video_codec,
    video_resolution,
    video_fps,
    video_aspect,
    video_container,
    audio_codec,
    audio_bitrate,
    audio_sample_rate
    FROM
    media_files
  ) AS bb
])

The table has 15 million records, I left the job running for 15 hours and it didn't finish and I'm not able to keep track since the table seems to be locked out for good during the operation.

Is there something I can do to optimize this?


Solution

  • Note: this assumes you have the space on your server for two copies of the table + indexes. Should work on PostgreSQL 9.2+

    CREATE TABLE media_files_temp AS
    WITH cte AS 
        (SELECT video_bitrate,
                video_codec,
                video_resolution,
                video_fps,
                video_aspect,
                video_container,
                audio_codec,
                audio_bitrate,
                audio_sample_rate,
                <your default value>::INTEGER as play_count
        FROM media_files mf)
    SELECT  cte.*,
            HSTORE('video_bitrate',video_bitrate) || 
            HSTORE('video_codec',video_codec) || 
            HSTORE('video_resolution',video_resolution) || 
            HSTORE('video_fps',video_fps) || 
            HSTORE('video_aspect',video_aspect) || 
            HSTORE('video_container',video_container) || 
            HSTORE('audio_codec',audio_codec) || 
            HSTORE('audio_bitrate',audio_bitrate) || 
            HSTORE('audio_sample_rate',audio_sample_rate)
            as metadata
    FROM cte;
    
    [create your indexes]
    
    BEGIN;
    ALTER TABLE media_files RENAME TO media_files_orig;
    ALTER TABLE media_files_temp RENAME TO media_files;
    COMMIT;
    
    -- a new column was requested by the OP
    ALTER TABLE media_files ALTER your_new_col SET DEFAULT <something>;
    
    ANALYZE media_files;