Search code examples
hadoop, apache-pig, epoch

How to get the MilliSeconds between two Epoch time values using PIG script


Game_ID | BeginTime | EndTime

1 | 1235000140| 1235002457
2 | 1235000377| 1235003300
3 | 1235000414| 1235056128
1 | 1235000414| 1235056128
2 | 1235000377| 1235003300

I would like to get the number of milliseconds between the two epoch time fields, BeginTime and EndTime, and then calculate the average time for each game.


Solution

  • games = load 'games.txt' using PigStorage('|') as (gameid: int, begin_time: long, end_time:long);
    
    dump games; 
    (1,1235000140,1235002457)
    (2,1235000377,1235003300)
    (3,1235000414,1235056128)
    (1,1235000414,1235056128)
    (2,1235000377,1235003300)
    

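    Step 1: Calculate the time difference. The result is in the same unit as the input values; if the epoch values are in seconds (as these 10-digit values appear to be), multiply the difference by 1000 to get milliseconds.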

    difference = foreach games generate gameid, end_time - begin_time as time_lapse;
    
    dump difference;
    (1,2317)
    (2,2923)
    (3,55714)
    (1,55714)
    (2,2923)
    

    Step 2: Group the data on Game_ID

    game_group = group difference by gameid;
    
    dump game_group;
    (1,{(1,55714),(1,2317)})
    (2,{(2,2923),(2,2923)})
    (3,{(3,55714)})
    

    Step 3: Compute the average time lapse for each game

    average = foreach game_group generate group, AVG(difference.time_lapse);
    
    dump average;
    (1,29015.5)
    (2,2923.0)
    (3,55714.0)