Search code examples
ruby, hash, nested

Average of an array of nested hashes with same structure


Let's assume I have an array of nested hashes like this one:

# Sample input: three records that share one nested layout.
# Every key is a Symbol; "data1-1" / "data2-1" are quoted only because a
# hyphen is not allowed in the bare `key:` shorthand.
array = [
  {
    id: 8444,
    version: "2.1.0",
    data: {
      data1: {
        "data1-1": { a: 132.6, b: 128.36, c: 153.59, d: 136.48 }
      },
      data2: {
        "data2-1": { a: 1283.0, b: 1254.0, c: 1288.5, d: 1329.0 }
      }
    }
  },
  {
    id: 8443,
    version: "2.1.0",
    data: {
      data1: {
        "data1-1": { a: 32.6, b: 28.36, c: 53.59, d: 36.48 }
      },
      data2: {
        "data2-1": { a: 283.0, b: 254.0, c: 288.5, d: 329.0 }
      }
    }
  },
  {
    id: 8442,
    version: "2.1.0",
    data: {
      data1: {
        "data1-1": { a: 32.6, b: 28.36, c: 53.59, d: 36.48 }
      },
      data2: {
        "data2-1": { a: 283.0, b: 254.0, c: 288.5, d: 329.0 }
      }
    }
  }
]

Each hash of the array has the same map structure.

I would like to create a new hash with the same structure as data, in which each of the values a, b, c and d is the average of the corresponding values across all the hashes in the array.

What is the best approach for this? I cannot simply group_by key, since the same keys (a, b, c, d) appear under different subkeys (data1-1 and data2-1).

The result would then be:

{
  "data1": {
    "data1-1": {
      "a": 65.9,
      "b": 61.7,
      "c": 86.9,
      "d": 69.8
    }
  },
  "data2": {
    "data2-1": {
      "a": 616.3,
      "b": 587.3,
      "c": 621.8,
      "d": 662.3
    }
  }
}

I have tried this:

# Attempt at merging every record's :data into one accumulator hash.
# NOTE: the block's value becomes the next `acc`; here that value is whatever
# `each` returns (its receiver, hash[:data]), not the accumulator built so far.
array.reduce({}) do |acc, hash|
  hash[:data].each do |k,v|
    acc[k] = v # plain assignment: replaces any earlier value for k, nothing is summed or collected
  end
end
# => {:data1=>{:"data1-1"=>{:a=>32.6, :b=>28.36, :c=>53.59, :d=>36.48}}, 
#     :data2=>{:"data2-1"=>{:a=>283.0, :b=>254.0, :c=>288.5, :d=>329.0}}}

Solution

  • Let's group the hashes by their top-level data key using each_with_object.

    # Bucket each record's nested hashes under their top-level :data key
    # (:data1, :data2, ...), keeping the records in their original order.
    grouped = array.each_with_object({}) do |record, buckets|
      record[:data].each { |section, payload| (buckets[section] ||= []) << payload }
    end
    

    Result:

    {:data1 => [{:"data1-1" => {:a=>132.6, :b=>128.36, :c=>153.59, :d=>136.48}}, 
                {:"data1-1" => {:a=>32.6, :b=>28.36, :c=>53.59, :d=>36.48}}, 
                {:"data1-1" => {:a=>32.6, :b=>28.36, :c=>53.59, :d=>36.48}}], 
     :data2 => [{:"data2-1" => {:a=>1283.0, :b=>1254.0, :c=>1288.5, :d=>1329.0}}, 
                {:"data2-1" => {:a=>283.0, :b=>254.0, :c=>288.5, :d=>329.0}}, 
                {:"data2-1" => {:a=>283.0, :b=>254.0, :c=>288.5, :d=>329.0}}]}
    

    Now, let's transform those values.

    # Regroup each top-level bucket by its nested key (e.g. :"data1-1"), so that
    # every nested key maps to the list of leaf hashes gathered from all records.
    # Every nested key is kept, not only the first key of the first record.
    grouped = grouped.transform_values do |sub_hashes|
      sub_hashes.each_with_object({}) do |sub_hash, by_sub_key|
        sub_hash.each do |sub_key, leaf|
          (by_sub_key[sub_key] ||= []) << leaf
        end
      end
    end
    # => {:data1=>{:"data1-1"=>[{:a=>132.6, :b=>128.36, :c=>153.59, :d=>136.48}, 
    #                           {:a=>32.6, :b=>28.36, :c=>53.59, :d=>36.48}, 
    #                           {:a=>32.6, :b=>28.36, :c=>53.59, :d=>36.48}]}, 
    #     :data2=>{:"data2-1"=>[{:a=>1283.0, :b=>1254.0, :c=>1288.5, :d=>1329.0}, 
    #                           {:a=>283.0, :b=>254.0, :c=>288.5, :d=>329.0}, 
    #                           {:a=>283.0, :b=>254.0, :c=>288.5, :d=>329.0}]}}
    

    This is much closer to your stated goal.

    Let's transform some values again.

    # For each nested key, collapse its list of leaf hashes into a single hash
    # mapping every leaf key (:a, :b, ...) to the array of values seen across
    # all records. All nested keys are processed, not only the first one.
    grouped = grouped.transform_values do |by_sub_key|
      by_sub_key.transform_values do |leaves|
        leaves.each_with_object({}) do |leaf, columns|
          leaf.each do |field, value|
            (columns[field] ||= []) << value
          end
        end
      end
    end
    # => {:data1=>{:"data1-1"=>{:a=>[132.6, 32.6, 32.6], 
    #                           :b=>[128.36, 28.36, 28.36], 
    #                           :c=>[153.59, 53.59, 53.59], 
    #                           :d=>[136.48, 36.48, 36.48]}}, 
    #     :data2=>{:"data2-1"=>{:a=>[1283.0, 283.0, 283.0], 
    #                           :b=>[1254.0, 254.0, 254.0], 
    #                           :c=>[1288.5, 288.5, 288.5], 
    #                           :d=>[1329.0, 329.0, 329.0]}}}
    

    Even closer. Averaging an array of numbers is easy. We just need to transform values.

    Replacing the previous bit of code with:

    # For each nested key, gather the values of every leaf key (:a, :b, ...)
    # across all records and replace them with their arithmetic mean.
    # All nested keys are processed, not only the first one.
    grouped = grouped.transform_values do |by_sub_key|
      by_sub_key.transform_values do |leaves|
        columns = leaves.each_with_object({}) do |leaf, acc|
          leaf.each do |field, value|
            (acc[field] ||= []) << value
          end
        end

        # fdiv always performs floating-point division, so Integer inputs are
        # not truncated the way `sum / size` would truncate them.
        columns.transform_values { |numbers| numbers.sum.fdiv(numbers.size) }
      end
    end
    # => {:data1=>{:"data1-1"=>{:a=>65.93333333333334, 
    #                           :b=>61.693333333333335, 
    #                           :c=>86.92333333333333, 
    #                           :d=>69.81333333333333}}, 
    #     :data2=>{:"data2-1"=>{:a=>616.3333333333334, 
    #                           :b=>587.3333333333334, 
    #                           :c=>621.8333333333334, 
    #                           :d=>662.3333333333334}}}