Search code examples
dc.jscrossfilter

How to draw a Pareto chart in dc.js


I am trying to implement a Pareto chart using a composite chart from dc.js. Sorting in a composite chart works fine if the incoming data are linear rather than ordinal. I am stuck on implementing the following.

I have the following code, in which I am trying to create a Pareto chart where the reasons sit on the x axis and there are two y axes. One would represent the sum of the time and the other would be the overall percentage contribution of the time.

total_time = sum(time)

contribution = time/total_time

One chart is supposed to be sorted in ASC order by the time value, and the other is supposed to be sorted in DESC order by the contribution.

What is the approach here ?

// Sample records: each row pairs a `reason` category (A-D) with a numeric `time`.
// Every reason occurs several times, so per-reason totals have to be aggregated.
var sample_data = [
{ reason: "A", time: 1 },
{ reason: "B", time: 6 },
{ reason: "C", time: 6 },
{ reason: "D", time: 5 },
{ reason: "A", time: 5 },
{ reason: "B", time: 5 },
{ reason: "C", time: 8 },
{ reason: "A", time: 8 },
{ reason: "B", time: 2 },
{ reason: "C", time: 2 },
{ reason: "D", time: 10 },
{ reason: "C", time: 7 },
{ reason: "A", time: 3 },
{ reason: "B", time: 4 },
{ reason: "C", time: 2 }];
    
    
// Index the records with crossfilter, keyed by `reason`, and build two groups
// over that dimension. All four names are declared in one `var` list so that
// `grp2_` is a real variable rather than an implicit global.
var ndx_ = crossfilter(sample_data),
dim_  = ndx_.dimension( function(d) {return d.reason;} ),
// grp1_: sum of `time` per reason.
grp1_ = dim_.group().reduceSum(function(d){ return d.time;}),
// grp2_: custom reduce that tracks, per reason, the last record seen, the
// running sum of `time`, and a derived `contribution`.
// NOTE: `p.time` is overwritten with the latest record's time and `total_time`
// only sums the records of this one reason, so `contribution` is NOT the
// reason's share of the grand total across all reasons.
grp2_ = dim_.group().reduce(
  // add: a record enters the group
  function(p,v){
    p.reason = v.reason;
    p.time = v.time;
    p.total_time += +p.time;
    p.contribution = p.time/p.total_time; 
   return p;
  },
// remove: a record leaves the group (e.g. it was filtered out)
function(p,v){
    p.reason = v.reason;
    p.time = v.time;
    p.total_time -= +p.time;
    p.contribution = p.time/p.total_time; 
  
  return p;
},
// initial: starting accumulator for each group
function(p,v){
  return {reason:'',time:0,total_time:0,contribution:0}
});
   
    
// Records ordered by `time`, largest first. The comparator returns a number
// (negative / zero / positive) as Array.prototype.sort requires; a boolean
// comparator never yields a negative value and gives an unreliable order.
// Sorting a copy leaves `sample_data` in its original order.
var sortByTime = sample_data.slice().sort(function (a, b) { return b.time - a.time; });
var sampleDataSorted = sortByTime.map(function (d) { return d; });

// Composite chart attempt: bars from grp1_ and a dashed red line from grp2_,
// sharing one ordinal x axis. `chart` itself is created outside this snippet.
chart
 .width(768)
        .height(480)
        //.x(d3.scaleBand())
        // The x domain is built from the `reason` of every sorted record, so it
        // contains one entry per record (repeated reasons), not one per category.
                .x(d3.scaleOrdinal().domain(sampleDataSorted.map(function(d) {
                    console.log("asas",d);
                    return d.reason;
        })))

        .xUnits(dc.units.ordinal)
        .yAxisLabel("The Y Axis")
        .legend(dc.legend().x(80).y(20).itemHeight(13).gap(5))
        .renderHorizontalGridLines(true)
        .compose([
            // Bar series: per-reason time totals from grp1_.
            dc.barChart(chart)
                .dimension(dim_)
                            .barPadding(20)
                            .clipPadding(20)
                            .outerPadding(100)
                .group(grp1_, "Bars")
                            
                .centerBar(true) ,
            // Line series: reads `contribution` from grp2_'s reduced value object.
                    dc.lineChart(chart) 
                .dimension(dim_)
                .colors('red')
                .group(grp2_, "Dots")
                .dashStyle([2,2])
                            .valueAccessor(function(d){return d.value.contribution})
            ])
// NOTE(review): group entries appear to be {key, value} pairs, so `d.time` is
// presumably undefined here and this ordering would have no effect - confirm.
.ordering(function(d) { return +d.time; })
        .brushOn(false)
        
 
    
        chart.render();

PS: I also have a setup here at this link here


Solution

  • So we need a group that computes the total time for each category ("reason"), sorts the categories, computes the contribution of each item, and accumulates the contributions for the line chart.

    We can put this logic into a fake group that computes everything at once:

    /**
     * Build a "fake group" whose .all() returns the categories of `group`
     * sorted by descending value, each augmented with its share of the total
     * and the running (cumulative) share.
     *
     * @param {{all: function(): Array<{key: *, value: number}>}} group
     *   ordinary group supplying one {key, value} entry per category
     * @param {{value: function(): number}} groupall
     *   groupall supplying the current total over all categories
     * @returns {{all: function(): Array<{key: *, value: {value: number,
     *   contribution: number, cumulative: number}}>}} object usable as a group
     */
    function pareto_group(group, groupall) { // 1
        return {
          all: function() { // 2
            var total = groupall.value(), // 3
                cumulate = 0; // 4
            // When everything is filtered out the total is 0; report a share of 0
            // instead of dividing by zero and handing NaN to the chart.
            var share = function(amount) { return total ? amount / total : 0; };
            return group.all().slice(0) // 5
              .sort((a,b) => d3.descending(a.value, b.value)) // 6
              .map(({key,value}) => ({ // 7
                key,
                value: {
                  value,
                  contribution: share(value),
                  // Accumulate the raw values and divide once: summing the
                  // individual fractions drifts (e.g. 0.9999999999999999 at the
                  // last category), which a later Math.floor turns into 99%.
                  cumulative: share(cumulate += value)
                }
              }))
          }
        };
    }
    // Grand total of `time` over all records; pareto_group divides by this.
    var allTime_ = ndx_.groupAll().reduceSum(function(d) { return d.time; });
    var pg = pareto_group(grp1_, allTime_);
    
    1. We need an ordinary group and a groupall for the total as inputs
    2. A "fake group" is an object that implements .all() and returns an array of {key, value} pairs
    3. We need the current total over all categories in order to compute the contribution of each category
    4. We will accumulate the contributions as we go from left to right
    5. We'll take the original group's .all(), copying the array using .slice(0)
    6. Sort in descending order by value
    7. ... and generate a new array, with the same keys, but with the value augmented with the individual and cumulative contribution

    Initializing the chart requires some obscure workarounds. I won't go too deep into this, except to say that yes this is more complicated than you would think.

    // Pareto chart: bars show each category's value, the dashed red line shows
    // the cumulative percentage against the right-hand axis. Both series read
    // from the same fake group `pg`.
    chart
     .width(768)
            .height(480)
            .x(d3.scaleBand())
            // elasticX makes the chart reread the X scale domain on each redraw.
            .elasticX(true)
            // Negated value => categories ordered from largest to smallest.
            .ordering(kv => -kv.value.value)
            .xUnits(dc.units.ordinal)
            .group(pg)
            // One of the ordinal-chart workarounds mentioned in the text.
            ._rangeBandPadding(1)
            .yAxisLabel("The Y Axis")
            .legend(dc.legend().x(80).y(20).itemHeight(13).gap(5))
            .renderHorizontalGridLines(true)
            .compose([
                // Bars: the category's own value.
                dc.barChart(chart)
                    .dimension(dim_)
                    .barPadding(1)
                    .gap(1)
                    .centerBar(true)
                    .clipPadding(10)
                    .group(pg, "Contribution", kv => kv.value.value),
                // Line: cumulative share converted to a whole-number percentage.
                dc.lineChart(chart) 
                    .dimension(dim_)
                    .colors('red')
                    .group(pg, "Cumulative", kv => Math.floor(kv.value.cumulative*100))
                    .useRightYAxis(true)
                    .dashStyle([2,2])
                ])
            .brushOn(false);
            
     // Label the right axis ticks as percentages.
     chart.rightYAxis().tickFormat(d => d + '%')  
        
    

    Note that we are turning on elasticX in order to get the chart to reread the X scale domain each redraw.

    Most of the special cases involve ordinal charts.

    Here is a screenshot:

    pareto chart

    And here is a demo fiddle.