I want to show a line graph with rolling std over the sum of values for an interval of dates.
The code for the generation of the crossfilter/reductio object is :
// Index the raw records with crossfilter. Declared with `var` so this is not
// an implicit global (a bare assignment throws in strict mode / ES modules).
var myCrossfilter = crossfilter(data);
/**
 * Builds an accessor that maps a record to every calendar day of the
 * `numDays`-long window that starts on the record's `ValueDate`.
 *
 * @param {number} numDays - Length of the window, in days.
 * @returns {function(Object): Date[]} Accessor returning `numDays` fresh Date
 *   objects: ValueDate, ValueDate + 1 day, ..., ValueDate + (numDays - 1) days.
 *   Returns an empty array when `numDays` is not positive.
 */
function getRunningDates(numDays) {
  return function getDates(d) {
    // Work on copies: Date#setDate mutates in place, so stepping the record's
    // own ValueDate would shift the record (and every date already pushed).
    var cursor = new Date(d.ValueDate);
    var end = new Date(cursor);
    end.setDate(end.getDate() + numDays);

    var dates = [];
    while (cursor < end) {
      // Push a snapshot, then advance the cursor by one calendar day.
      dates.push(new Date(cursor));
      cursor.setDate(cursor.getDate() + 1);
    }
    return dates;
  };
}
// dim1: array-valued dimension (second argument `true`) keyed by each date
// produced by getRunningDates(20) for a record.
var dim1 = myCrossfilter.dimension(getRunningDates(20), true);
// dim2: plain dimension keyed by the record's own ValueDate.
var dim2 = myCrossfilter.dimension(dc.pluck("ValueDate"));

var group1 = dim1.group();
var group2 = dim2.group();

// Reducer configurations: std of "value" for the rolling group,
// sum of "value" for the per-date group.
var reducerRolling = reductio().std("value");
var reducer = reductio().sum("value");

// Install each reducer on its group.
reducerRolling(group1);
reducer(group2);
I have put everything into a jsFiddle to show what I mean (unrelated question : I do not understand how the dates on the graphs can go beyond my dateToInit
variable defined in the fiddle).
I would like the bottom graph to be a rolling std of the values in the top graph.
What ends up happening is that the std calculation in the bottom graph
does not do the sum aggregation first
(which makes sense; I understand why).
Is there a way to use a group as the dimension for another group ? If not, how would one achieve what I am trying to do ?
OK so I've come up with a solution based on the 'fake group' approach suggested by Gordon.
I have updated the jsFiddle with a working version.
The gist of it is define custom reducing functions :
/**
 * Crossfilter "add" reducer: folds record `v` into the group state `p`.
 * Keeps a per-date running total in `p.datesData` (keyed by `v.ValueDate`)
 * and an overall total in `p.value`.
 *
 * @param {{value: number, datesData: Object}} p - Group state, mutated in place.
 * @param {{ValueDate: *, value: *}} v - Record being added; `value` is coerced to a number.
 * @returns {Object} The same `p` object.
 */
var reduceAddRunning = function (p, v) {
  var amount = +v.value;
  // Call hasOwnProperty through the prototype so a key stored in datesData
  // can never shadow the method.
  if (!Object.prototype.hasOwnProperty.call(p.datesData, v.ValueDate)) {
    p.datesData[v.ValueDate] = 0;
  }
  p.datesData[v.ValueDate] += amount;
  p.value += amount;
  return p;
};
/**
 * Crossfilter "remove" reducer: subtracts record `v` from the group state `p`.
 * The per-date entry is decremented but never deleted, so a date whose records
 * have all been removed stays in `p.datesData` with a total of 0.
 *
 * @param {{value: number, datesData: Object}} p - Group state, mutated in place.
 * @param {{ValueDate: *, value: *}} v - Record being removed; `value` is coerced to a number.
 * @returns {Object} The same `p` object.
 */
var reduceRemoveRunning = function (p, v) {
  var amount = +v.value;
  p.datesData[v.ValueDate] -= amount;
  p.value -= amount;
  return p;
};
/**
 * Crossfilter "initial" reducer: creates the empty state for one group.
 * The `p` and `v` parameters are unused; they are kept so the signature
 * is unchanged.
 *
 * @returns {{value: number, datesData: Object}} A fresh state object with a
 *   zero total and no per-date entries.
 */
var reduceInitRunning = function (p, v) {
  return {
    value: 0,
    datesData: {}
  };
};
and then build a fake group as such :
/**
 * Wraps a group whose values carry a `datesData` map (date -> total) in a
 * "fake group": an object exposing only `all()`.
 *
 * @param {{all: function(): Array}} source_group - Group to read from.
 * @param {function(Array): *} theRunningFn - Called once per source entry
 *   with the array of that entry's per-date totals.
 * @returns {{all: function(): Array<{key: *, value: *}>}} Fake group whose
 *   entries keep the source key and carry theRunningFn's result as value.
 */
var running_group = function (source_group, theRunningFn) {
  // Collects the own-property totals stored in one entry's datesData map.
  function collectTotals(perDate) {
    var totals = [];
    for (var dateKey in perDate) {
      if (!perDate.hasOwnProperty(dateKey)) {
        continue;
      }
      totals.push(perDate[dateKey]);
    }
    return totals;
  }

  function all() {
    return source_group.all().map(function (entry) {
      var totals = collectTotals(entry.value.datesData);
      return { key: entry.key, value: theRunningFn(totals) };
    });
  }

  return { all: all };
}
with theRunningFn
being math.std
in my case.
I am still left with two issues (speed, and the handling of edge cases), which will be the basis for a new question, I guess.
EDIT: the following is a better solution, based on Gordon's comment (again!).
Just do a regular sum group and apply the following fake group function :
/**
 * Wraps an already-aggregated group in a "fake group" whose values are a
 * running statistic over a trailing window of the source values.
 *
 * For each source index i >= numDays, the output entry keeps the source key
 * at i and applies theRunningFn to the numDays values at indices
 * [i - numDays, i) - the window ends just BEFORE entry i. The first numDays
 * entries have no full window and are omitted from the output.
 *
 * @param {{all: function(): Array<{key: *, value: *}>}} source_group - Group to read from.
 * @param {number} numDays - Window length, in source entries.
 * @param {function(Array): *} theRunningFn - Statistic applied to each window.
 * @returns {{all: function(): Array<{key: *, value: *}>}} Fake group.
 */
var running_group_2 = function (source_group, numDays, theRunningFn) {
  return {
    all: function () {
      var source_arr = source_group.all();
      var values = source_arr.map(function (d) { return d.value; });
      var output_arr = [];
      // Start at numDays: earlier entries lack a full window and are skipped.
      // (The original zero-padding branch for i < numDays was unreachable.)
      for (var i = numDays; i < source_arr.length; i++) {
        output_arr.push({
          key: source_arr[i].key,
          value: theRunningFn(values.slice(i - numDays, i))
        });
      }
      return output_arr;
    }
  };
};
It solves both the speed issue (it is much less cumbersome and, instead of storing all the daily values, reuses the already aggregated values) and the edge cases (although it is not easily generalizable beyond my case as far as the edge cases are concerned: I just don't show a value when I don't have enough points to calculate the running variable).
Here is the jsFiddle for that second (better for my purposes) solution.