dc.js 使用交叉过滤器将回归图合并到现有散点图中
dc.js Incorporate regression chart into existing scatterplot with crossfilter
我正在使用 dc.js 和 crossfilter.js 创建一个 d3 仪表板,我想知道如何将回归线实现到响应过滤的散点图中。
我一直在尝试几个添加回归线的示例,但始终没能成功地提取并整合其中的代码。
我在数学方面没有问题,但不清楚如何从维度中获取过滤后的数据,以及如何把回归线添加到过滤后的散点图中(使回归线也能响应之后的过滤)。
// Sample dataset: one object per record with "record", "date", "cars" and
// "bikes" fields. Every value, including the numeric ones, is stored as a string.
var data = [
// NOTE(review): this first entry repeats the field names as values (it looks
// like a CSV header row); confirm whether it is meant to be part of the data.
{"record":"record","date":"date","cars":"cars","bikes":"bikes"},
{"record":"1","date":"01/05/2012","cars":"1488.1","bikes":"49.73"},
{"record":"2","date":"02/05/2012","cars":"1374.29","bikes":"52.44"},
{"record":"3","date":"03/05/2012","cars":"1353.01","bikes":"47.92"},
{"record":"4","date":"04/05/2012","cars":"1420.33","bikes":"50.69"},
{"record":"5","date":"05/05/2012","cars":"1544.11","bikes":"47.47"},
{"record":"6","date":"06/05/2012","cars":"1292.84","bikes":"47.75"},
{"record":"7","date":"07/05/2012","cars":"1318.9","bikes":"48.64"},
{"record":"8","date":"08/05/2012","cars":"1686.3","bikes":"50.9"},
{"record":"9","date":"09/05/2012","cars":"1603.99","bikes":"53.44"},
{"record":"10","date":"10/05/2012","cars":"1420.1","bikes":"53.29"},
{"record":"11","date":"11/05/2012","cars":"1410.8","bikes":"54.06"},
{"record":"12","date":"12/05/2012","cars":"1374.62","bikes":"51.24"},
{"record":"13","date":"13/05/2012","cars":"1279.53","bikes":"53.96"},
{"record":"14","date":"14/05/2012","cars":"1330.47","bikes":"49.5"},
{"record":"15","date":"15/05/2012","cars":"1377.61","bikes":"52.32"},
{"record":"16","date":"16/05/2012","cars":"1302.12","bikes":"51.96"},
{"record":"17","date":"17/05/2012","cars":"1326.9","bikes":"49.86"},
{"record":"18","date":"18/05/2012","cars":"1181.55","bikes":"50.25"},
{"record":"19","date":"19/05/2012","cars":"1493.75","bikes":"51.24"},
{"record":"20","date":"20/05/2012","cars":"1463.9","bikes":"50.88"},
{"record":"21","date":"21/05/2012","cars":"1370.16","bikes":"51.09"},
{"record":"22","date":"22/05/2012","cars":"1403.3","bikes":"51.67"},
{"record":"23","date":"23/05/2012","cars":"1277.65","bikes":"49.3"},
{"record":"24","date":"24/05/2012","cars":"1361.94","bikes":"50.47"},
{"record":"25","date":"25/05/2012","cars":"1400.8","bikes":"51.55"},
{"record":"26","date":"26/05/2012","cars":"1289.09","bikes":"47.17"},
{"record":"27","date":"27/05/2012","cars":"1258.39","bikes":"52.12"},
{"record":"28","date":"28/05/2012","cars":"1288.71","bikes":"49.28"},
{"record":"29","date":"29/05/2012","cars":"1511.86","bikes":"50.73"},
{"record":"30","date":"30/05/2012","cars":"1300.38","bikes":"52.39"},
{"record":"31","date":"31/05/2012","cars":"1455.19","bikes":"49.53"},
{"record":"32","date":"01/06/2012","cars":"1311.89","bikes":"50.37"},
{"record":"33","date":"02/06/2012","cars":"1368.64","bikes":"50.87"},
{"record":"34","date":"03/06/2012","cars":"1360.05","bikes":"50.51"},
{"record":"35","date":"04/06/2012","cars":"1382.56","bikes":"49.67"},
{"record":"36","date":"05/06/2012","cars":"1304.15","bikes":"47.6"},
{"record":"37","date":"06/06/2012","cars":"1271.57","bikes":"50.22"},
{"record":"38","date":"07/06/2012","cars":"1442.38","bikes":"50.8"},
{"record":"39","date":"08/06/2012","cars":"1406.38","bikes":"53.14"},
{"record":"40","date":"09/06/2012","cars":"1724.16","bikes":"49.66"},
{"record":"41","date":"10/06/2012","cars":"1931.05","bikes":"53"},
{"record":"42","date":"11/06/2012","cars":"1669.47","bikes":"53.71"},
{"record":"43","date":"12/06/2012","cars":"1794.06","bikes":"51.78"},
{"record":"44","date":"13/06/2012","cars":"1625.98","bikes":"51.58"},
{"record":"45","date":"14/06/2012","cars":"1371.51","bikes":"52.36"},
{"record":"46","date":"15/06/2012","cars":"1418.05","bikes":"47.64"},
{"record":"47","date":"16/06/2012","cars":"1431","bikes":"53.14"},
{"record":"48","date":"17/06/2012","cars":"1527.21","bikes":"48.63"},
{"record":"49","date":"18/06/2012","cars":"1320.95","bikes":"51.7"},
{"record":"50","date":"19/06/2012","cars":"1396.93","bikes":"52.92"}
];
// Field names to plot: tSel1 supplies the x values and tSel2 the y values.
// Declared with `var`: the previous bare assignments created implicit globals,
// which throw a ReferenceError in strict mode and in ES modules.
var tSel1 = "cars";
var tSel2 = "bikes";

// Convert both fields from strings to numbers, in place. Values that are not
// numeric strings become NaN.
data.forEach(function (d) {
  d[tSel1] = +d[tSel1];
  d[tSel2] = +d[tSel2];
});

var facts = crossfilter(data);
// Despite its name, this is a groupAll() object rather than a dimension.
var allDimension = facts.groupAll();
// Each record is keyed by its [x, y] pair; the fields are already numeric
// after the coercion above, so no further conversion is needed here.
var scatterDimension = facts.dimension(function (d) {
  return [d[tSel1], d[tSel2]];
});
var scatterGroup = scatterDimension.group().reduceSum(function (d) {
  return d[tSel1];
});

// Axis domains: the largest value is scaled up by 10% and the smallest value is
// scaled down by 10%. The "Y1" names belong to tSel1 (x axis) and the "Y2"
// names to tSel2 (y axis).
var maxY1 = d3.max(data, function (d) { return d[tSel1]; });
var maxY2 = d3.max(data, function (d) { return d[tSel2]; });
var maxY1Plus = maxY1 + (maxY1 * 0.1);
var maxY2Plus = maxY2 + (maxY2 * 0.1);
var minY1 = d3.min(data, function (d) { return d[tSel1]; });
var minY1Minus = minY1 * 0.9;
var minY2 = d3.min(data, function (d) { return d[tSel2]; });
var minY2Minus = minY2 * 0.9;

var xyScatterChart = dc.scatterPlot("#scatterPlot");
xyScatterChart
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup)
  .symbolSize(6)
  .highlightedSize(15)
  .brushOn(false)
  .excludedOpacity(0.5)
  .excludedSize(5)
  .renderHorizontalGridLines(true)
  .renderVerticalGridLines(true)
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));

dc.renderAll();
// NOTE(review): a redraw immediately after the initial render is probably
// redundant; kept to preserve the original call sequence.
dc.redrawAll();
<link href="http://dc-js.github.io/dc.js/css/dc.css" rel="stylesheet"/>
<script src="http://dc-js.github.io/dc.js/js/d3.js"></script>
<script src="http://dc-js.github.io/dc.js/js/crossfilter.js"></script>
<script src="http://dc-js.github.io/dc.js/js/dc.js"></script>
<div id="scatterPlot"></div>
参考文献:
https://groups.google.com/forum/#!topic/dc-js-user-group/HaQMegKa_U0
如果 dc.js 的示例中能包含这样一个例子就太好了,因为这是很多人都用得上的东西。
也许我们可以一起努力?我不懂数学,但这里有一种简单的方法,可以使用复合图表显示根据聚合组计算的数据线。
首先,这是嵌入了旧散点图的合成图表:
// Composite chart that overlays the scatter plot and a line chart on one set
// of axes.
var composite = dc.compositeChart("#composite");

// Size, margins, dimension and group are configured on the parent chart.
composite
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup);

// Child 1: the scatter plot, keeping its symbol and grid-line settings.
var scatterLayer = dc.scatterPlot(composite)
  .symbolSize(6)
  .highlightedSize(15)
  .brushOn(false)
  .excludedOpacity(0.5)
  .excludedSize(5)
  .renderHorizontalGridLines(true)
  .renderVerticalGridLines(true);

// Child 2: the line chart, fed by a group object derived from scatterGroup.
var regressionLayer = dc.lineChart(composite)
  .group(regressionGroup(scatterGroup));

// Attach the children, then set the shared x and y scales on the parent.
composite
  .compose([scatterLayer, regressionLayer])
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));
请注意,我们为合成图和散点图都提供了散点组。那只是因为复合图表需要一个组,即使它实际上并不使用它。
我们已将与坐标相关的参数移至主(复合)图表,但散点图特有的所有内容都保留在上面。我们还在组合中添加了折线图,它使用基于散点组的"fake group"。
这个假组尤其“假”,不过应该足够入门了。由于今天没时间学数学,这里就直接把第一个点和最后一个点的连线当作回归线:
/**
 * Wraps a group whose keys are [x, y] pairs in an object exposing `all()`,
 * producing {key: x, value: y} entries suitable for a line chart.
 *
 * This is a placeholder, not a real regression: the "line" simply joins the
 * first and the last valid point of the wrapped group.
 *
 * @param {{all: function(): Array<{key: Array}>}} group - group keyed by [x, y].
 * @returns {{all: function(): Array<{key: *, value: *}>}} object whose `all()`
 *   returns two entries (first and last valid point), or an empty array when
 *   the wrapped group has no valid point.
 */
function regressionGroup(group) {
  return {
    all: function () {
      // Keep only points whose x and y both coerce to a real number, then
      // convert the scatter-plot shape (key = [x, y]) to the line-chart shape
      // (key = x, value = y).
      var points = group.all()
        .filter(function (entry) {
          return !Number.isNaN(Number(entry.key[0])) &&
            !Number.isNaN(Number(entry.key[1]));
        })
        .map(function (entry) {
          return {key: entry.key[0], value: entry.key[1]};
        });

      // Without any valid point there is nothing to draw; return no data
      // rather than an array of undefined entries.
      if (points.length === 0) {
        return [];
      }
      return [points[0], points[points.length - 1]];
    }
  };
}
与所有假组一样,思路是在图表请求数据时,基于另一个组(快速地)计算出形似分组的数据。这里的计算没什么意思,因为你知道如何计算回归而我不知道。你需要用真正的回归计算来替换 first、last 以及 for 循环;目前这段代码所做的只是检查各点是否有效,并保留找到的第一个和最后一个有效点。
有趣的是,散点图使用的数据以同时包含 x 和 y 坐标的数组为键,而折线图使用的数据以 x 为键、以 y 为值。这就是我们要做 kv = {key: key[0], value: key[1]} 这一转换的原因。
后记
请注意,如果回归线的端点落在定义域之外,你会遇到 dc.js 的一个 bug:stack mixin 在把点裁剪到定义域时过于激进(the stack mixin is too aggressive about clipping points to the domain)。有一个简单但丑陋的变通办法,在这种情况下似乎有效:告诉折线图它使用的是序数 x 刻度,即使实际上并不是:
// Sketch of the workaround: the "// ..." lines stand for configuration that is
// omitted here.
// `lineChart` is declared up front so the child chart created inside compose()
// can still be referenced afterwards.
var composite = dc.compositeChart("#composite"),
lineChart;
composite
.width(600)
// ...
.compose([
// ...
// Capture the line chart while passing it to compose().
lineChart = dc.lineChart(composite)
.group(regressionGroup(scatterGroup))
])
// Override isOrdinal with a function that always returns true.
lineChart.isOrdinal = d3.functor(true);
呸!但它有效!这个 hack 可能只适用于复合内部!
我有一个功能齐全的回归示例。当我来这里寻求帮助时,我正是这样做的,我发现了你的问题。它需要 regression.js
(here).
这遵循了 Gordon 关于 "fake group" 的出色建议,实际上应该将其称为内联组、直接组,甚至是动态组。这是我的:
/**
 * Wraps a group whose keys are [x, y] pairs in an object exposing `all()`,
 * returning the two end points of a linear regression line fitted with
 * regression.js (`regression.linear`).
 *
 * NOTE(review): crossfilter's group.all() may also return bins emptied by the
 * current filters; confirm whether those should be excluded before fitting.
 *
 * @param {{all: function(): Array<{key: Array}>}} group - group keyed by [x, y].
 * @param {number} min - x value at which the line starts.
 * @param {number} max - x value at which the line ends.
 * @param {boolean} [filter=false] - when true, points whose x is null,
 *   undefined, an empty string or NaN are dropped before fitting. An x of 0 is
 *   a valid value and is kept.
 * @returns {{all: function(): Array<{key: number, value: number}>}} object
 *   whose `all()` returns the predicted points at `min` and `max`, or an empty
 *   array when fewer than two points are available.
 */
function myRegressionGroup(group, min, max, filter = false) {
  return {
    all: function () {
      let points = group.all().map((entry) => entry.key);
      if (filter) {
        // Explicit emptiness test: a plain truthiness check would also discard x === 0.
        points = points.filter((point) => {
          const x = point[0];
          return x != null && x !== '' && !Number.isNaN(x);
        });
      }

      // A line cannot be fitted through fewer than two points.
      if (points.length < 2) {
        return [];
      }

      // `reg` is local; it used to leak as an implicit global.
      const reg = regression.linear(points);
      // predict(x) results are read as [x, y] pairs.
      const first = reg.predict(min);
      const last = reg.predict(max);
      return [
        {key: first[0], value: first[1]},
        {key: last[0], value: last[1]}
      ];
    }
  };
}
请注意,此函数需要一个 crossfilter 组,以及 x 比例尺的 min 和 max。由于你通常已经为 xScale 计算过这些值,在这里直接复用即可。之所以需要它们,是因为函数会用 predict 方法在这两个极值处计算回归线的两个端点。
可选的 filter 参数让你决定是否剔除 x 为空值的数据点。
@Gordon, how should I do in order to include my regression example in the Examples of using dc.js?
我正在使用 dc.js 和 crossfilter.js 创建一个 d3 仪表板,我想知道如何将回归线实现到响应过滤的散点图中。
我一直在尝试几个添加回归线的示例,但始终没能成功地提取并整合其中的代码。
我在数学方面没有问题,但不清楚如何从维度中获取过滤后的数据,以及如何把回归线添加到过滤后的散点图中(使回归线也能响应之后的过滤)。
// Sample dataset: one object per record with "record", "date", "cars" and
// "bikes" fields. Every value, including the numeric ones, is stored as a string.
var data = [
// NOTE(review): this first entry repeats the field names as values (it looks
// like a CSV header row); confirm whether it is meant to be part of the data.
{"record":"record","date":"date","cars":"cars","bikes":"bikes"},
{"record":"1","date":"01/05/2012","cars":"1488.1","bikes":"49.73"},
{"record":"2","date":"02/05/2012","cars":"1374.29","bikes":"52.44"},
{"record":"3","date":"03/05/2012","cars":"1353.01","bikes":"47.92"},
{"record":"4","date":"04/05/2012","cars":"1420.33","bikes":"50.69"},
{"record":"5","date":"05/05/2012","cars":"1544.11","bikes":"47.47"},
{"record":"6","date":"06/05/2012","cars":"1292.84","bikes":"47.75"},
{"record":"7","date":"07/05/2012","cars":"1318.9","bikes":"48.64"},
{"record":"8","date":"08/05/2012","cars":"1686.3","bikes":"50.9"},
{"record":"9","date":"09/05/2012","cars":"1603.99","bikes":"53.44"},
{"record":"10","date":"10/05/2012","cars":"1420.1","bikes":"53.29"},
{"record":"11","date":"11/05/2012","cars":"1410.8","bikes":"54.06"},
{"record":"12","date":"12/05/2012","cars":"1374.62","bikes":"51.24"},
{"record":"13","date":"13/05/2012","cars":"1279.53","bikes":"53.96"},
{"record":"14","date":"14/05/2012","cars":"1330.47","bikes":"49.5"},
{"record":"15","date":"15/05/2012","cars":"1377.61","bikes":"52.32"},
{"record":"16","date":"16/05/2012","cars":"1302.12","bikes":"51.96"},
{"record":"17","date":"17/05/2012","cars":"1326.9","bikes":"49.86"},
{"record":"18","date":"18/05/2012","cars":"1181.55","bikes":"50.25"},
{"record":"19","date":"19/05/2012","cars":"1493.75","bikes":"51.24"},
{"record":"20","date":"20/05/2012","cars":"1463.9","bikes":"50.88"},
{"record":"21","date":"21/05/2012","cars":"1370.16","bikes":"51.09"},
{"record":"22","date":"22/05/2012","cars":"1403.3","bikes":"51.67"},
{"record":"23","date":"23/05/2012","cars":"1277.65","bikes":"49.3"},
{"record":"24","date":"24/05/2012","cars":"1361.94","bikes":"50.47"},
{"record":"25","date":"25/05/2012","cars":"1400.8","bikes":"51.55"},
{"record":"26","date":"26/05/2012","cars":"1289.09","bikes":"47.17"},
{"record":"27","date":"27/05/2012","cars":"1258.39","bikes":"52.12"},
{"record":"28","date":"28/05/2012","cars":"1288.71","bikes":"49.28"},
{"record":"29","date":"29/05/2012","cars":"1511.86","bikes":"50.73"},
{"record":"30","date":"30/05/2012","cars":"1300.38","bikes":"52.39"},
{"record":"31","date":"31/05/2012","cars":"1455.19","bikes":"49.53"},
{"record":"32","date":"01/06/2012","cars":"1311.89","bikes":"50.37"},
{"record":"33","date":"02/06/2012","cars":"1368.64","bikes":"50.87"},
{"record":"34","date":"03/06/2012","cars":"1360.05","bikes":"50.51"},
{"record":"35","date":"04/06/2012","cars":"1382.56","bikes":"49.67"},
{"record":"36","date":"05/06/2012","cars":"1304.15","bikes":"47.6"},
{"record":"37","date":"06/06/2012","cars":"1271.57","bikes":"50.22"},
{"record":"38","date":"07/06/2012","cars":"1442.38","bikes":"50.8"},
{"record":"39","date":"08/06/2012","cars":"1406.38","bikes":"53.14"},
{"record":"40","date":"09/06/2012","cars":"1724.16","bikes":"49.66"},
{"record":"41","date":"10/06/2012","cars":"1931.05","bikes":"53"},
{"record":"42","date":"11/06/2012","cars":"1669.47","bikes":"53.71"},
{"record":"43","date":"12/06/2012","cars":"1794.06","bikes":"51.78"},
{"record":"44","date":"13/06/2012","cars":"1625.98","bikes":"51.58"},
{"record":"45","date":"14/06/2012","cars":"1371.51","bikes":"52.36"},
{"record":"46","date":"15/06/2012","cars":"1418.05","bikes":"47.64"},
{"record":"47","date":"16/06/2012","cars":"1431","bikes":"53.14"},
{"record":"48","date":"17/06/2012","cars":"1527.21","bikes":"48.63"},
{"record":"49","date":"18/06/2012","cars":"1320.95","bikes":"51.7"},
{"record":"50","date":"19/06/2012","cars":"1396.93","bikes":"52.92"}
];
// Field names to plot: tSel1 supplies the x values and tSel2 the y values.
// Declared with `var`: the previous bare assignments created implicit globals,
// which throw a ReferenceError in strict mode and in ES modules.
var tSel1 = "cars";
var tSel2 = "bikes";

// Convert both fields from strings to numbers, in place. Values that are not
// numeric strings become NaN.
data.forEach(function (d) {
  d[tSel1] = +d[tSel1];
  d[tSel2] = +d[tSel2];
});

var facts = crossfilter(data);
// Despite its name, this is a groupAll() object rather than a dimension.
var allDimension = facts.groupAll();
// Each record is keyed by its [x, y] pair; the fields are already numeric
// after the coercion above, so no further conversion is needed here.
var scatterDimension = facts.dimension(function (d) {
  return [d[tSel1], d[tSel2]];
});
var scatterGroup = scatterDimension.group().reduceSum(function (d) {
  return d[tSel1];
});

// Axis domains: the largest value is scaled up by 10% and the smallest value is
// scaled down by 10%. The "Y1" names belong to tSel1 (x axis) and the "Y2"
// names to tSel2 (y axis).
var maxY1 = d3.max(data, function (d) { return d[tSel1]; });
var maxY2 = d3.max(data, function (d) { return d[tSel2]; });
var maxY1Plus = maxY1 + (maxY1 * 0.1);
var maxY2Plus = maxY2 + (maxY2 * 0.1);
var minY1 = d3.min(data, function (d) { return d[tSel1]; });
var minY1Minus = minY1 * 0.9;
var minY2 = d3.min(data, function (d) { return d[tSel2]; });
var minY2Minus = minY2 * 0.9;

var xyScatterChart = dc.scatterPlot("#scatterPlot");
xyScatterChart
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup)
  .symbolSize(6)
  .highlightedSize(15)
  .brushOn(false)
  .excludedOpacity(0.5)
  .excludedSize(5)
  .renderHorizontalGridLines(true)
  .renderVerticalGridLines(true)
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));

dc.renderAll();
// NOTE(review): a redraw immediately after the initial render is probably
// redundant; kept to preserve the original call sequence.
dc.redrawAll();
<link href="http://dc-js.github.io/dc.js/css/dc.css" rel="stylesheet"/>
<script src="http://dc-js.github.io/dc.js/js/d3.js"></script>
<script src="http://dc-js.github.io/dc.js/js/crossfilter.js"></script>
<script src="http://dc-js.github.io/dc.js/js/dc.js"></script>
<div id="scatterPlot"></div>
参考文献:
https://groups.google.com/forum/#!topic/dc-js-user-group/HaQMegKa_U0
如果 dc.js 的示例中能包含这样一个例子就太好了,因为这是很多人都用得上的东西。
也许我们可以一起努力?我不懂数学,但这里有一种简单的方法,可以使用复合图表显示根据聚合组计算的数据线。
首先,这是嵌入了旧散点图的合成图表:
// Composite chart that overlays the scatter plot and a line chart on one set
// of axes.
var composite = dc.compositeChart("#composite");

// Size, margins, dimension and group are configured on the parent chart.
composite
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup);

// Child 1: the scatter plot, keeping its symbol and grid-line settings.
var scatterLayer = dc.scatterPlot(composite)
  .symbolSize(6)
  .highlightedSize(15)
  .brushOn(false)
  .excludedOpacity(0.5)
  .excludedSize(5)
  .renderHorizontalGridLines(true)
  .renderVerticalGridLines(true);

// Child 2: the line chart, fed by a group object derived from scatterGroup.
var regressionLayer = dc.lineChart(composite)
  .group(regressionGroup(scatterGroup));

// Attach the children, then set the shared x and y scales on the parent.
composite
  .compose([scatterLayer, regressionLayer])
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));
请注意,我们为合成图和散点图都提供了散点组。那只是因为复合图表需要一个组,即使它实际上并不使用它。
我们已将与坐标相关的参数移至主(复合)图表,但散点图特有的所有内容都保留在上面。我们还在组合中添加了折线图,它使用基于散点组的"fake group"。
这个假组尤其“假”,不过应该足够入门了。由于今天没时间学数学,这里就直接把第一个点和最后一个点的连线当作回归线:
/**
 * Wraps a group whose keys are [x, y] pairs in an object exposing `all()`,
 * producing {key: x, value: y} entries suitable for a line chart.
 *
 * This is a placeholder, not a real regression: the "line" simply joins the
 * first and the last valid point of the wrapped group.
 *
 * @param {{all: function(): Array<{key: Array}>}} group - group keyed by [x, y].
 * @returns {{all: function(): Array<{key: *, value: *}>}} object whose `all()`
 *   returns two entries (first and last valid point), or an empty array when
 *   the wrapped group has no valid point.
 */
function regressionGroup(group) {
  return {
    all: function () {
      // Keep only points whose x and y both coerce to a real number, then
      // convert the scatter-plot shape (key = [x, y]) to the line-chart shape
      // (key = x, value = y).
      var points = group.all()
        .filter(function (entry) {
          return !Number.isNaN(Number(entry.key[0])) &&
            !Number.isNaN(Number(entry.key[1]));
        })
        .map(function (entry) {
          return {key: entry.key[0], value: entry.key[1]};
        });

      // Without any valid point there is nothing to draw; return no data
      // rather than an array of undefined entries.
      if (points.length === 0) {
        return [];
      }
      return [points[0], points[points.length - 1]];
    }
  };
}
与所有假组一样,思路是在图表请求数据时,基于另一个组(快速地)计算出形似分组的数据。这里的计算没什么意思,因为你知道如何计算回归而我不知道。你需要用真正的回归计算来替换 first、last 以及 for 循环;目前这段代码所做的只是检查各点是否有效,并保留找到的第一个和最后一个有效点。
有趣的是,散点图使用的数据以同时包含 x 和 y 坐标的数组为键,而折线图使用的数据以 x 为键、以 y 为值。这就是我们要做 kv = {key: key[0], value: key[1]} 这一转换的原因。
后记
请注意,如果回归线的端点落在定义域之外,你会遇到 dc.js 的一个 bug:stack mixin 在把点裁剪到定义域时过于激进(the stack mixin is too aggressive about clipping points to the domain)。有一个简单但丑陋的变通办法,在这种情况下似乎有效:告诉折线图它使用的是序数 x 刻度,即使实际上并不是:
// Sketch of the workaround: the "// ..." lines stand for configuration that is
// omitted here.
// `lineChart` is declared up front so the child chart created inside compose()
// can still be referenced afterwards.
var composite = dc.compositeChart("#composite"),
lineChart;
composite
.width(600)
// ...
.compose([
// ...
// Capture the line chart while passing it to compose().
lineChart = dc.lineChart(composite)
.group(regressionGroup(scatterGroup))
])
// Override isOrdinal with a function that always returns true.
lineChart.isOrdinal = d3.functor(true);
呸!但它有效!这个 hack 可能只适用于复合内部!
我有一个功能齐全的回归示例。当我来这里寻求帮助时,我正是这样做的,我发现了你的问题。它需要 regression.js
(here).
这遵循了 Gordon 关于 "fake group" 的出色建议,实际上应该将其称为内联组、直接组,甚至是动态组。这是我的:
/**
 * Wraps a group whose keys are [x, y] pairs in an object exposing `all()`,
 * returning the two end points of a linear regression line fitted with
 * regression.js (`regression.linear`).
 *
 * NOTE(review): crossfilter's group.all() may also return bins emptied by the
 * current filters; confirm whether those should be excluded before fitting.
 *
 * @param {{all: function(): Array<{key: Array}>}} group - group keyed by [x, y].
 * @param {number} min - x value at which the line starts.
 * @param {number} max - x value at which the line ends.
 * @param {boolean} [filter=false] - when true, points whose x is null,
 *   undefined, an empty string or NaN are dropped before fitting. An x of 0 is
 *   a valid value and is kept.
 * @returns {{all: function(): Array<{key: number, value: number}>}} object
 *   whose `all()` returns the predicted points at `min` and `max`, or an empty
 *   array when fewer than two points are available.
 */
function myRegressionGroup(group, min, max, filter = false) {
  return {
    all: function () {
      let points = group.all().map((entry) => entry.key);
      if (filter) {
        // Explicit emptiness test: a plain truthiness check would also discard x === 0.
        points = points.filter((point) => {
          const x = point[0];
          return x != null && x !== '' && !Number.isNaN(x);
        });
      }

      // A line cannot be fitted through fewer than two points.
      if (points.length < 2) {
        return [];
      }

      // `reg` is local; it used to leak as an implicit global.
      const reg = regression.linear(points);
      // predict(x) results are read as [x, y] pairs.
      const first = reg.predict(min);
      const last = reg.predict(max);
      return [
        {key: first[0], value: first[1]},
        {key: last[0], value: last[1]}
      ];
    }
  };
}
请注意,此函数需要一个 crossfilter 组,以及 x 比例尺的 min 和 max。由于你通常已经为 xScale 计算过这些值,在这里直接复用即可。之所以需要它们,是因为函数会用 predict 方法在这两个极值处计算回归线的两个端点。
可选的 filter 参数让你决定是否剔除 x 为空值的数据点。
@Gordon, how should I do in order to include my regression example in the Examples of using dc.js?