使用 powerquery M 代码数据整理,如何从分组行中填充缺失值
Using powerquery M code data wrangling, how to fill in missing value from grouped rows
使用 powerquery M 代码,如何使用一组行中最常见的值来填充所需的缺失值?
例如,从下面这张表开始:
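id | group | attribute 1 | attribute 2 | attribute 3 |
---|---|---|---|---|
4 | AA | example1 | example2 | |
8 | AA | | example2 | |
9 | AA | example1 | example1 | |
13 | AB | example4 | example2 | example3 |
14 | AB | example4 | example2 | example3 |
15 | AB | | | |
19 | BB | | | |
20 | BB | example5 | | |
23 | BB | | | |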
我想修复 attribute 1 这一列,所以希望最终得到:
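id | group | attribute 1 | attribute 2 | attribute 3 |
---|---|---|---|---|
4 | AA | example1 | example2 | |
8 | AA | example1 | example2 | |
9 | AA | example1 | example1 | |
13 | AB | example4 | example2 | example3 |
14 | AB | example4 | example2 | example3 |
15 | AB | example4 | | |
19 | BB | example5 | | |
20 | BB | example5 | | |
23 | BB | example5 | | |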
可以使用向下填充功能来做到这一点
// Calls Table.FillDown on the "attribute 1" column of Source; per the Power Query docs this
// copies the previous cell's value into the null cells below it.
// NOTE(review): the call takes no "group" argument, so a value may be carried across a group
// boundary, and a null with no value above it would stay null — confirm this fits the data.
= Table.FillDown(Source,{"attribute 1"})
起初我打算建议使用“向下填充”,但您的数据不适合这样做。看起来您是想用每个组中出现次数最多的值来替换该组中的空值。下面的代码可以做到这一点:
// Adds a "Custom" column holding "attribute 1", with nulls replaced by the value that
// occurs most often within the same group.
let
    Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],

    // How many rows carry each (group, attribute 1) combination.
    PairCounts = Table.Group(
        Source,
        {"group", "attribute 1"},
        {{"Count", Table.RowCount, type number}}
    ),

    // A null is never a candidate fill value, so discard those combinations.
    Candidates = Table.SelectRows(PairCounts, each [attribute 1] <> null),

    // Within every group, order the candidates by descending count and number them from 1;
    // the candidate numbered 1 therefore has the highest count in its group.
    RankedPerGroup = Table.Group(
        Candidates,
        {"group"},
        {{
            "NiceTable",
            (rows) => Table.AddIndexColumn(
                Table.Sort(rows, {{"Count", Order.Descending}}),
                "Index", 1, 1
            ),
            type table
        }}
    ),
    RankedFlat = Table.ExpandTableColumn(
        RankedPerGroup,
        "NiceTable",
        {"attribute 1", "Index"},
        {"NiceTable.attribute 1", "NiceTable.Index"}
    ),
    TopPerGroup = Table.SelectRows(RankedFlat, each [NiceTable.Index] = 1),

    // Attach each group's top candidate to the original rows (left outer join keeps every
    // source row, including groups that have no candidate at all).
    Joined = Table.NestedJoin(Source, {"group"}, TopPerGroup, {"group"}, "FR", JoinKind.LeftOuter),
    JoinedFlat = Table.ExpandTableColumn(
        Joined,
        "FR",
        {"NiceTable.attribute 1"},
        {"NiceTable.attribute 1"}
    ),

    // Keep the row's own value when present; otherwise fall back to the group's top candidate.
    WithCustom = Table.AddColumn(
        JoinedFlat,
        "Custom",
        each if [attribute 1] = null then [NiceTable.attribute 1] else [attribute 1]
    )
in
    WithCustom
也许下面的方法是正确的,但在提问者(OP)没有在帖子中澄清他的问题究竟是什么之前,很难下定论,至少对我来说是这样。
// Loads Table1, applies explicit column types, and appends a "newAttrib1" column whose
// value is the result of calling fx2nd with the row's group.
let
    Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
    ColumnTypes = {
        {"id", Int64.Type},
        {"group", type text},
        {"attribute 1", type text},
        {"attribute 2", type text},
        {"attribute 3", type text}
    },
    Typed = Table.TransformColumnTypes(Source, ColumnTypes),
    WithNewAttrib = Table.AddColumn(Typed, "newAttrib1", (row) => fx2nd(row[group]))
in
    WithNewAttrib
函数 fx2nd 的定义如下:
// fx2nd: returns the first non-null "attribute 1" value found in Table1 for the given
// group, or null when the group has no non-null "attribute 1" at all.
//   group - the group key to look up (text).
(group as text) =>
let
    Quelle = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
    #"Geänderter Typ" = Table.TransformColumnTypes(Quelle,{{"id", Int64.Type}, {"group", type text}, {"attribute 1", type text}, {"attribute 2", type text}, {"attribute 3", type text}}),
    // Keep only the rows of the requested group that actually have a value in "attribute 1".
    #"Gefilterte Zeilen" = Table.SelectRows(#"Geänderter Typ", each [group] = group and [attribute 1] <> null),
    // List.First with a default yields null for an empty result; indexing the table with {0}
    // would raise an error when the group has no non-null value.
    #"attribute 1" = List.First(#"Gefilterte Zeilen"[attribute 1], null)
in
    #"attribute 1"
使用 powerquery M 代码,如何使用一组行中最常见的值来填充所需的缺失值?
例如,从下面这张表开始:
id | group | attribute 1 | attribute 2 | attribute 3 |
---|---|---|---|---|
4 | AA | example1 | example2 | |
8 | AA | | example2 | |
9 | AA | example1 | example1 | |
13 | AB | example4 | example2 | example3 |
14 | AB | example4 | example2 | example3 |
15 | AB | |||
19 | BB | |||
20 | BB | example5 | ||
23 | BB | | | |
我想修复 attribute 1 这一列,所以希望最终得到:
id | group | attribute 1 | attribute 2 | attribute 3 |
---|---|---|---|---|
4 | AA | example1 | example2 | |
8 | AA | example1 | example2 | |
9 | AA | example1 | example1 | |
13 | AB | example4 | example2 | example3 |
14 | AB | example4 | example2 | example3 |
15 | AB | example4 | ||
19 | BB | example5 | ||
20 | BB | example5 | ||
23 | BB | example5 | | |
可以使用向下填充功能来做到这一点
// Calls Table.FillDown on the "attribute 1" column of Source; per the Power Query docs this
// copies the previous cell's value into the null cells below it.
// NOTE(review): the call takes no "group" argument, so a value may be carried across a group
// boundary, and a null with no value above it would stay null — confirm this fits the data.
= Table.FillDown(Source,{"attribute 1"})
起初我打算建议使用“向下填充”,但您的数据不适合这样做。看起来您是想用每个组中出现次数最多的值来替换该组中的空值。下面的代码可以做到这一点:
// Adds a "Custom" column holding "attribute 1", with nulls replaced by the value that
// occurs most often within the same group.
let
    Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],

    // How many rows carry each (group, attribute 1) combination.
    PairCounts = Table.Group(
        Source,
        {"group", "attribute 1"},
        {{"Count", Table.RowCount, type number}}
    ),

    // A null is never a candidate fill value, so discard those combinations.
    Candidates = Table.SelectRows(PairCounts, each [attribute 1] <> null),

    // Within every group, order the candidates by descending count and number them from 1;
    // the candidate numbered 1 therefore has the highest count in its group.
    RankedPerGroup = Table.Group(
        Candidates,
        {"group"},
        {{
            "NiceTable",
            (rows) => Table.AddIndexColumn(
                Table.Sort(rows, {{"Count", Order.Descending}}),
                "Index", 1, 1
            ),
            type table
        }}
    ),
    RankedFlat = Table.ExpandTableColumn(
        RankedPerGroup,
        "NiceTable",
        {"attribute 1", "Index"},
        {"NiceTable.attribute 1", "NiceTable.Index"}
    ),
    TopPerGroup = Table.SelectRows(RankedFlat, each [NiceTable.Index] = 1),

    // Attach each group's top candidate to the original rows (left outer join keeps every
    // source row, including groups that have no candidate at all).
    Joined = Table.NestedJoin(Source, {"group"}, TopPerGroup, {"group"}, "FR", JoinKind.LeftOuter),
    JoinedFlat = Table.ExpandTableColumn(
        Joined,
        "FR",
        {"NiceTable.attribute 1"},
        {"NiceTable.attribute 1"}
    ),

    // Keep the row's own value when present; otherwise fall back to the group's top candidate.
    WithCustom = Table.AddColumn(
        JoinedFlat,
        "Custom",
        each if [attribute 1] = null then [NiceTable.attribute 1] else [attribute 1]
    )
in
    WithCustom
也许下面的方法是正确的,但在提问者(OP)没有在帖子中澄清他的问题究竟是什么之前,很难下定论,至少对我来说是这样。
// Loads Table1, applies explicit column types, and appends a "newAttrib1" column whose
// value is the result of calling fx2nd with the row's group.
let
    Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
    ColumnTypes = {
        {"id", Int64.Type},
        {"group", type text},
        {"attribute 1", type text},
        {"attribute 2", type text},
        {"attribute 3", type text}
    },
    Typed = Table.TransformColumnTypes(Source, ColumnTypes),
    WithNewAttrib = Table.AddColumn(Typed, "newAttrib1", (row) => fx2nd(row[group]))
in
    WithNewAttrib
函数 fx2nd 的定义如下:
// fx2nd: returns the first non-null "attribute 1" value found in Table1 for the given
// group, or null when the group has no non-null "attribute 1" at all.
//   group - the group key to look up (text).
(group as text) =>
let
    Quelle = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
    #"Geänderter Typ" = Table.TransformColumnTypes(Quelle,{{"id", Int64.Type}, {"group", type text}, {"attribute 1", type text}, {"attribute 2", type text}, {"attribute 3", type text}}),
    // Keep only the rows of the requested group that actually have a value in "attribute 1".
    #"Gefilterte Zeilen" = Table.SelectRows(#"Geänderter Typ", each [group] = group and [attribute 1] <> null),
    // List.First with a default yields null for an empty result; indexing the table with {0}
    // would raise an error when the group has no non-null value.
    #"attribute 1" = List.First(#"Gefilterte Zeilen"[attribute 1], null)
in
    #"attribute 1"