在 Power Query 或 PostgreSQL 中有条件地从 table 中删除行
Conditionally remove row from table in Power Query or PostgreSQL
我有一个员工列表。部分员工从合同工转为正式员工后,在表中会有两行记录。对于这些员工,我想删除其类型为合同工(contract)的那一行。
employeeID | EmployeeType | Other Employee Data |
---|---|---|
216 | contract | drop this row |
641 | fulltime | |
216 | fulltime | |
853 | contract | |
我想要的输出是:
employeeID | EmployeeType | Other Employee Data |
---|---|---|
641 | fulltime | |
216 | fulltime | |
853 | contract | |
关于如何在 Power Query(用于测试)和/或 PostgreSQL(最终实施)中执行此操作的任何建议?
编辑:2022/01/17
@ron-rosenfeld 在下面提供了 Power Query 的解决方案。我将尝试在这里总结一下。任何不准确的地方都是由于我缺乏理解。
- 指定用于分组的列(参见 Microsoft 的 Table.Group() 文档)
- 用新的列名指定聚合,聚合函数接收每个分组对应的子表
- 从子表中筛选掉 "contract" 行,得到过滤后的子表
- 如果子表中的记录数多于 1,则返回过滤后子表的第一条记录,否则返回子表本身的第一条记录
- 展开返回的记录
// Keeps one row per employeeID: the non-contract row when the employee has
// several rows, otherwise the employee's first (only) row.
let
    // Placeholder: replace with the expression that yields the source table.
    Source = _my source_,
    #"Grouped Rows" = Table.Group(Source, {"employeeID"}, {
        {"TempTable", (t) =>
            let
                // Rows of this employee that are not contract rows.
                ft = Table.SelectRows(t, each [EmployeeType] <> "contract")
            in
                // The extra check on ft guards the case where an employee has
                // several rows that are all "contract": ft would be empty and
                // ft{0} would raise an error.
                if Table.RowCount(t) > 1 and Table.RowCount(ft) > 0 then ft{0} else t{0}
        }
    }),
    // The aggregate returns a record; expand it back into regular columns.
    #"Expanded Employee Type" = Table.ExpandRecordColumn(#"Grouped Rows", "TempTable", {"EmployeeType", "Other Employee Data"}, {"EmployeeType", "Other Employee Data"})
in
    #"Expanded Employee Type"
-- Remove the 'contract' row of every employee who also has a 'fulltime' row.
-- The check is for an existing 'fulltime' row rather than "more than one row",
-- so an employee whose rows are all 'contract' keeps them.
DELETE FROM tab AS c
WHERE c.employeetype = 'contract'
  AND EXISTS (SELECT 1
              FROM tab AS f
              WHERE f.employeeid = c.employeeid
                AND f.employeetype = 'fulltime');
完全执行:
postgres=# create table tab(employeeid int,employeetype varchar(20));
CREATE TABLE
postgres=# insert into tab values(216,'contract'),(641,'fulltime'),(216,'fulltime'),(851,'contract');
INSERT 0 4
postgres=# select * from tab;
employeeid | employeetype
------------+--------------
216 | contract
641 | fulltime
216 | fulltime
851 | contract
(4 rows)
postgres=# delete from Tab where employeeid in (select employeeid from tab group by employeeid having count(*)>1) and employeetype='contract';
DELETE 1
postgres=# select * from tab;
employeeid | employeetype
------------+--------------
641 | fulltime
216 | fulltime
851 | contract
(3 rows)
编辑(2022 年 1 月 16 日):列出所有行,但排除曾经是合同工、后来转为全职的员工,即只保留一直是全职或一直是合同工的员工的行。
-- List the rows of employees that appear exactly once in tab.
-- A positive IN on "count = 1" replaces NOT IN on "count > 1": NOT IN returns
-- no rows at all as soon as the subquery yields a NULL employeeid.
SELECT *
FROM tab
WHERE employeeid IN (SELECT employeeid
                     FROM tab
                     GROUP BY employeeid
                     HAVING COUNT(*) = 1);
完全执行:
postgres=# truncate table tab;
TRUNCATE TABLE
postgres=# insert into tab values(216,'contract'),(641,'fulltime'),(216,'fulltime'),(851,'contract');
INSERT 0 4
postgres=# select * from tab;
employeeid | employeetype
------------+--------------
216 | contract
641 | fulltime
216 | fulltime
851 | contract
postgres=# SELECT *
postgres-# FROM tab
postgres-# WHERE employeeid NOT IN (SELECT employeeid
postgres(# FROM tab
postgres(# GROUP BY employeeid
postgres(# HAVING Count(*) > 1);
employeeid | employeetype
------------+--------------
641 | fulltime
851 | contract
(2 rows)
编辑(2022 年 1 月 17 日):使用反连接(anti-join)列出各行;对于同时拥有全职记录和合同记录的员工,排除其合同记录:
-- Anti-join: return every row except a 'contract' row whose employee
-- also has a 'fulltime' row.
-- IS DISTINCT FROM keeps rows with a NULL employeetype, as the correlated
-- form with the outer-row test inside the subquery does.
SELECT *
FROM tab AS cur
WHERE cur.employeetype IS DISTINCT FROM 'contract'
   OR NOT EXISTS (SELECT 1
                  FROM tab AS ft
                  WHERE ft.employeeid = cur.employeeid
                    AND ft.employeetype = 'fulltime');
执行:
postgres=# SELECT *
postgres-# FROM tab outer_tab
postgres-# WHERE NOT EXISTS (SELECT 1
postgres(# FROM tab inner_tab
postgres(# WHERE inner_tab.employeeid = outer_tab.employeeid
postgres(# AND inner_tab.employeetype = 'fulltime'
postgres(# AND outer_tab.employeetype = 'contract');
employeeid | employeetype
------------+--------------
641 | fulltime
216 | fulltime
851 | contract
(3 rows)
在 Power Query 中:
- 按 ID 分组
- 然后创建自定义聚合,用于:
  - 将每个子表筛选为只保留 "fulltime" 行
  - 如果筛选后没有行,则返回第一行的 EmployeeType,否则返回 "fulltime"
// Produces one row per employeeID with a single "Employee Type" value:
// "fulltime" when the employee has at least one fulltime row, otherwise the
// EmployeeType of the employee's first row.
let
    Source = Excel.CurrentWorkbook(){[Name="Table15"]}[Content],
    #"Changed Type" = Table.TransformColumnTypes(Source, {{"employeeID", Int64.Type}, {"EmployeeType", type text}}),
    // Aggregator applied to the sub-table of each employee.
    PickType = (rows) =>
        let
            fulltimeRows = Table.SelectRows(rows, each [EmployeeType] = "fulltime")
        in
            if Table.RowCount(fulltimeRows) = 0 then rows[EmployeeType]{0} else "fulltime",
    #"Grouped Rows" = Table.Group(#"Changed Type", {"employeeID"}, {{"Employee Type", PickType}})
in
    #"Grouped Rows"
编辑:若要返回筛选出的整行记录而不仅仅是类型,只需把聚合改为返回相应的记录,然后展开该列。例如:
// Produces one row per employeeID carrying the whole chosen record:
// the first fulltime row when one exists, otherwise the employee's first row.
let
    Source = Excel.CurrentWorkbook(){[Name="Table6"]}[Content],
    #"Changed Type" = Table.TransformColumnTypes(Source, {{"employeeID", Int64.Type}, {"EmployeeType", type text}}),
    // Aggregator: first fulltime record, falling back to the first record of the group.
    PickRow = (rows) =>
        Table.First(Table.SelectRows(rows, each [EmployeeType] = "fulltime"), rows{0}),
    #"Grouped Rows" = Table.Group(#"Changed Type", {"employeeID"}, {{"Employee Type", PickRow}}),
    // The aggregate is a record column; expand it back into regular columns.
    #"Expanded Employee Type" = Table.ExpandRecordColumn(#"Grouped Rows", "Employee Type", {"EmployeeType", "Other Employee Data"}, {"EmployeeType", "Other Employee Data"})
in
    #"Expanded Employee Type"
我有一个员工列表。部分员工从合同工转为正式员工后,在表中会有两行记录。对于这些员工,我想删除其类型为合同工(contract)的那一行。
employeeID | EmployeeType | Other Employee Data |
---|---|---|
216 | contract | drop this row |
641 | fulltime | |
216 | fulltime | |
853 | contract | |
我想要的输出是:
employeeID | EmployeeType | Other Employee Data |
---|---|---|
641 | fulltime | |
216 | fulltime | |
853 | contract | |
关于如何在 Power Query(用于测试)和/或 PostgreSQL(最终实施)中执行此操作的任何建议?
编辑:2022/01/17
@ron-rosenfeld 在下面提供了 Power Query 的解决方案。我将尝试在这里总结一下。任何不准确的地方都是由于我缺乏理解。
- 指定用于分组的列(参见 Microsoft 的 Table.Group() 文档)
- 用新的列名指定聚合,聚合函数接收每个分组对应的子表
- 从子表中筛选掉 "contract" 行,得到过滤后的子表
- 如果子表中的记录数多于 1,则返回过滤后子表的第一条记录,否则返回子表本身的第一条记录
- 展开返回的记录
// Keeps one row per employeeID: the non-contract row when the employee has
// several rows, otherwise the employee's first (only) row.
let
    // Placeholder: replace with the expression that yields the source table.
    Source = _my source_,
    #"Grouped Rows" = Table.Group(Source, {"employeeID"}, {
        {"TempTable", (t) =>
            let
                // Rows of this employee that are not contract rows.
                ft = Table.SelectRows(t, each [EmployeeType] <> "contract")
            in
                // The extra check on ft guards the case where an employee has
                // several rows that are all "contract": ft would be empty and
                // ft{0} would raise an error.
                if Table.RowCount(t) > 1 and Table.RowCount(ft) > 0 then ft{0} else t{0}
        }
    }),
    // The aggregate returns a record; expand it back into regular columns.
    #"Expanded Employee Type" = Table.ExpandRecordColumn(#"Grouped Rows", "TempTable", {"EmployeeType", "Other Employee Data"}, {"EmployeeType", "Other Employee Data"})
in
    #"Expanded Employee Type"
-- Remove the 'contract' row of every employee who also has a 'fulltime' row.
-- The check is for an existing 'fulltime' row rather than "more than one row",
-- so an employee whose rows are all 'contract' keeps them.
DELETE FROM tab AS c
WHERE c.employeetype = 'contract'
  AND EXISTS (SELECT 1
              FROM tab AS f
              WHERE f.employeeid = c.employeeid
                AND f.employeetype = 'fulltime');
完全执行:
postgres=# create table tab(employeeid int,employeetype varchar(20));
CREATE TABLE
postgres=# insert into tab values(216,'contract'),(641,'fulltime'),(216,'fulltime'),(851,'contract');
INSERT 0 4
postgres=# select * from tab;
employeeid | employeetype
------------+--------------
216 | contract
641 | fulltime
216 | fulltime
851 | contract
(4 rows)
postgres=# delete from Tab where employeeid in (select employeeid from tab group by employeeid having count(*)>1) and employeetype='contract';
DELETE 1
postgres=# select * from tab;
employeeid | employeetype
------------+--------------
641 | fulltime
216 | fulltime
851 | contract
(3 rows)
编辑(2022 年 1 月 16 日):列出所有行,但排除曾经是合同工、后来转为全职的员工,即只保留一直是全职或一直是合同工的员工的行。
-- List the rows of employees that appear exactly once in tab.
-- A positive IN on "count = 1" replaces NOT IN on "count > 1": NOT IN returns
-- no rows at all as soon as the subquery yields a NULL employeeid.
SELECT *
FROM tab
WHERE employeeid IN (SELECT employeeid
                     FROM tab
                     GROUP BY employeeid
                     HAVING COUNT(*) = 1);
完全执行:
postgres=# truncate table tab;
TRUNCATE TABLE
postgres=# insert into tab values(216,'contract'),(641,'fulltime'),(216,'fulltime'),(851,'contract');
INSERT 0 4
postgres=# select * from tab;
employeeid | employeetype
------------+--------------
216 | contract
641 | fulltime
216 | fulltime
851 | contract
postgres=# SELECT *
postgres-# FROM tab
postgres-# WHERE employeeid NOT IN (SELECT employeeid
postgres(# FROM tab
postgres(# GROUP BY employeeid
postgres(# HAVING Count(*) > 1);
employeeid | employeetype
------------+--------------
641 | fulltime
851 | contract
(2 rows)
编辑(2022 年 1 月 17 日):使用反连接(anti-join)列出各行;对于同时拥有全职记录和合同记录的员工,排除其合同记录:
-- Anti-join: return every row except a 'contract' row whose employee
-- also has a 'fulltime' row.
-- IS DISTINCT FROM keeps rows with a NULL employeetype, as the correlated
-- form with the outer-row test inside the subquery does.
SELECT *
FROM tab AS cur
WHERE cur.employeetype IS DISTINCT FROM 'contract'
   OR NOT EXISTS (SELECT 1
                  FROM tab AS ft
                  WHERE ft.employeeid = cur.employeeid
                    AND ft.employeetype = 'fulltime');
执行:
postgres=# SELECT *
postgres-# FROM tab outer_tab
postgres-# WHERE NOT EXISTS (SELECT 1
postgres(# FROM tab inner_tab
postgres(# WHERE inner_tab.employeeid = outer_tab.employeeid
postgres(# AND inner_tab.employeetype = 'fulltime'
postgres(# AND outer_tab.employeetype = 'contract');
employeeid | employeetype
------------+--------------
641 | fulltime
216 | fulltime
851 | contract
(3 rows)
在 Power Query 中:
- 按 ID 分组
- 然后创建自定义聚合,用于:
  - 将每个子表筛选为只保留 "fulltime" 行
  - 如果筛选后没有行,则返回第一行的 EmployeeType,否则返回 "fulltime"
// Produces one row per employeeID with a single "Employee Type" value:
// "fulltime" when the employee has at least one fulltime row, otherwise the
// EmployeeType of the employee's first row.
let
    Source = Excel.CurrentWorkbook(){[Name="Table15"]}[Content],
    #"Changed Type" = Table.TransformColumnTypes(Source, {{"employeeID", Int64.Type}, {"EmployeeType", type text}}),
    // Aggregator applied to the sub-table of each employee.
    PickType = (rows) =>
        let
            fulltimeRows = Table.SelectRows(rows, each [EmployeeType] = "fulltime")
        in
            if Table.RowCount(fulltimeRows) = 0 then rows[EmployeeType]{0} else "fulltime",
    #"Grouped Rows" = Table.Group(#"Changed Type", {"employeeID"}, {{"Employee Type", PickType}})
in
    #"Grouped Rows"
编辑:若要返回筛选出的整行记录而不仅仅是类型,只需把聚合改为返回相应的记录,然后展开该列。例如:
// Produces one row per employeeID carrying the whole chosen record:
// the first fulltime row when one exists, otherwise the employee's first row.
let
    Source = Excel.CurrentWorkbook(){[Name="Table6"]}[Content],
    #"Changed Type" = Table.TransformColumnTypes(Source, {{"employeeID", Int64.Type}, {"EmployeeType", type text}}),
    // Aggregator: first fulltime record, falling back to the first record of the group.
    PickRow = (rows) =>
        Table.First(Table.SelectRows(rows, each [EmployeeType] = "fulltime"), rows{0}),
    #"Grouped Rows" = Table.Group(#"Changed Type", {"employeeID"}, {{"Employee Type", PickRow}}),
    // The aggregate is a record column; expand it back into regular columns.
    #"Expanded Employee Type" = Table.ExpandRecordColumn(#"Grouped Rows", "Employee Type", {"EmployeeType", "Other Employee Data"}, {"EmployeeType", "Other Employee Data"})
in
    #"Expanded Employee Type"