如何在 MySQL 或 Google Data Studio 中按顺序条件分组?
How to group with sequence conditions in MySQL or Google Data Studio?
我有一个考勤数据库,员工记录存储如下
MySQL 版本为 5.7.26-log(日期没有间断,每一天都有记录;只是为了方便,我已经从下面的 INSERT 语句中删去了大部分日期的数据)
-- Attendance table used by every query in this post.
-- Note: no primary key or secondary index is declared in this definition.
CREATE TABLE `Whosebug` (
    `id`       int(9)       NOT NULL DEFAULT '0',
    `Date`     date         NOT NULL,  -- day the attendance row refers to
    `EmpID`    varchar(100) NOT NULL,  -- employee identifier (stored as text)
    `name`     varchar(100) NOT NULL,  -- employee display name
    `TeamName` varchar(100) NOT NULL,  -- team recorded for the employee on that day
    `Status`   varchar(100) NOT NULL   -- codes such as 'P', '-', 'PH' appear in the sample rows; meanings are not shown here
)
ENGINE = InnoDB
DEFAULT CHARSET = latin1;
-- Sample data for table `Whosebug`: two employees, with most of the daily rows
-- omitted for brevity (the real table has a row for every date).
INSERT INTO `Whosebug` (`id`, `Date`, `EmpID`, `name`, `TeamName`, `Status`)
VALUES
(5, '2019-03-01', '303016', 'Yatendra Ranawat', 'Computer Vision', 'P'),
(23, '2019-03-01', '303128', 'Nikhat Khan', 'Media - 3D Automation', 'P'),
(193606, '2019-09-02', '303016', 'Yatendra Ranawat', 'Computer Vision', 'P'),
(194631, '2019-09-03', '303016', 'Yatendra Ranawat', 'Noon', '-'),
(222309, '2019-09-30', '303016', 'Yatendra Ranawat', 'Noon', '-'),
(223336, '2019-10-01', '303016', 'Yatendra Ranawat', 'Noon-Indore', 'P'),
(282742, '2019-11-28', '303016', 'Yatendra Ranawat', 'Noon-Indore', '-'),
(283765, '2019-11-29', '303016', 'Yatendra Ranawat', 'Home Depot - Indore', 'P'),
(303251, '2019-12-18', '303128', 'Nikhat Khan', 'Media - 3D Automation', '-'),
(304275, '2019-12-19', '303128', 'Nikhat Khan', 'US Taxonomy - Indore', 'P'),
(309393, '2019-12-24', '303128', 'Nikhat Khan', 'US Taxonomy - Indore', 'P'),
(310416, '2019-12-25', '303128', 'Nikhat Khan', 'Media - 3D Automation', 'PH'),
(354076, '2020-02-06', '303016', 'Yatendra Ranawat', 'Home Depot - Indore', '-'),
(354088, '2020-02-06', '303128', 'Nikhat Khan', 'Media - 3D Automation', 'P');
我正在使用 Google Data Studio 为团队中的员工持续时间创建报告,如下所示
当员工从一个团队调到另一个团队、并且没有再回到他以前工作过的团队时,报告完全正确。但是,当一名员工回到他之前工作过的团队时,最小/最大日期的聚合逻辑会把这几段时间合并在一起
我需要在 MySQL / Google Data Studio 中怎么做,才能基于上面那样的数据表得到像下面这样的结果?
如果您需要我这边的任何详细信息,请告诉我。任何建议或指导将不胜感激。
请测试以下查询:
-- Report every continuous stint of an employee in a team as one row:
-- (EmpID, name, TeamName, startdate, enddate).
--
-- Inner query: for each attendance row t1, find the latest row t2 of the same
-- employee in the same team such that no row of that employee in a DIFFERENT
-- team (t3) lies strictly between the two dates. That date is the end of the
-- stint t1 belongs to. LEFT JOIN ... WHERE t3.EmpID IS NULL is an anti-join.
-- Outer query: all rows of one stint share the same enddate, so the earliest
-- startdate per (employee, team, enddate) is the start of that stint.
--
-- The self-join uses <= rather than < so a row can pair with itself. With a
-- strict < a stint made of a single attendance row has no partner row and
-- would silently vanish from the report.
SELECT
    t4.EmpID,
    t4.name,
    t4.TeamName,
    MIN(t4.startdate) AS startdate,
    t4.enddate
FROM (
    SELECT
        t1.EmpID,
        t1.name,
        t1.TeamName,
        t1.`Date` AS startdate,
        MAX(t2.`Date`) AS enddate
    FROM Whosebug AS t1
    INNER JOIN Whosebug AS t2
        ON t1.EmpID = t2.EmpID
        AND t1.TeamName = t2.TeamName
        AND t1.`Date` <= t2.`Date`
    LEFT JOIN Whosebug AS t3
        ON t1.EmpID = t3.EmpID
        AND t1.TeamName != t3.TeamName
        AND t1.`Date` < t3.`Date`
        AND t3.`Date` < t2.`Date`
    WHERE t3.EmpID IS NULL
    GROUP BY t1.EmpID, t1.name, t1.TeamName, t1.`Date`
) AS t4
GROUP BY t4.EmpID, t4.name, t4.TeamName, t4.enddate
-- Unqualified startdate resolves to the MIN(...) alias in the select list.
ORDER BY t4.EmpID, t4.name, startdate, t4.enddate;
= this query is too slow :( Server crashes while running on a table containing 222839 rows – Yatendra Ranawat
= @YatendraRanawat Convert LEFT JOIN into NOT EXISTS... – Akina
-- Report every continuous stint of an employee in a team as one row:
-- (EmpID, name, TeamName, startdate, enddate). NOT EXISTS formulation.
--
-- Inner query: for each attendance row t1, keep the rows t2 of the same
-- employee and team for which no row of that employee in a DIFFERENT team
-- (t3) lies strictly between the two dates; the latest such t2 date is the end
-- of the stint t1 belongs to.
-- Outer query: all rows of one stint share the same enddate, so the earliest
-- startdate per (employee, team, enddate) is the start of that stint.
--
-- The self-join uses <= rather than < so a row can pair with itself. With a
-- strict < a stint made of a single attendance row has no partner row and
-- would silently vanish from the report.
SELECT
    t4.EmpID,
    t4.name,
    t4.TeamName,
    MIN(t4.startdate) AS startdate,
    t4.enddate
FROM (
    SELECT
        t1.EmpID,
        t1.name,
        t1.TeamName,
        t1.`Date` AS startdate,
        MAX(t2.`Date`) AS enddate
    FROM Whosebug AS t1
    INNER JOIN Whosebug AS t2
        ON t1.EmpID = t2.EmpID
        AND t1.TeamName = t2.TeamName
        AND t1.`Date` <= t2.`Date`
    WHERE NOT EXISTS (
        SELECT NULL
        FROM Whosebug AS t3
        WHERE t1.EmpID = t3.EmpID
          AND t1.TeamName != t3.TeamName
          AND t1.`Date` < t3.`Date`
          AND t3.`Date` < t2.`Date`
    )
    GROUP BY t1.EmpID, t1.name, t1.TeamName, t1.`Date`
) AS t4
GROUP BY t4.EmpID, t4.name, t4.TeamName, t4.enddate
-- Unqualified startdate resolves to the MIN(...) alias in the select list.
ORDER BY t4.EmpID, t4.name, startdate, t4.enddate;
也测试下一个变体,速度更快:
-- Single-pass variant built on MySQL user variables.
-- The derived table t tags every attendance row with the start date of the
-- stint it belongs to; the outer query then takes MAX(enddate) per
-- (EmpID, name, TeamName, startdate) to get one row per stint.
-- NOTE(review): this relies on the rows being processed in the
-- ORDER BY EmpID, `Date` order and on the select-list expressions being
-- evaluated left to right (startdate is computed before @id / @team are
-- reassigned). The MySQL manual describes the evaluation order of expressions
-- involving user variables as undefined; confirm on the target server.
SELECT EmpID, name, TeamName, startdate, MAX(enddate) enddate
FROM (
SELECT name,
-- Same employee and team as the previously processed row: reuse the running
-- start date. Otherwise this row opens a new stint, so its date is stored in
-- @startdate and returned.
CASE WHEN EmpId = @id AND TeamName = @team
THEN @startdate
ELSE @startdate := `Date`
END startdate,
`Date` enddate,
-- Remember this row's employee and team for the comparison on the next row.
@id := EmpID EmpID,
@team := TeamName TeamName
-- The one-row derived table "variables" only initialises the three variables.
FROM Whosebug, (SELECT @id := '', @team := '', @startdate := '') variables
ORDER BY EmpID, `Date`
) t
-- Positional references: GROUP BY EmpID, name, TeamName, startdate;
-- ORDER BY EmpID, name, startdate, enddate.
GROUP BY 1,2,3,4
ORDER BY 1,2,4,5;
在我的带有热缓存的系统上,它需要 0.03 秒,而上一个答案的查询分别为 25.39 秒和 1 分 54.79 秒。在您拥有 220k 条记录的系统上,差异一定更大。
我有一个考勤数据库,员工记录存储如下
MySQL 版本为 5.7.26-log(日期没有间断,每一天都有记录;只是为了方便,我已经从下面的 INSERT 语句中删去了大部分日期的数据)
-- Attendance table used by every query in this post.
-- Note: no primary key or secondary index is declared in this definition.
CREATE TABLE `Whosebug` (
    `id`       int(9)       NOT NULL DEFAULT '0',
    `Date`     date         NOT NULL,  -- day the attendance row refers to
    `EmpID`    varchar(100) NOT NULL,  -- employee identifier (stored as text)
    `name`     varchar(100) NOT NULL,  -- employee display name
    `TeamName` varchar(100) NOT NULL,  -- team recorded for the employee on that day
    `Status`   varchar(100) NOT NULL   -- codes such as 'P', '-', 'PH' appear in the sample rows; meanings are not shown here
)
ENGINE = InnoDB
DEFAULT CHARSET = latin1;
-- Sample data for table `Whosebug`: two employees, with most of the daily rows
-- omitted for brevity (the real table has a row for every date).
INSERT INTO `Whosebug` (`id`, `Date`, `EmpID`, `name`, `TeamName`, `Status`)
VALUES
(5, '2019-03-01', '303016', 'Yatendra Ranawat', 'Computer Vision', 'P'),
(23, '2019-03-01', '303128', 'Nikhat Khan', 'Media - 3D Automation', 'P'),
(193606, '2019-09-02', '303016', 'Yatendra Ranawat', 'Computer Vision', 'P'),
(194631, '2019-09-03', '303016', 'Yatendra Ranawat', 'Noon', '-'),
(222309, '2019-09-30', '303016', 'Yatendra Ranawat', 'Noon', '-'),
(223336, '2019-10-01', '303016', 'Yatendra Ranawat', 'Noon-Indore', 'P'),
(282742, '2019-11-28', '303016', 'Yatendra Ranawat', 'Noon-Indore', '-'),
(283765, '2019-11-29', '303016', 'Yatendra Ranawat', 'Home Depot - Indore', 'P'),
(303251, '2019-12-18', '303128', 'Nikhat Khan', 'Media - 3D Automation', '-'),
(304275, '2019-12-19', '303128', 'Nikhat Khan', 'US Taxonomy - Indore', 'P'),
(309393, '2019-12-24', '303128', 'Nikhat Khan', 'US Taxonomy - Indore', 'P'),
(310416, '2019-12-25', '303128', 'Nikhat Khan', 'Media - 3D Automation', 'PH'),
(354076, '2020-02-06', '303016', 'Yatendra Ranawat', 'Home Depot - Indore', '-'),
(354088, '2020-02-06', '303128', 'Nikhat Khan', 'Media - 3D Automation', 'P');
我正在使用 Google Data Studio 为团队中的员工持续时间创建报告,如下所示
当员工从一个团队调到另一个团队、并且没有再回到他以前工作过的团队时,报告完全正确。但是,当一名员工回到他之前工作过的团队时,最小/最大日期的聚合逻辑会把这几段时间合并在一起
我需要在 MySQL / Google Data Studio 中怎么做,才能基于上面那样的数据表得到像下面这样的结果?
如果您需要我这边的任何详细信息,请告诉我。任何建议或指导将不胜感激。
请测试以下查询:
-- Report every continuous stint of an employee in a team as one row:
-- (EmpID, name, TeamName, startdate, enddate).
--
-- Inner query: for each attendance row t1, find the latest row t2 of the same
-- employee in the same team such that no row of that employee in a DIFFERENT
-- team (t3) lies strictly between the two dates. That date is the end of the
-- stint t1 belongs to. LEFT JOIN ... WHERE t3.EmpID IS NULL is an anti-join.
-- Outer query: all rows of one stint share the same enddate, so the earliest
-- startdate per (employee, team, enddate) is the start of that stint.
--
-- The self-join uses <= rather than < so a row can pair with itself. With a
-- strict < a stint made of a single attendance row has no partner row and
-- would silently vanish from the report.
SELECT
    t4.EmpID,
    t4.name,
    t4.TeamName,
    MIN(t4.startdate) AS startdate,
    t4.enddate
FROM (
    SELECT
        t1.EmpID,
        t1.name,
        t1.TeamName,
        t1.`Date` AS startdate,
        MAX(t2.`Date`) AS enddate
    FROM Whosebug AS t1
    INNER JOIN Whosebug AS t2
        ON t1.EmpID = t2.EmpID
        AND t1.TeamName = t2.TeamName
        AND t1.`Date` <= t2.`Date`
    LEFT JOIN Whosebug AS t3
        ON t1.EmpID = t3.EmpID
        AND t1.TeamName != t3.TeamName
        AND t1.`Date` < t3.`Date`
        AND t3.`Date` < t2.`Date`
    WHERE t3.EmpID IS NULL
    GROUP BY t1.EmpID, t1.name, t1.TeamName, t1.`Date`
) AS t4
GROUP BY t4.EmpID, t4.name, t4.TeamName, t4.enddate
-- Unqualified startdate resolves to the MIN(...) alias in the select list.
ORDER BY t4.EmpID, t4.name, startdate, t4.enddate;
= this query is too slow :( Server crashes while running on a table containing 222839 rows – Yatendra Ranawat
= @YatendraRanawat Convert LEFT JOIN into NOT EXISTS... – Akina
-- Report every continuous stint of an employee in a team as one row:
-- (EmpID, name, TeamName, startdate, enddate). NOT EXISTS formulation.
--
-- Inner query: for each attendance row t1, keep the rows t2 of the same
-- employee and team for which no row of that employee in a DIFFERENT team
-- (t3) lies strictly between the two dates; the latest such t2 date is the end
-- of the stint t1 belongs to.
-- Outer query: all rows of one stint share the same enddate, so the earliest
-- startdate per (employee, team, enddate) is the start of that stint.
--
-- The self-join uses <= rather than < so a row can pair with itself. With a
-- strict < a stint made of a single attendance row has no partner row and
-- would silently vanish from the report.
SELECT
    t4.EmpID,
    t4.name,
    t4.TeamName,
    MIN(t4.startdate) AS startdate,
    t4.enddate
FROM (
    SELECT
        t1.EmpID,
        t1.name,
        t1.TeamName,
        t1.`Date` AS startdate,
        MAX(t2.`Date`) AS enddate
    FROM Whosebug AS t1
    INNER JOIN Whosebug AS t2
        ON t1.EmpID = t2.EmpID
        AND t1.TeamName = t2.TeamName
        AND t1.`Date` <= t2.`Date`
    WHERE NOT EXISTS (
        SELECT NULL
        FROM Whosebug AS t3
        WHERE t1.EmpID = t3.EmpID
          AND t1.TeamName != t3.TeamName
          AND t1.`Date` < t3.`Date`
          AND t3.`Date` < t2.`Date`
    )
    GROUP BY t1.EmpID, t1.name, t1.TeamName, t1.`Date`
) AS t4
GROUP BY t4.EmpID, t4.name, t4.TeamName, t4.enddate
-- Unqualified startdate resolves to the MIN(...) alias in the select list.
ORDER BY t4.EmpID, t4.name, startdate, t4.enddate;
也测试下一个变体,速度更快:
-- Single-pass variant built on MySQL user variables.
-- The derived table t tags every attendance row with the start date of the
-- stint it belongs to; the outer query then takes MAX(enddate) per
-- (EmpID, name, TeamName, startdate) to get one row per stint.
-- NOTE(review): this relies on the rows being processed in the
-- ORDER BY EmpID, `Date` order and on the select-list expressions being
-- evaluated left to right (startdate is computed before @id / @team are
-- reassigned). The MySQL manual describes the evaluation order of expressions
-- involving user variables as undefined; confirm on the target server.
SELECT EmpID, name, TeamName, startdate, MAX(enddate) enddate
FROM (
SELECT name,
-- Same employee and team as the previously processed row: reuse the running
-- start date. Otherwise this row opens a new stint, so its date is stored in
-- @startdate and returned.
CASE WHEN EmpId = @id AND TeamName = @team
THEN @startdate
ELSE @startdate := `Date`
END startdate,
`Date` enddate,
-- Remember this row's employee and team for the comparison on the next row.
@id := EmpID EmpID,
@team := TeamName TeamName
-- The one-row derived table "variables" only initialises the three variables.
FROM Whosebug, (SELECT @id := '', @team := '', @startdate := '') variables
ORDER BY EmpID, `Date`
) t
-- Positional references: GROUP BY EmpID, name, TeamName, startdate;
-- ORDER BY EmpID, name, startdate, enddate.
GROUP BY 1,2,3,4
ORDER BY 1,2,4,5;
在我的带有热缓存的系统上,它需要 0.03 秒,而上一个答案的查询分别为 25.39 秒和 1 分 54.79 秒。在您拥有 220k 条记录的系统上,差异一定更大。