Python groupby 缺少每个组的第一个元素
Python groupby is missing first element of each group
我有一个嵌套列表,我需要验证数据是否按第 2 列(票证的严重性)排序,然后在每个严重性组中按时间倒序排序。
我想我已经接近使用 groupby 的正确解决方案,但是由于某种原因,groupby 正在删除每个组中的第一个元素。喝多少咖啡都无法帮助我的大脑理解原因。
all_case_data = [
('01', 2, '2253415', datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1'),
('02', 3, '3824819', datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'),
('03', 3, '3824715', datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'),
('04', 3, '3824707', datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'),
('05', 3, '3824549', datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'),
('06', 3, '3824061', datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'),
('07', 3, '3822989', datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'),
('08', 3, '3822385', datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'),
('09', 3, '3822377', datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'),
('10', 3, '3820965', datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'),
('11', 3, '3820963', datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'),
('12', 3, '3767961', datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'),
('13', 3, '3767841', datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'),
('14', 3, '3767839', datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'),
('15', 3, '3767837', datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'),
('16', 3, '3767835', datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'),
('17', 3, '3767833', datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'),
('18', 3, '3767831', datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'),
('19', 3, '3767803', datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'),
('20', 3, '3767809', datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'),
('21', 3, '3767801', datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'),
('22', 3, '3767807', datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'),
('23', 3, '3767805', datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'),
('24', 3, '2257019', datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'),
('25', 3, '2256663', datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'),
('26', 3, '2252573', datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'),
('27', 3, '2252571', datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'),
('28', 3, '2252569', datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'),
('29', 3, '2252531', datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'),
('30', 3, '2252533', datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'),
('31', 3, '2252535', datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'),
('32', 3, '2252539', datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'),
('33', 3, '2252567', datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'),
('34', 3, '2252565', datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'),
('35', 3, '2252563', datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'),
('36', 3, '2252537', datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'),
('37', 3, '1168027', datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1'),
('38', 4, '3824817', datetime(2015, 4, 9, 14, 35, 36), 'New', 'user1'),
('39', 4, '3824717', datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'),
('40', 4, '3824709', datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'),
('41', 4, '3824065', datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'),
('42', 4, '3824063', datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1'),
]
from itertools import groupby
import operator
# Question's script (Python 2: uses print statements). Groups the cases by
# Severity (index 1) and tries to sort each group newest-first by date (index 3).
# NOTE(review): the indentation of the loop bodies was lost in this copy;
# presumably the lines after each `for` header are nested under it - confirm
# against the original post.
# Create new list composed of initial list, but sorted by Severity
all_case_data_sorted_by_severity = sorted(all_case_data, key=operator.itemgetter(1))
# Leveraging groupby, create a new list composed of sorted data, sorted in reverse chronological order within each group
groups = [] # Contains list of sub-lists grouped by the unique key (Severity)
uniquekeys = [] # Contains concise list of all of the unique keys (Severity)
# Note: groupby requires the list to first be sorted by the key (Severity) so use all_case_data_sorted_by_severity
for key, group in groupby(all_case_data_sorted_by_severity, operator.itemgetter(1)): # x[1] = Severity
# NOTE(review): `group` is a one-shot iterator. This inner loop binds its first
# item to `thing` (which is never used), so the list(group) call below only
# sees the remaining items - consistent with the missing first element of
# every group in the output that follows.
for thing in group:
groups.append(sorted(list(group), key=operator.itemgetter(3), reverse=True)) # Within each group, sort by date
uniquekeys.append(key)
print "Uniquekeys:", uniquekeys
print "Groups:", groups
这是输出。请注意,组中少了三个元素:每个唯一键的第一个元素都丢失了,也就是严重性为 2 的第一个案例(案例 #01)、严重性为 3 的第一个案例(案例 #02)和严重性为 4 的第一个案例(案例 #38)。
Uniquekeys: [2, 3, 4]
Groups: [[], [('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), ('04', 3, '3824707', datetime.datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'), ('05', 3, '3824549', datetime.datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'), ('06', 3, '3824061', datetime.datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'), ('07', 3, '3822989', datetime.datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'), ('08', 3, '3822385', datetime.datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'), ('09', 3, '3822377', datetime.datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'), ('10', 3, '3820965', datetime.datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'), ('11', 3, '3820963', datetime.datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'), ('12', 3, '3767961', datetime.datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'), ('24', 3, '2257019', datetime.datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'), ('25', 3, '2256663', datetime.datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'), ('13', 3, '3767841', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('26', 3, '2252573', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('27', 3, '2252571', datetime.datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'), ('14', 3, '3767839', datetime.datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'), ('29', 3, '2252531', datetime.datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'), ('15', 3, '3767837', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('28', 3, '2252569', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('21', 3, '3767801', datetime.datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'), ('23', 3, '3767805', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('30', 3, '2252533', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('19', 3, '3767803', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('31', 3, '2252535', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 
'user1'), ('20', 3, '3767809', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('32', 3, '2252539', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('16', 3, '3767835', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('33', 3, '2252567', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('34', 3, '2252565', datetime.datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'), ('17', 3, '3767833', datetime.datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'), ('35', 3, '2252563', datetime.datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'), ('18', 3, '3767831', datetime.datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'), ('36', 3, '2252537', datetime.datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'), ('22', 3, '3767807', datetime.datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'), ('37', 3, '1168027', datetime.datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1')], [('39', 4, '3824717', datetime.datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'), ('40', 4, '3824709', datetime.datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'), ('41', 4, '3824065', datetime.datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'), ('42', 4, '3824063', datetime.datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1')]]
"...我需要验证数据是否按排序显示..." 你不需要 groupby
,你只需要对数据排序:
# Single pass: ascending severity (index 1), then newest date first (index 3).
# Subtracting the date from datetime.max inverts the date order, so no
# reverse=True is needed and the severity order stays ascending.
all_case_data_sorted = sorted(
    all_case_data,
    key=lambda case: (case[1], datetime.max - case[3])
)
如果出于某种原因,您无法像我上面那样通过取反来得到降序的键,您可以多次排序,每个键一次,先按最次要的键排序,最后按最主要的键排序。这是有效的,因为 Python 的排序是稳定的:
# Two stable passes, least significant key first: newest date first (index 3),
# then ascending severity (index 1). Stability keeps the date order intact
# within each severity.
all_case_data_sorted = sorted(all_case_data, key=lambda case: case[3], reverse=True)
all_case_data_sorted.sort(key=lambda case: case[1])
如果您没有在循环中引用迭代变量,这通常表明您的逻辑有问题:
for thing in group:
groups.append(sorted(list(group), key=operator.itemgetter(3), reverse=True)) # Within each group, sort by date
uniquekeys.append(key)
你为什么要循环遍历 group?如果删除那个循环,看起来就能得到你想要的结果。这正是缺少第一个元素的原因:内层循环在那里消耗了它(它被绑定到了 thing),之后 list(group) 只能取到剩下的元素。去掉该循环后,我得到(为便于阅读调整了缩进):
Uniquekeys: [2, 3, 4]
Groups: [[('01', 2, '2253415', datetime.datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1')],
[('02', 3, '3824819', datetime.datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'),
('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), [and so on]
(旁白:使用 pandas 通常可以更轻松地处理表格数据;按严重性升序、日期降序排序将是 data.sort(["severity", "date"], ascending=[True, False])(在较新版本的 pandas 中应写作 data.sort_values(...)),依此类推。)
作为解决此类问题的替代方法,也是更 pythonic 的方法,您可以使用 collections.defaultdict:
>>> from collections import defaultdict
>>> d=defaultdict(list)
>>> for i in all_case_data :
... d[i[1]].append(i)
...
>>> d.keys()
[2, 3, 4]
>>> d.values()
[[('01', 2, '2253415', datetime.datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1')], [('02', 3, '3824819', datetime.datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'), ('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), ('04', 3, '3824707', datetime.datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'), ('05', 3, '3824549', datetime.datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'), ('06', 3, '3824061', datetime.datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'), ('07', 3, '3822989', datetime.datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'), ('08', 3, '3822385', datetime.datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'), ('09', 3, '3822377', datetime.datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'), ('10', 3, '3820965', datetime.datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'), ('11', 3, '3820963', datetime.datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'), ('12', 3, '3767961', datetime.datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'), ('13', 3, '3767841', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('14', 3, '3767839', datetime.datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'), ('15', 3, '3767837', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('16', 3, '3767835', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('17', 3, '3767833', datetime.datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'), ('18', 3, '3767831', datetime.datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'), ('19', 3, '3767803', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('20', 3, '3767809', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('21', 3, '3767801', datetime.datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'), ('22', 3, '3767807', datetime.datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'), ('23', 3, '3767805', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('24', 3, '2257019', datetime.datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'), ('25', 3, '2256663', 
datetime.datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'), ('26', 3, '2252573', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('27', 3, '2252571', datetime.datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'), ('28', 3, '2252569', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('29', 3, '2252531', datetime.datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'), ('30', 3, '2252533', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('31', 3, '2252535', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('32', 3, '2252539', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('33', 3, '2252567', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('34', 3, '2252565', datetime.datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'), ('35', 3, '2252563', datetime.datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'), ('36', 3, '2252537', datetime.datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'), ('37', 3, '1168027', datetime.datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1')], [('38', 4, '3824817', datetime.datetime(2015, 4, 9, 14, 35, 36), 'New', 'user1'), ('39', 4, '3824717', datetime.datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'), ('40', 4, '3824709', datetime.datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'), ('41', 4, '3824065', datetime.datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'), ('42', 4, '3824063', datetime.datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1')]]
我有一个嵌套列表,我需要验证数据是否按第 2 列(票证的严重性)排序,然后在每个严重性组中按时间倒序排序。
我想我已经接近使用 groupby 的正确解决方案,但是由于某种原因,groupby 正在删除每个组中的第一个元素。喝多少咖啡都无法帮助我的大脑理解原因。
all_case_data = [
('01', 2, '2253415', datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1'),
('02', 3, '3824819', datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'),
('03', 3, '3824715', datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'),
('04', 3, '3824707', datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'),
('05', 3, '3824549', datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'),
('06', 3, '3824061', datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'),
('07', 3, '3822989', datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'),
('08', 3, '3822385', datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'),
('09', 3, '3822377', datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'),
('10', 3, '3820965', datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'),
('11', 3, '3820963', datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'),
('12', 3, '3767961', datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'),
('13', 3, '3767841', datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'),
('14', 3, '3767839', datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'),
('15', 3, '3767837', datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'),
('16', 3, '3767835', datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'),
('17', 3, '3767833', datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'),
('18', 3, '3767831', datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'),
('19', 3, '3767803', datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'),
('20', 3, '3767809', datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'),
('21', 3, '3767801', datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'),
('22', 3, '3767807', datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'),
('23', 3, '3767805', datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'),
('24', 3, '2257019', datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'),
('25', 3, '2256663', datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'),
('26', 3, '2252573', datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'),
('27', 3, '2252571', datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'),
('28', 3, '2252569', datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'),
('29', 3, '2252531', datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'),
('30', 3, '2252533', datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'),
('31', 3, '2252535', datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'),
('32', 3, '2252539', datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'),
('33', 3, '2252567', datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'),
('34', 3, '2252565', datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'),
('35', 3, '2252563', datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'),
('36', 3, '2252537', datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'),
('37', 3, '1168027', datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1'),
('38', 4, '3824817', datetime(2015, 4, 9, 14, 35, 36), 'New', 'user1'),
('39', 4, '3824717', datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'),
('40', 4, '3824709', datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'),
('41', 4, '3824065', datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'),
('42', 4, '3824063', datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1'),
]
from itertools import groupby
import operator
# Question's script (Python 2: uses print statements). Groups the cases by
# Severity (index 1) and tries to sort each group newest-first by date (index 3).
# NOTE(review): the indentation of the loop bodies was lost in this copy;
# presumably the lines after each `for` header are nested under it - confirm
# against the original post.
# Create new list composed of initial list, but sorted by Severity
all_case_data_sorted_by_severity = sorted(all_case_data, key=operator.itemgetter(1))
# Leveraging groupby, create a new list composed of sorted data, sorted in reverse chronological order within each group
groups = [] # Contains list of sub-lists grouped by the unique key (Severity)
uniquekeys = [] # Contains concise list of all of the unique keys (Severity)
# Note: groupby requires the list to first be sorted by the key (Severity) so use all_case_data_sorted_by_severity
for key, group in groupby(all_case_data_sorted_by_severity, operator.itemgetter(1)): # x[1] = Severity
# NOTE(review): `group` is a one-shot iterator. This inner loop binds its first
# item to `thing` (which is never used), so the list(group) call below only
# sees the remaining items - consistent with the missing first element of
# every group in the output that follows.
for thing in group:
groups.append(sorted(list(group), key=operator.itemgetter(3), reverse=True)) # Within each group, sort by date
uniquekeys.append(key)
print "Uniquekeys:", uniquekeys
print "Groups:", groups
这是输出。请注意,组中少了三个元素:每个唯一键的第一个元素都丢失了,也就是严重性为 2 的第一个案例(案例 #01)、严重性为 3 的第一个案例(案例 #02)和严重性为 4 的第一个案例(案例 #38)。
Uniquekeys: [2, 3, 4]
Groups: [[], [('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), ('04', 3, '3824707', datetime.datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'), ('05', 3, '3824549', datetime.datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'), ('06', 3, '3824061', datetime.datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'), ('07', 3, '3822989', datetime.datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'), ('08', 3, '3822385', datetime.datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'), ('09', 3, '3822377', datetime.datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'), ('10', 3, '3820965', datetime.datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'), ('11', 3, '3820963', datetime.datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'), ('12', 3, '3767961', datetime.datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'), ('24', 3, '2257019', datetime.datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'), ('25', 3, '2256663', datetime.datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'), ('13', 3, '3767841', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('26', 3, '2252573', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('27', 3, '2252571', datetime.datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'), ('14', 3, '3767839', datetime.datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'), ('29', 3, '2252531', datetime.datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'), ('15', 3, '3767837', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('28', 3, '2252569', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('21', 3, '3767801', datetime.datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'), ('23', 3, '3767805', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('30', 3, '2252533', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('19', 3, '3767803', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('31', 3, '2252535', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 
'user1'), ('20', 3, '3767809', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('32', 3, '2252539', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('16', 3, '3767835', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('33', 3, '2252567', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('34', 3, '2252565', datetime.datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'), ('17', 3, '3767833', datetime.datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'), ('35', 3, '2252563', datetime.datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'), ('18', 3, '3767831', datetime.datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'), ('36', 3, '2252537', datetime.datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'), ('22', 3, '3767807', datetime.datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'), ('37', 3, '1168027', datetime.datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1')], [('39', 4, '3824717', datetime.datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'), ('40', 4, '3824709', datetime.datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'), ('41', 4, '3824065', datetime.datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'), ('42', 4, '3824063', datetime.datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1')]]
"...我需要验证数据是否按排序显示..." 你不需要 groupby
,你只需要对数据排序:
# Single pass: ascending severity (index 1), then newest date first (index 3).
# Subtracting the date from datetime.max inverts the date order, so no
# reverse=True is needed and the severity order stays ascending.
all_case_data_sorted = sorted(
    all_case_data,
    key=lambda case: (case[1], datetime.max - case[3])
)
如果出于某种原因,您无法像我上面那样通过取反来得到降序的键,您可以多次排序,每个键一次,先按最次要的键排序,最后按最主要的键排序。这是有效的,因为 Python 的排序是稳定的:
# Two stable passes, least significant key first: newest date first (index 3),
# then ascending severity (index 1). Stability keeps the date order intact
# within each severity.
all_case_data_sorted = sorted(all_case_data, key=lambda case: case[3], reverse=True)
all_case_data_sorted.sort(key=lambda case: case[1])
如果您没有在循环中引用迭代变量,这通常表明您的逻辑有问题:
for thing in group:
groups.append(sorted(list(group), key=operator.itemgetter(3), reverse=True)) # Within each group, sort by date
uniquekeys.append(key)
你为什么要循环遍历 group?如果删除那个循环,看起来就能得到你想要的结果。这正是缺少第一个元素的原因:内层循环在那里消耗了它(它被绑定到了 thing),之后 list(group) 只能取到剩下的元素。去掉该循环后,我得到(为便于阅读调整了缩进):
Uniquekeys: [2, 3, 4]
Groups: [[('01', 2, '2253415', datetime.datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1')],
[('02', 3, '3824819', datetime.datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'),
('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), [and so on]
(旁白:使用 pandas 通常可以更轻松地处理表格数据;按严重性升序、日期降序排序将是 data.sort(["severity", "date"], ascending=[True, False])(在较新版本的 pandas 中应写作 data.sort_values(...)),依此类推。)
作为解决此类问题的替代方法,也是更 pythonic 的方法,您可以使用 collections.defaultdict:
>>> from collections import defaultdict
>>> d=defaultdict(list)
>>> for i in all_case_data :
... d[i[1]].append(i)
...
>>> d.keys()
[2, 3, 4]
>>> d.values()
[[('01', 2, '2253415', datetime.datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1')], [('02', 3, '3824819', datetime.datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'), ('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), ('04', 3, '3824707', datetime.datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'), ('05', 3, '3824549', datetime.datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'), ('06', 3, '3824061', datetime.datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'), ('07', 3, '3822989', datetime.datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'), ('08', 3, '3822385', datetime.datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'), ('09', 3, '3822377', datetime.datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'), ('10', 3, '3820965', datetime.datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'), ('11', 3, '3820963', datetime.datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'), ('12', 3, '3767961', datetime.datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'), ('13', 3, '3767841', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('14', 3, '3767839', datetime.datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'), ('15', 3, '3767837', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('16', 3, '3767835', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('17', 3, '3767833', datetime.datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'), ('18', 3, '3767831', datetime.datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'), ('19', 3, '3767803', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('20', 3, '3767809', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('21', 3, '3767801', datetime.datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'), ('22', 3, '3767807', datetime.datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'), ('23', 3, '3767805', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('24', 3, '2257019', datetime.datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'), ('25', 3, '2256663', 
datetime.datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'), ('26', 3, '2252573', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('27', 3, '2252571', datetime.datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'), ('28', 3, '2252569', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('29', 3, '2252531', datetime.datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'), ('30', 3, '2252533', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('31', 3, '2252535', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('32', 3, '2252539', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('33', 3, '2252567', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('34', 3, '2252565', datetime.datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'), ('35', 3, '2252563', datetime.datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'), ('36', 3, '2252537', datetime.datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'), ('37', 3, '1168027', datetime.datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1')], [('38', 4, '3824817', datetime.datetime(2015, 4, 9, 14, 35, 36), 'New', 'user1'), ('39', 4, '3824717', datetime.datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'), ('40', 4, '3824709', datetime.datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'), ('41', 4, '3824065', datetime.datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'), ('42', 4, '3824063', datetime.datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1')]]