有没有更有效的方法来比较两个列表的项目并找到 leftOuterItems、rightOuterItems 和 matchedItems?
Is there a more efficient way to compare the items of two lists and find leftOuterItems, rightOuterItems and matchedItems?
我们经常需要一种方法来比较两个列表的项目,找出哪些项目只存在于ListA(leftOuterItems),哪些项目仅存在于ListB(rightOuterItems)及其共同项目(matchedItems)...
我最终得到了两个解决方案,如下所示:
一种方法是对列表进行排序并逐个迭代(当集合由于排序而导致项目过多时会降低性能),另一种方法是使用字典和散列(比当集合有几个项目时的第一种方式 - 由于内存分配等)
*另外请记住,我想比较两个对象列表,例如两个 class 人(不仅仅是基元)列表。这就是我创建通用扩展方法的原因
那么,你有什么更好的建议吗?
在此先感谢您!
class Program
{
static void Main(string[] args)
{
var fewItemsList1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
var fewItemsList2 = new[] { 15, 5, 14, 6, 13, 7, 12, 8, 11, 9, 10 };
Run(100_000, fewItemsList1, fewItemsList2);
var manyItemsList1 = Enumerable.Range(0, 100_000).ToArray();
var manyItemsList2 = Enumerable.Range(50000, 150_000).ToArray();
Run(1000, manyItemsList1, manyItemsList2);
Console.WriteLine("Hello World!");
Console.Read();
}
private static void Run(int count, int[] l1, int[] l2)
{
var sw = Stopwatch.StartNew();
for (int i = 0; i < count; i++)
l1.OrderedCompare(l2, x => x, x => x, out int[] leftOuterItems, out int[] rightOuterItems, out (int, int)[] matchedItems);
sw.Stop();
Console.WriteLine($"OrderedCompare for {count} iterations with L1 items:{l1.Count()} and L2 items:{l2.Count()} took {sw.Elapsed}");
sw.Restart();
for (int i = 0; i < count; i++)
l1.HashedCompare(l2, x => x, x => x, out int[] leftOuterItems2, out int[] rightOuterItems2, out (int, int)[] matchedItems2);
Console.WriteLine($"HashedCompare for {count} with L1 items:{l1.Count()} and L2 items:{l2.Count()} iterations took {sw.Elapsed}");
}
}
public static class Extensions
{
public static void OrderedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out T1[] leftOuterItems,
out T2[] rightOuterItems,
out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
{
var leftOuterItemsList = new List<T1>();
var rightOuterItemsList = new List<T2>();
var matchedItemsList = new List<(T1, T2)>();
source = source.OrderBy(x => sourceKeyGetter(x)).ToArray();
target = target.OrderBy(x => targetKeyGetter(x)).ToArray();
bool reverseCompare = false;
int i = 0, j = 0, sourcZeroBasedCount = source.Count() - 1, targetZeroBaseCount = target.Count() - 1;
while (true)
{
var end = i == sourcZeroBasedCount && j == targetZeroBaseCount;
var sourceItem = source.ElementAt(i);
var targetItem = target.ElementAt(j);
var sourceKey = sourceKeyGetter(sourceItem);
var targetKey = targetKeyGetter(targetItem);
int diff = reverseCompare ? targetKey.CompareTo(sourceKey) : sourceKey.CompareTo(targetKey);
reverseCompare = i == sourcZeroBasedCount || j == targetZeroBaseCount;
switch (diff)
{
case -1:
leftOuterItemsList.Add(sourceItem);
i = i < sourcZeroBasedCount ? i + 1 : i;
break;
case 0:
matchedItemsList.Add((sourceItem, targetItem));
i = i < sourcZeroBasedCount ? i + 1 : i;
j = j < targetZeroBaseCount ? j + 1 : j;
break;
case 1:
rightOuterItemsList.Add(targetItem);
j = j < targetZeroBaseCount ? j + 1 : j;
break;
}
if (end)
break;
}
leftOuterItems = leftOuterItemsList.ToArray();
rightOuterItems = rightOuterItemsList.ToArray();
matchedItems = matchedItemsList.ToArray();
}
public static void HashedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out T1[] leftOuterItems,
out T2[] rightOuterItems,
out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
{
var sourceDic = source.ToDictionary(x => sourceKeyGetter(x));
var targetDic = target.ToDictionary(x => targetKeyGetter(x));
var leftOuterKeys = sourceDic.Keys.Except(targetDic.Keys).ToArray();
var rightOuterKeys = targetDic.Keys.Except(sourceDic.Keys).ToArray();
var matchedKeys = sourceDic.Keys.Concat(targetDic.Keys).Except(leftOuterKeys.Concat(rightOuterKeys)).ToArray();
leftOuterItems = leftOuterKeys.Select(key => sourceDic[key]).ToArray();
rightOuterItems = rightOuterKeys.Select(key => targetDic[key]).ToArray();
matchedItems = matchedKeys.Select(key => (sourceDic[key], targetDic[key])).ToArray();
}
}
您可以使用 Except
和 Intersect
,它们都可以在 Sets
(轻量级哈希集)上工作,并且可以工作 O(n) 线性时间复杂度
var list1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
var List2 = new[] { 15, 5, 14, 6, 13, 7, 12, 8, 11, 9, 10 };
Console.WriteLine(string.Join(", ", list1.Except(List2)));
Console.WriteLine(string.Join(", ", List2.Except(list1)));
Console.WriteLine(string.Join(", ", List2.Intersect(list1)));
输出
1, 4, 2, 3
15, 14, 13, 12, 8, 11, 10
5, 6, 7, 9
至于它是快还是慢,你必须基准测试,但我的直觉是它们会更有效率和更快。
关于基准测试
使用经过可靠测试的框架,例如 BenchmarkDotNet,如果您自己滚动,您可能会以无限种方式得到错误的结果
HashedCompare()
中的大部分低效率归因于字典中不必要的枚举和查找。如果您以命令式风格编写算法,则可以避免所有这些,并且代码变得更容易理解:
我支持 @00110001 建议您应该使用适当的基准测试框架,因为不同实现之间的差异具有相同的复杂程度。
public static void HashedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out List<T1> leftOuterItems,
out List<T2> rightOuterItems,
out List<(T1, T2)> matchedItems) where TKey : IEquatable<TKey>
{
var sourceItems = source.ToDictionary(x => sourceKeyGetter(x));
var targetItems = target.ToDictionary(x => targetKeyGetter(x));
matchedItems = new List<(T1, T2)>();
leftOuterItems = new List<T1>();
rightOuterItems = new List<T2>();
foreach (var sourceItem in sourceItems)
{
if (targetItems.TryGetValue(sourceItem.Key, out var targetItem))
matchedItems.Add((sourceItem.Value, targetItem));
else
leftOuterItems.Add(sourceItem.Value);
}
foreach (var targetItem in targetItems)
{
if (!sourceItems.ContainsKey(targetItem.Key))
rightOuterItems.Add(targetItem.Value);
}
}
我们经常需要一种方法来比较两个列表的项目,找出哪些项目只存在于ListA(leftOuterItems),哪些项目仅存在于ListB(rightOuterItems)及其共同项目(matchedItems)...
我最终得到了两个解决方案,如下所示:
一种方法是对列表进行排序并逐个迭代(当集合由于排序而导致项目过多时会降低性能),另一种方法是使用字典和散列(比当集合有几个项目时的第一种方式 - 由于内存分配等)
*另外请记住,我想比较两个对象列表,例如两个 class 人(不仅仅是基元)列表。这就是我创建通用扩展方法的原因
那么,你有什么更好的建议吗?
在此先感谢您!
class Program
{
static void Main(string[] args)
{
var fewItemsList1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
var fewItemsList2 = new[] { 15, 5, 14, 6, 13, 7, 12, 8, 11, 9, 10 };
Run(100_000, fewItemsList1, fewItemsList2);
var manyItemsList1 = Enumerable.Range(0, 100_000).ToArray();
var manyItemsList2 = Enumerable.Range(50000, 150_000).ToArray();
Run(1000, manyItemsList1, manyItemsList2);
Console.WriteLine("Hello World!");
Console.Read();
}
private static void Run(int count, int[] l1, int[] l2)
{
var sw = Stopwatch.StartNew();
for (int i = 0; i < count; i++)
l1.OrderedCompare(l2, x => x, x => x, out int[] leftOuterItems, out int[] rightOuterItems, out (int, int)[] matchedItems);
sw.Stop();
Console.WriteLine($"OrderedCompare for {count} iterations with L1 items:{l1.Count()} and L2 items:{l2.Count()} took {sw.Elapsed}");
sw.Restart();
for (int i = 0; i < count; i++)
l1.HashedCompare(l2, x => x, x => x, out int[] leftOuterItems2, out int[] rightOuterItems2, out (int, int)[] matchedItems2);
Console.WriteLine($"HashedCompare for {count} with L1 items:{l1.Count()} and L2 items:{l2.Count()} iterations took {sw.Elapsed}");
}
}
public static class Extensions
{
public static void OrderedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out T1[] leftOuterItems,
out T2[] rightOuterItems,
out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
{
var leftOuterItemsList = new List<T1>();
var rightOuterItemsList = new List<T2>();
var matchedItemsList = new List<(T1, T2)>();
source = source.OrderBy(x => sourceKeyGetter(x)).ToArray();
target = target.OrderBy(x => targetKeyGetter(x)).ToArray();
bool reverseCompare = false;
int i = 0, j = 0, sourcZeroBasedCount = source.Count() - 1, targetZeroBaseCount = target.Count() - 1;
while (true)
{
var end = i == sourcZeroBasedCount && j == targetZeroBaseCount;
var sourceItem = source.ElementAt(i);
var targetItem = target.ElementAt(j);
var sourceKey = sourceKeyGetter(sourceItem);
var targetKey = targetKeyGetter(targetItem);
int diff = reverseCompare ? targetKey.CompareTo(sourceKey) : sourceKey.CompareTo(targetKey);
reverseCompare = i == sourcZeroBasedCount || j == targetZeroBaseCount;
switch (diff)
{
case -1:
leftOuterItemsList.Add(sourceItem);
i = i < sourcZeroBasedCount ? i + 1 : i;
break;
case 0:
matchedItemsList.Add((sourceItem, targetItem));
i = i < sourcZeroBasedCount ? i + 1 : i;
j = j < targetZeroBaseCount ? j + 1 : j;
break;
case 1:
rightOuterItemsList.Add(targetItem);
j = j < targetZeroBaseCount ? j + 1 : j;
break;
}
if (end)
break;
}
leftOuterItems = leftOuterItemsList.ToArray();
rightOuterItems = rightOuterItemsList.ToArray();
matchedItems = matchedItemsList.ToArray();
}
public static void HashedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out T1[] leftOuterItems,
out T2[] rightOuterItems,
out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
{
var sourceDic = source.ToDictionary(x => sourceKeyGetter(x));
var targetDic = target.ToDictionary(x => targetKeyGetter(x));
var leftOuterKeys = sourceDic.Keys.Except(targetDic.Keys).ToArray();
var rightOuterKeys = targetDic.Keys.Except(sourceDic.Keys).ToArray();
var matchedKeys = sourceDic.Keys.Concat(targetDic.Keys).Except(leftOuterKeys.Concat(rightOuterKeys)).ToArray();
leftOuterItems = leftOuterKeys.Select(key => sourceDic[key]).ToArray();
rightOuterItems = rightOuterKeys.Select(key => targetDic[key]).ToArray();
matchedItems = matchedKeys.Select(key => (sourceDic[key], targetDic[key])).ToArray();
}
}
您可以使用 Except
和 Intersect
,它们都可以在 Sets
(轻量级哈希集)上工作,并且可以工作 O(n) 线性时间复杂度
var list1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
var List2 = new[] { 15, 5, 14, 6, 13, 7, 12, 8, 11, 9, 10 };
Console.WriteLine(string.Join(", ", list1.Except(List2)));
Console.WriteLine(string.Join(", ", List2.Except(list1)));
Console.WriteLine(string.Join(", ", List2.Intersect(list1)));
输出
1, 4, 2, 3
15, 14, 13, 12, 8, 11, 10
5, 6, 7, 9
至于它是快还是慢,你必须基准测试,但我的直觉是它们会更有效率和更快。
关于基准测试
使用经过可靠测试的框架,例如 BenchmarkDotNet,如果您自己滚动,您可能会以无限种方式得到错误的结果
HashedCompare()
中的大部分低效率归因于字典中不必要的枚举和查找。如果您以命令式风格编写算法,则可以避免所有这些,并且代码变得更容易理解:
我支持 @00110001 建议您应该使用适当的基准测试框架,因为不同实现之间的差异具有相同的复杂程度。
public static void HashedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out List<T1> leftOuterItems,
out List<T2> rightOuterItems,
out List<(T1, T2)> matchedItems) where TKey : IEquatable<TKey>
{
var sourceItems = source.ToDictionary(x => sourceKeyGetter(x));
var targetItems = target.ToDictionary(x => targetKeyGetter(x));
matchedItems = new List<(T1, T2)>();
leftOuterItems = new List<T1>();
rightOuterItems = new List<T2>();
foreach (var sourceItem in sourceItems)
{
if (targetItems.TryGetValue(sourceItem.Key, out var targetItem))
matchedItems.Add((sourceItem.Value, targetItem));
else
leftOuterItems.Add(sourceItem.Value);
}
foreach (var targetItem in targetItems)
{
if (!sourceItems.ContainsKey(targetItem.Key))
rightOuterItems.Add(targetItem.Value);
}
}