减少 .NET Core 中公共列表 属性 比较的数量
Reduce the number of common list property comparison in .NET Core
我有两个一对多关系的table,在这个例子中用Customers
(一个)和CustomersInfo
(多个)表示。
using System.Collections.Generic;
namespace Domain.Entities
{
public class CustomersAggregate
{
public CustomersAggregate(int id, string name)
{
Id = id;
Name = name;
}
public int Id { get; set; }
public string Name { get; set; }
public IEnumerable<CustomersInfo> CustomersInfo { get; set; }
}
public class CustomersInfo
{
public CustomersInfo(int customerId, string email)
{
CustomerId = customerId;
Email = email;
}
public CustomersInfo(int customerId, string email, string name)
{
CustomerId = customerId;
Email = email;
Name = name;
}
public int CustomerId { get; set; }
public string Email { get; set; }
public string Name { get; set; }
public CustomersAggregate Customers { get; set; }
}
}
在真实场景中,我必须读取包含两列的 .xlsx 文件:Name
和 Email
。为了插入客户 table,我按名称分组,然后 select 每个分组的第一个,然后插入客户 table。因此,在 Customers table 中,所有名称都是不同的。之后,我再次读取 .xlsx 文件并将已插入 Customers 中的名称与文件名称列中读取的值进行比较。如果 CustomersInfo.CustomerId 相同,则接收 Customers.Id。下面我代表描述的:
using Domain.Entities;
using System;
using System.Collections.Generic;
namespace Domain
{
public static class Program
{
public static void Main()
{
IEnumerable<Customers> customers = new List<Customers>() {
new Customers(1, "Gustavo"),
new Customers(2, "Lilian"),
new Customers(3, "Ruan"),
new Customers(4, "Zeze"),
new Customers(5, "Augusto"),
new Customers(6, "Maicon"),
new Customers(7, "Nadia")
};
IEnumerable<CustomersInfo> customersInfos = new List<CustomersInfo>()
{
new CustomersInfo(0, "gustavo@costa.com", "Gustavo"),
new CustomersInfo(0, "gustavo@mcosta.com", "Gustavo"),
new CustomersInfo(0, "lilian@costa.com", "Lilian"),
new CustomersInfo(0, "lian@costa.com", "Lilian"),
new CustomersInfo(0, "ruan@co3sta.com", "Ruan"),
new CustomersInfo(0, "ruan@c2osta.com", "Ruan"),
new CustomersInfo(0, "ruan@1costa.com", "Ruan"),
new CustomersInfo(0, "zeze@costa.com", "Zeze"),
new CustomersInfo(0, "zeze@sscosta.com", "Zeze"),
new CustomersInfo(0, "austo@lentini.com", "Augusto"),
new CustomersInfo(0, "austo@lib.com", "Augusto"),
new CustomersInfo(0, "maicon@22coa.com", "Maicon"),
new CustomersInfo(0, "nadia@22c.com", "Nadia"),
};
var ci = new List<CustomersInfo>();
foreach (var customer in customers)
{
foreach (var customerInfo in customersInfos)
{
if (customer.Name == customerInfo.Name)
{
ci.Add(new CustomersInfo(customer.Id, customerInfo.Email));
}
}
}
ci.ForEach(x =>
{
Console.WriteLine(string.Concat(x.CustomerId, " - ", x.Email));
});
}
}
}
问题是 .xlsx 文件有大约 7000 条记录。对select不同客户的姓名进行分组后,Customerstable有3000行,然后与文件中的7000条记录进行比较,所以最后进行3000 x 7000次比较必要的,这显然会使程序非常慢。我认为提高绩效的一种方法是按字母顺序对客户进行排序,根据客户的中心项目将他们分成两个列表,并仅在这些列表内进行比较。理想情况下,在下面的比较中,如果 customerInfo.Name 的前导字母不是 customerInfo 的首字母,则 customerInfo.Name 不会与 customer.Name 进行比较。
if (customer.Name == customerInfo.Name)
但是,我不知道如何实现它。有人可以帮忙吗?
您可以使用 LINQ 的方法 GroupBy
和 ToDictionary
:
var dict = customersInfos
.GroupBy(ci => ci.Name)
.ToDictionary(g => g.Key);
foreach (var customer in customers)
{
if(dict.ContainsKey(customer.Name))
{
ci.AddRange(dict[customer.Name].Select(cInf => new CustomersInfo(customer.Id, cInf.Email)));
}
}
这应该将操作数从原来的 O(customer.Lenth * customersInfos.Length)
减少到 O(customer.Lenth + customersInfos.Length)
通过找出瓶颈尝试使用算法方法。瓶颈在于,对于每个 customer
整个列表 customerInfos
扫描以确保是否有 email
匹配。需要 O(n).
通过customer.Name
查找email
的操作可以使用Dictionary<string,string>
数据结构来完成。按 customer.Name
进行搜索需要 O(1)
var dict = customerInfos.ToDictionary(x => x.Email, x => x.Name);
foreach (var customer in customers)
if (dict.ContainsKey(customer.Name))
ci.Add(new CustomersInfo(customer.Id, dict[customer.Name]));
我有两个一对多关系的table,在这个例子中用Customers
(一个)和CustomersInfo
(多个)表示。
using System.Collections.Generic;
namespace Domain.Entities
{
public class CustomersAggregate
{
public CustomersAggregate(int id, string name)
{
Id = id;
Name = name;
}
public int Id { get; set; }
public string Name { get; set; }
public IEnumerable<CustomersInfo> CustomersInfo { get; set; }
}
public class CustomersInfo
{
public CustomersInfo(int customerId, string email)
{
CustomerId = customerId;
Email = email;
}
public CustomersInfo(int customerId, string email, string name)
{
CustomerId = customerId;
Email = email;
Name = name;
}
public int CustomerId { get; set; }
public string Email { get; set; }
public string Name { get; set; }
public CustomersAggregate Customers { get; set; }
}
}
在真实场景中,我必须读取包含两列的 .xlsx 文件:Name
和 Email
。为了插入客户 table,我按名称分组,然后 select 每个分组的第一个,然后插入客户 table。因此,在 Customers table 中,所有名称都是不同的。之后,我再次读取 .xlsx 文件并将已插入 Customers 中的名称与文件名称列中读取的值进行比较。如果 CustomersInfo.CustomerId 相同,则接收 Customers.Id。下面我代表描述的:
using Domain.Entities;
using System;
using System.Collections.Generic;
namespace Domain
{
public static class Program
{
public static void Main()
{
IEnumerable<Customers> customers = new List<Customers>() {
new Customers(1, "Gustavo"),
new Customers(2, "Lilian"),
new Customers(3, "Ruan"),
new Customers(4, "Zeze"),
new Customers(5, "Augusto"),
new Customers(6, "Maicon"),
new Customers(7, "Nadia")
};
IEnumerable<CustomersInfo> customersInfos = new List<CustomersInfo>()
{
new CustomersInfo(0, "gustavo@costa.com", "Gustavo"),
new CustomersInfo(0, "gustavo@mcosta.com", "Gustavo"),
new CustomersInfo(0, "lilian@costa.com", "Lilian"),
new CustomersInfo(0, "lian@costa.com", "Lilian"),
new CustomersInfo(0, "ruan@co3sta.com", "Ruan"),
new CustomersInfo(0, "ruan@c2osta.com", "Ruan"),
new CustomersInfo(0, "ruan@1costa.com", "Ruan"),
new CustomersInfo(0, "zeze@costa.com", "Zeze"),
new CustomersInfo(0, "zeze@sscosta.com", "Zeze"),
new CustomersInfo(0, "austo@lentini.com", "Augusto"),
new CustomersInfo(0, "austo@lib.com", "Augusto"),
new CustomersInfo(0, "maicon@22coa.com", "Maicon"),
new CustomersInfo(0, "nadia@22c.com", "Nadia"),
};
var ci = new List<CustomersInfo>();
foreach (var customer in customers)
{
foreach (var customerInfo in customersInfos)
{
if (customer.Name == customerInfo.Name)
{
ci.Add(new CustomersInfo(customer.Id, customerInfo.Email));
}
}
}
ci.ForEach(x =>
{
Console.WriteLine(string.Concat(x.CustomerId, " - ", x.Email));
});
}
}
}
问题是 .xlsx 文件有大约 7000 条记录。对select不同客户的姓名进行分组后,Customerstable有3000行,然后与文件中的7000条记录进行比较,所以最后进行3000 x 7000次比较必要的,这显然会使程序非常慢。我认为提高绩效的一种方法是按字母顺序对客户进行排序,根据客户的中心项目将他们分成两个列表,并仅在这些列表内进行比较。理想情况下,在下面的比较中,如果 customerInfo.Name 的前导字母不是 customerInfo 的首字母,则 customerInfo.Name 不会与 customer.Name 进行比较。
if (customer.Name == customerInfo.Name)
但是,我不知道如何实现它。有人可以帮忙吗?
您可以使用 LINQ 的方法 GroupBy
和 ToDictionary
:
var dict = customersInfos
.GroupBy(ci => ci.Name)
.ToDictionary(g => g.Key);
foreach (var customer in customers)
{
if(dict.ContainsKey(customer.Name))
{
ci.AddRange(dict[customer.Name].Select(cInf => new CustomersInfo(customer.Id, cInf.Email)));
}
}
这应该将操作数从原来的 O(customer.Lenth * customersInfos.Length)
减少到 O(customer.Lenth + customersInfos.Length)通过找出瓶颈尝试使用算法方法。瓶颈在于,对于每个 customer
整个列表 customerInfos
扫描以确保是否有 email
匹配。需要 O(n).
通过customer.Name
查找email
的操作可以使用Dictionary<string,string>
数据结构来完成。按 customer.Name
进行搜索需要 O(1)
var dict = customerInfos.ToDictionary(x => x.Email, x => x.Name);
foreach (var customer in customers)
if (dict.ContainsKey(customer.Name))
ci.Add(new CustomersInfo(customer.Id, dict[customer.Name]));