在 C# 中高效地将 DataTable 转换为列表
Efficient Converting DataTable to List in C#
我想在 C# 中将 DataTable 序列化为一个列表。我的代码大约需要 10 分钟来获取 100K 数据并将其转换为列表。以前尝试过顺序方式,但处理时间增加了三倍。所以,我尝试了 Parallel,它节省了 1/3 的时间。但是,还是太慢了。
如果有人可以帮助加快速度。感谢任何帮助。
public async Task<List<DataPointModel>> GetDetails(HashParameterModel parameterModel)
{
List<DataPointModel> result = new List<DataPointModel>();
try
{
var query = "USP_GetDetailsFromMetaData";
using (var sqlConnection = new SqlConnection(connectionString))
{
using (var sqlCommand = new SqlCommand(query, sqlConnection))
{
sqlCommand.CommandType = CommandType.StoredProcedure;
sqlCommand.Parameters.AddWithValue("@GroupID", parameterModel.GroupID);
sqlCommand.CommandTimeout = 0;
sqlConnection.Open();
DataTable dataTable = new DataTable();
DataSet dataSet = new DataSet();
SqlDataAdapter da = new SqlDataAdapter
{
SelectCommand = sqlCommand
};
da.Fill(dataSet);
dataTable = dataSet.Tables[0];
DataTable dataTable1 = dataSet.Tables[1];
var questionList = dataTable1.AsEnumerable();
if (dataTable.Rows.Count > 0)
{
Parallel.ForEach(dataTable.AsEnumerable(), new ParallelOptions { MaxDegreeOfParallelism = 5 }, row =>
{
DataPointModel model = new DataPointModel();
model.ID = Convert.ToInt32(row["ID"]);
model.GroupID = Convert.ToInt32(row["GroupID"]);
model.ProviderID = Convert.ToInt32(row["SalvageProviderID"]);
model.ClaimNumber = row["ClaimNumber"].ToString();
model.PolicyNumber = row["PolicyNumber"].ToString();
model.DataPoint1 = row["DataPoint1"].ToString();
model.DataPoint2 = row["DataPoint2"].ToString();
model.DataPoint3 = row["DataPoint3"].ToString();
model.DataPoint4 = row["DataPoint4"].ToString();
model.FirstName = row["FirstName"].ToString();
model.LastName = row["LastName"].ToString();
model.PrimaryDamage = row["PrimaryDamage"].ToString();
model.Type = row["TypeCode"].ToString();
model.LossDate = row["LossDate"].ToString();
model.QuestionList = (from p in questionList
where p.Field<int>("ID") == model.ID
select new QuestionResponseModel()
{
QuestionID = p.Field<int>("QuestionID").ToString(),
Response = p.Field<string>("ResponseValue")
}).ToList();
result.Add(model);
});
}
}
}
}
catch (Exception ex)
{
throw ex;
}
return result;
}
DataSet有两个DataTable
DataTable dataTable = dataSet.Tables[0]; // Details
DataTable dataTable1 = dataSet.Tables[1]; // QUestionList
我认为它在 QuestionList 间歇循环时消耗了时间,它可能有大约 120K 行。任何建议
实现性能改进的一种简单快速的方法是从您的 questionList
构建一个查找 table,然后访问它来获取问题而不是执行这段代码
model.QuestionList = (from p in questionList
where p.Field<int>("ID") == model.ID
select new QuestionResponseModel()
{
QuestionID = p.Field<int>("QuestionID").ToString(),
Response = p.Field<string>("ResponseValue")
}).ToList();
所以添加以下内容
var questionList = dataTable1.AsEnumerable();
//maybe add .AsParallel() - questionList.AsParallel().ToLookUp(...)
var questionLookUp = questionList.ToLookUp(x => x.Field<int>("ID"), x => new QuestionResponseModel() { QuestionID = x.Field<int>("QuestionID"), Response = p.Field<string>("ResponseValue") });
而不是像这样使用它
model.QuestionList = questionLookUp[model.ID].ToList();
https://docs.microsoft.com/en-us/dotnet/api/system.linq.lookup-2
https://docs.microsoft.com/en-us/dotnet/api/system.linq.enumerable.tolookup
代码未经测试,希望我没有犯太多错误。
详细信息中是否存在重复的模型 ID table?如果是这样,这可能有助于避免 运行 多次查询问题列表:
model.QuestionList = getQuestions(model.ID);
方法:
public Dictionary<int, List<QuestionResponseModel>> questionBuffer = new Dictionary<int, List<QuestionResponseModel>>();
public List<QuestionResponseModel> getQuestions(int ID)
{
if (questionBuffer.ContainsKey(ID)) return questionBuffer[ID];
List<QuestionResponseModel> questions = (from p in questionList
where p.Field<int>("ID") == model.ID
select new QuestionResponseModel()
{
QuestionID = p.Field<int>("QuestionID").ToString(),
Response = p.Field<string>("ResponseValue")
}).ToList();
questionBuffer.Add(ID, questions);
return questions;
}
我想在 C# 中将 DataTable 序列化为一个列表。我的代码大约需要 10 分钟来获取 100K 数据并将其转换为列表。以前尝试过顺序方式,但处理时间增加了三倍。所以,我尝试了 Parallel,它节省了 1/3 的时间。但是,还是太慢了。
如果有人可以帮助加快速度。感谢任何帮助。
public async Task<List<DataPointModel>> GetDetails(HashParameterModel parameterModel)
{
List<DataPointModel> result = new List<DataPointModel>();
try
{
var query = "USP_GetDetailsFromMetaData";
using (var sqlConnection = new SqlConnection(connectionString))
{
using (var sqlCommand = new SqlCommand(query, sqlConnection))
{
sqlCommand.CommandType = CommandType.StoredProcedure;
sqlCommand.Parameters.AddWithValue("@GroupID", parameterModel.GroupID);
sqlCommand.CommandTimeout = 0;
sqlConnection.Open();
DataTable dataTable = new DataTable();
DataSet dataSet = new DataSet();
SqlDataAdapter da = new SqlDataAdapter
{
SelectCommand = sqlCommand
};
da.Fill(dataSet);
dataTable = dataSet.Tables[0];
DataTable dataTable1 = dataSet.Tables[1];
var questionList = dataTable1.AsEnumerable();
if (dataTable.Rows.Count > 0)
{
Parallel.ForEach(dataTable.AsEnumerable(), new ParallelOptions { MaxDegreeOfParallelism = 5 }, row =>
{
DataPointModel model = new DataPointModel();
model.ID = Convert.ToInt32(row["ID"]);
model.GroupID = Convert.ToInt32(row["GroupID"]);
model.ProviderID = Convert.ToInt32(row["SalvageProviderID"]);
model.ClaimNumber = row["ClaimNumber"].ToString();
model.PolicyNumber = row["PolicyNumber"].ToString();
model.DataPoint1 = row["DataPoint1"].ToString();
model.DataPoint2 = row["DataPoint2"].ToString();
model.DataPoint3 = row["DataPoint3"].ToString();
model.DataPoint4 = row["DataPoint4"].ToString();
model.FirstName = row["FirstName"].ToString();
model.LastName = row["LastName"].ToString();
model.PrimaryDamage = row["PrimaryDamage"].ToString();
model.Type = row["TypeCode"].ToString();
model.LossDate = row["LossDate"].ToString();
model.QuestionList = (from p in questionList
where p.Field<int>("ID") == model.ID
select new QuestionResponseModel()
{
QuestionID = p.Field<int>("QuestionID").ToString(),
Response = p.Field<string>("ResponseValue")
}).ToList();
result.Add(model);
});
}
}
}
}
catch (Exception ex)
{
throw ex;
}
return result;
}
DataSet有两个DataTable
DataTable dataTable = dataSet.Tables[0]; // Details
DataTable dataTable1 = dataSet.Tables[1]; // QUestionList
我认为它在 QuestionList 间歇循环时消耗了时间,它可能有大约 120K 行。任何建议
实现性能改进的一种简单快速的方法是从您的 questionList
构建一个查找 table,然后访问它来获取问题而不是执行这段代码
model.QuestionList = (from p in questionList
where p.Field<int>("ID") == model.ID
select new QuestionResponseModel()
{
QuestionID = p.Field<int>("QuestionID").ToString(),
Response = p.Field<string>("ResponseValue")
}).ToList();
所以添加以下内容
var questionList = dataTable1.AsEnumerable();
//maybe add .AsParallel() - questionList.AsParallel().ToLookUp(...)
var questionLookUp = questionList.ToLookUp(x => x.Field<int>("ID"), x => new QuestionResponseModel() { QuestionID = x.Field<int>("QuestionID"), Response = p.Field<string>("ResponseValue") });
而不是像这样使用它
model.QuestionList = questionLookUp[model.ID].ToList();
https://docs.microsoft.com/en-us/dotnet/api/system.linq.lookup-2
https://docs.microsoft.com/en-us/dotnet/api/system.linq.enumerable.tolookup
代码未经测试,希望我没有犯太多错误。
详细信息中是否存在重复的模型 ID table?如果是这样,这可能有助于避免 运行 多次查询问题列表:
model.QuestionList = getQuestions(model.ID);
方法:
public Dictionary<int, List<QuestionResponseModel>> questionBuffer = new Dictionary<int, List<QuestionResponseModel>>();
public List<QuestionResponseModel> getQuestions(int ID)
{
if (questionBuffer.ContainsKey(ID)) return questionBuffer[ID];
List<QuestionResponseModel> questions = (from p in questionList
where p.Field<int>("ID") == model.ID
select new QuestionResponseModel()
{
QuestionID = p.Field<int>("QuestionID").ToString(),
Response = p.Field<string>("ResponseValue")
}).ToList();
questionBuffer.Add(ID, questions);
return questions;
}