使用多个并行查询时的性能问题 - SqlClient
Performance issue when using multiple parallel queries - SqlClient
我注意到在更改服务器并尝试为某些数据库密集型任务增加工作线程数后我的应用程序出现性能问题。
经过一些测试,我发现问题出在从 dataReader 读取数据上。在 30 个线程上执行简单查询比在单线程上执行至少慢 15 倍。使用 PerfView 我发现大部分时间都浪费在 BLOCKED_TIME 上。
为了进行测试,我使用带有 Ryzen Threadripper(32 核/64 线程)的服务器和 SqlServer 的本地实例。在具有相似规格的生产服务器上得到相同的结果。
我已经尝试过同时运行 30 个应用程序实例 - 运行 2-3 个实例与运行 30 个实例时的性能几乎没有差异,因此服务器性能足以进行 30 个并行查询。
我已经尝试对连接字符串进行一些更改,例如增大/减小(increase/decrease)min/max 池大小、禁用连接池、将 LPC 更改为 TCP - 没有结果。
/// <summary>
/// Console benchmark that measures how long it takes to execute a parameterized query and
/// drain its data reader when the query is run by 1 to 30 concurrent tasks.
/// </summary>
/// <remarks>
/// <c>ConnectionString</c> and <c>SqlQuery</c> are referenced but not declared in this snippet;
/// they have to be supplied as static members of this class.
/// </remarks>
class Program
{
    /// <summary>Largest number of concurrent tasks the benchmark ramps up to.</summary>
    private const int MaxThreads = 30;

    /// <summary>Number of timed query executions each task performs per round.</summary>
    private const int RunsPerTask = 20;

    /// <summary>
    /// Runs one warm-up query, then for every task count from 1 to <see cref="MaxThreads"/>
    /// starts that many tasks, lets each execute the query <see cref="RunsPerTask"/> times,
    /// and prints the average measured time in milliseconds for that round.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <exception cref="InvalidOperationException">
    /// Fewer than <see cref="MaxThreads"/> ids were provided.
    /// </exception>
    static void Main(string[] args)
    {
        var ids = new List<Guid>
        {
            // TODO: fill with at least MaxThreads distinct ids taken from the database.
        };

        // Every task indexes ids[idx]; fail with a clear message instead of an
        // ArgumentOutOfRangeException thrown from inside a worker task.
        if (ids.Count < MaxThreads)
        {
            throw new InvalidOperationException(
                String.Format("The benchmark needs at least {0} ids but only {1} were provided.", MaxThreads, ids.Count));
        }

        var stats = new ConcurrentBag<long>();

        //warmup
        stats.Add(TestMethod(ids[0]));
        Console.WriteLine(String.Format("|{0}|{1,5}ms|", "warmup", stats.Average()));

        //start 1 to 30 threads (test on server with 32 cores / 64 threads)
        for (int i = 1; i <= MaxThreads; i++)
        {
            // Fresh bag per round so the printed average covers only this task count.
            stats = new ConcurrentBag<long>();
            var tasks = Enumerable.Range(0, i).Select(idx =>
            {
                var id = ids[idx]; // separate ids to be sure we're not reading same records from disk
                return Task.Run(() =>
                {
                    for (int j = 0; j < RunsPerTask; j++)
                    {
                        stats.Add(TestMethod(id));
                    }
                });
            }).ToArray();

            // All tasks of the round must finish before the bag is read and replaced.
            Task.WaitAll(tasks);
            Console.WriteLine(String.Format("|{0,2}|{1,5}ms|", i, (int)stats.Average()));
        }

        Console.WriteLine("End");
        Console.ReadLine();
    }

    /// <summary>
    /// Opens a new connection, executes <c>SqlQuery</c> for the given id inside a transaction,
    /// and reads every row of the result into memory.
    /// </summary>
    /// <param name="id">Value bound to the query's <c>id</c> parameter.</param>
    /// <returns>
    /// Elapsed milliseconds from just before <c>ExecuteReader</c> until the reader has been
    /// fully read and disposed. Opening the connection and beginning the transaction are
    /// not included.
    /// </returns>
    private static long TestMethod(Guid id)
    {
        // Rows are collected only to mimic materializing the result set; the list is
        // never returned and is discarded when the method exits.
        var records = new List<object[]>();
        var sw = new Stopwatch();
        using (var connection = new SqlConnection(ConnectionString))
        {
            connection.Open();
            // The transaction is never committed; it ends when it is disposed.
            using (var transaction = connection.BeginTransaction())
            using (var command = connection.CreateCommand())
            {
                command.Transaction = transaction;
                command.CommandText = SqlQuery;
                command.Parameters.Add(new SqlParameter("id", id));

                // measure only dataReader time
                sw.Start();
                using (var dataReader = command.ExecuteReader())
                {
                    // Size the row buffer from the result set instead of hard-coding the
                    // column count, so no column is silently dropped if the query changes.
                    int fieldCount = dataReader.FieldCount;

                    // got ~2000 rows from query
                    while (dataReader.Read())
                    {
                        //read all data from row, test on Guid
                        var values = new object[fieldCount];
                        dataReader.GetValues(values);
                        records.Add(values);
                    }
                }
                sw.Stop();
            }
        }
        return sw.ElapsedMilliseconds;
    }
}
有什么方法可以提高性能并使我的应用程序可根据线程数进行扩展吗?
编辑。
要重现的数据库结构和示例查询:
-- Schema used to reproduce the benchmark: two tables keyed by uniqueidentifier,
-- with Table_1 rows referencing Table_2 rows through Ref1.
/****** Object: Table [dbo].[Table_1] Script Date: 05.07.2019 14:08:15 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Table_1: child table. Ref1 is the nullable reference to Table_2.Id (the foreign key
-- is added further down); Field1 and Field2 are nullable uniqueidentifier columns.
CREATE TABLE [dbo].[Table_1](
[Id] [uniqueidentifier] NOT NULL,
[Ref1] [uniqueidentifier] NULL,
[Field1] [uniqueidentifier] NULL,
[Field2] [uniqueidentifier] NULL,
-- Clustered primary key on Id; row locks are allowed, page locks are disabled.
CONSTRAINT [PK_Table_1] PRIMARY KEY CLUSTERED
(
[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
/****** Object: Table [dbo].[Table_2] Script Date: 05.07.2019 14:08:15 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Table_2: parent table referenced by Table_1.Ref1.
CREATE TABLE [dbo].[Table_2](
[Id] [uniqueidentifier] NOT NULL,
[Field1] [uniqueidentifier] NULL,
-- Clustered primary key on Id, with the same index options as PK_Table_1.
CONSTRAINT [PK_Table_2] PRIMARY KEY CLUSTERED
(
[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
/****** Object: Index [IDX_Table_1_Ref1] Script Date: 05.07.2019 14:08:15 ******/
-- Nonclustered index keyed on Ref1 that also stores Field1 and Field2 as included
-- columns, so rows located by Ref1 can supply those columns from the index itself.
CREATE NONCLUSTERED INDEX [IDX_Table_1_Ref1] ON [dbo].[Table_1]
(
[Ref1] ASC
)
INCLUDE ( [Field1],
[Field2]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = OFF) ON [PRIMARY]
GO
-- Foreign key Table_1.Ref1 -> Table_2.Id; WITH CHECK validates existing rows when the
-- constraint is added.
ALTER TABLE [dbo].[Table_1] WITH CHECK ADD CONSTRAINT [FK_Table_1_Table_2] FOREIGN KEY([Ref1])
REFERENCES [dbo].[Table_2] ([Id])
GO
-- Make sure the foreign key is enabled.
ALTER TABLE [dbo].[Table_1] CHECK CONSTRAINT [FK_Table_1_Table_2]
GO
-- Benchmark query: for the Table_2 row identified by @id, return one row per
-- Table_1 row whose Ref1 points at it (six uniqueidentifier columns per row).
SELECT
    t2.id     AS Id,
    t2.Field1 AS Field1,
    t1.Id     AS T1_Id,
    t1.Ref1   AS T1_T2,
    t1.Field1 AS T1_Field1,
    t1.Field2 AS T1_Field2
FROM dbo.Table_2 AS t2
INNER JOIN dbo.Table_1 AS t1
    ON t2.Id = t1.Ref1
WHERE t2.id = @id
T2 中现在有 30 条记录,T1 中有 2000 * 30 条记录,因此每个线程都处理具有 30 条记录的同一个数据集。数据是用 newid() 随机填充的。
edit2.
我还在案例中比较了这个解决方案 - Sql 服务器上的 30 个独立进程与 1 个进程和 30 个线程。 30 个单独的进程工作正常 - 大约是原始执行时间的 150%,而不是 1500%。
最大的区别——有 30 个独立的进程和单线程,我有 ~14 个等待任务和 20k 批处理请求/秒,有单进程和 30 个线程,我有 > 30 个等待任务(主要在网络 I/O 上)和 2k 批处理请求/秒
设置
"System.GC.Server": true
解决了我的问题,现在它扩展到服务器上的最大可用线程。感谢您的帮助!
检查您的 GC 设置。
https://www.dotnetcurry.com/csharp/1471/garbage-collection-csharp-dotnet-core
设置参数
ServerGarbageCollection = true
ConcurrentGarbageCollection = false
可能会有帮助。 :)
我注意到在更改服务器并尝试为某些数据库密集型任务增加工作线程数后我的应用程序出现性能问题。
经过一些测试,我发现问题出在从 dataReader 读取数据上。在 30 个线程上执行简单查询比在单线程上执行至少慢 15 倍。使用 PerfView 我发现大部分时间都浪费在 BLOCKED_TIME 上。
为了进行测试,我使用带有 Ryzen Threadripper(32 核/64 线程)的服务器和 SqlServer 的本地实例。在具有相似规格的生产服务器上得到相同的结果。
我已经尝试过同时运行 30 个应用程序实例 - 运行 2-3 个实例与运行 30 个实例时的性能几乎没有差异,因此服务器性能足以进行 30 个并行查询。
我已经尝试对连接字符串进行一些更改,例如增大/减小(increase/decrease)min/max 池大小、禁用连接池、将 LPC 更改为 TCP - 没有结果。
/// <summary>
/// Console benchmark that measures how long it takes to execute a parameterized query and
/// drain its data reader when the query is run by 1 to 30 concurrent tasks.
/// </summary>
/// <remarks>
/// <c>ConnectionString</c> and <c>SqlQuery</c> are referenced but not declared in this snippet;
/// they have to be supplied as static members of this class.
/// </remarks>
class Program
{
    /// <summary>Largest number of concurrent tasks the benchmark ramps up to.</summary>
    private const int MaxThreads = 30;

    /// <summary>Number of timed query executions each task performs per round.</summary>
    private const int RunsPerTask = 20;

    /// <summary>
    /// Runs one warm-up query, then for every task count from 1 to <see cref="MaxThreads"/>
    /// starts that many tasks, lets each execute the query <see cref="RunsPerTask"/> times,
    /// and prints the average measured time in milliseconds for that round.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <exception cref="InvalidOperationException">
    /// Fewer than <see cref="MaxThreads"/> ids were provided.
    /// </exception>
    static void Main(string[] args)
    {
        var ids = new List<Guid>
        {
            // TODO: fill with at least MaxThreads distinct ids taken from the database.
        };

        // Every task indexes ids[idx]; fail with a clear message instead of an
        // ArgumentOutOfRangeException thrown from inside a worker task.
        if (ids.Count < MaxThreads)
        {
            throw new InvalidOperationException(
                String.Format("The benchmark needs at least {0} ids but only {1} were provided.", MaxThreads, ids.Count));
        }

        var stats = new ConcurrentBag<long>();

        //warmup
        stats.Add(TestMethod(ids[0]));
        Console.WriteLine(String.Format("|{0}|{1,5}ms|", "warmup", stats.Average()));

        //start 1 to 30 threads (test on server with 32 cores / 64 threads)
        for (int i = 1; i <= MaxThreads; i++)
        {
            // Fresh bag per round so the printed average covers only this task count.
            stats = new ConcurrentBag<long>();
            var tasks = Enumerable.Range(0, i).Select(idx =>
            {
                var id = ids[idx]; // separate ids to be sure we're not reading same records from disk
                return Task.Run(() =>
                {
                    for (int j = 0; j < RunsPerTask; j++)
                    {
                        stats.Add(TestMethod(id));
                    }
                });
            }).ToArray();

            // All tasks of the round must finish before the bag is read and replaced.
            Task.WaitAll(tasks);
            Console.WriteLine(String.Format("|{0,2}|{1,5}ms|", i, (int)stats.Average()));
        }

        Console.WriteLine("End");
        Console.ReadLine();
    }

    /// <summary>
    /// Opens a new connection, executes <c>SqlQuery</c> for the given id inside a transaction,
    /// and reads every row of the result into memory.
    /// </summary>
    /// <param name="id">Value bound to the query's <c>id</c> parameter.</param>
    /// <returns>
    /// Elapsed milliseconds from just before <c>ExecuteReader</c> until the reader has been
    /// fully read and disposed. Opening the connection and beginning the transaction are
    /// not included.
    /// </returns>
    private static long TestMethod(Guid id)
    {
        // Rows are collected only to mimic materializing the result set; the list is
        // never returned and is discarded when the method exits.
        var records = new List<object[]>();
        var sw = new Stopwatch();
        using (var connection = new SqlConnection(ConnectionString))
        {
            connection.Open();
            // The transaction is never committed; it ends when it is disposed.
            using (var transaction = connection.BeginTransaction())
            using (var command = connection.CreateCommand())
            {
                command.Transaction = transaction;
                command.CommandText = SqlQuery;
                command.Parameters.Add(new SqlParameter("id", id));

                // measure only dataReader time
                sw.Start();
                using (var dataReader = command.ExecuteReader())
                {
                    // Size the row buffer from the result set instead of hard-coding the
                    // column count, so no column is silently dropped if the query changes.
                    int fieldCount = dataReader.FieldCount;

                    // got ~2000 rows from query
                    while (dataReader.Read())
                    {
                        //read all data from row, test on Guid
                        var values = new object[fieldCount];
                        dataReader.GetValues(values);
                        records.Add(values);
                    }
                }
                sw.Stop();
            }
        }
        return sw.ElapsedMilliseconds;
    }
}
有什么方法可以提高性能并使我的应用程序可根据线程数进行扩展吗?
编辑。 要重现的数据库结构和示例查询:
-- Schema used to reproduce the benchmark: two tables keyed by uniqueidentifier,
-- with Table_1 rows referencing Table_2 rows through Ref1.
/****** Object: Table [dbo].[Table_1] Script Date: 05.07.2019 14:08:15 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Table_1: child table. Ref1 is the nullable reference to Table_2.Id (the foreign key
-- is added further down); Field1 and Field2 are nullable uniqueidentifier columns.
CREATE TABLE [dbo].[Table_1](
[Id] [uniqueidentifier] NOT NULL,
[Ref1] [uniqueidentifier] NULL,
[Field1] [uniqueidentifier] NULL,
[Field2] [uniqueidentifier] NULL,
-- Clustered primary key on Id; row locks are allowed, page locks are disabled.
CONSTRAINT [PK_Table_1] PRIMARY KEY CLUSTERED
(
[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
/****** Object: Table [dbo].[Table_2] Script Date: 05.07.2019 14:08:15 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Table_2: parent table referenced by Table_1.Ref1.
CREATE TABLE [dbo].[Table_2](
[Id] [uniqueidentifier] NOT NULL,
[Field1] [uniqueidentifier] NULL,
-- Clustered primary key on Id, with the same index options as PK_Table_1.
CONSTRAINT [PK_Table_2] PRIMARY KEY CLUSTERED
(
[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
/****** Object: Index [IDX_Table_1_Ref1] Script Date: 05.07.2019 14:08:15 ******/
-- Nonclustered index keyed on Ref1 that also stores Field1 and Field2 as included
-- columns, so rows located by Ref1 can supply those columns from the index itself.
CREATE NONCLUSTERED INDEX [IDX_Table_1_Ref1] ON [dbo].[Table_1]
(
[Ref1] ASC
)
INCLUDE ( [Field1],
[Field2]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = OFF) ON [PRIMARY]
GO
-- Foreign key Table_1.Ref1 -> Table_2.Id; WITH CHECK validates existing rows when the
-- constraint is added.
ALTER TABLE [dbo].[Table_1] WITH CHECK ADD CONSTRAINT [FK_Table_1_Table_2] FOREIGN KEY([Ref1])
REFERENCES [dbo].[Table_2] ([Id])
GO
-- Make sure the foreign key is enabled.
ALTER TABLE [dbo].[Table_1] CHECK CONSTRAINT [FK_Table_1_Table_2]
GO
-- Benchmark query: for the Table_2 row identified by @id, return one row per
-- Table_1 row whose Ref1 points at it (six uniqueidentifier columns per row).
SELECT
    t2.id     AS Id,
    t2.Field1 AS Field1,
    t1.Id     AS T1_Id,
    t1.Ref1   AS T1_T2,
    t1.Field1 AS T1_Field1,
    t1.Field2 AS T1_Field2
FROM dbo.Table_2 AS t2
INNER JOIN dbo.Table_1 AS t1
    ON t2.Id = t1.Ref1
WHERE t2.id = @id
T2 中现在有 30 条记录,T1 中有 2000 * 30 条记录,因此每个线程都处理具有 30 条记录的同一个数据集。数据是用 newid() 随机填充的。
edit2.
我还在案例中比较了这个解决方案 - Sql 服务器上的 30 个独立进程与 1 个进程和 30 个线程。 30 个单独的进程工作正常 - 大约是原始执行时间的 150%,而不是 1500%。 最大的区别——有 30 个独立的进程和单线程,我有 ~14 个等待任务和 20k 批处理请求/秒,有单进程和 30 个线程,我有 > 30 个等待任务(主要在网络 I/O 上)和 2k 批处理请求/秒
设置
"System.GC.Server": true
解决了我的问题,现在它扩展到服务器上的最大可用线程。感谢您的帮助!
检查您的 GC 设置。
https://www.dotnetcurry.com/csharp/1471/garbage-collection-csharp-dotnet-core
设置参数
ServerGarbageCollection = true
ConcurrentGarbageCollection = false
可能会有帮助。 :)