智能指针与常规指针的性能对比
Smart pointer vs regular pointer for performance
在性能方面,使用智能指针与常规指针相比有优势还是劣势?
我运行了下面的代码,并用 VS2019 分别以 Release 和 Debug 模式编译。
以下是 Release(发布)模式的结果:
Assign Ptr time = 0.3285ms
Assign Smart ptr time = 0.101ms
Sum Ptr = 126756464
Sum Smart Ptr = 126756464
Sum Ptr time = 0.2124ms
Sum Smart ptr time = 0.2912ms
以下是 Debug(调试)模式的结果:
Assign Ptr time = 1.8149ms
Assign Smart ptr time = 15.8177ms
Sum Ptr = 126756464
Sum Smart Ptr = 126756464
Sum Ptr time = 1.8392ms
Sum Smart ptr time = 15.9617ms
代码
#include <iostream>
#include <cstdio> // getchar
#include <chrono>
#define HEIGHT 1000
#define WIDTH 1000
// Micro-benchmark: copies and then sums a HEIGHT x WIDTH byte matrix once
// through a raw owning pointer and once through a std::unique_ptr<uint8_t[]>,
// printing the elapsed time of each pass in milliseconds.
//
// NOTE(review): std::unique_ptr / std::make_unique are declared in <memory>,
// rand() in <cstdlib> and uint8_t in <cstdint>; include those headers
// explicitly instead of relying on transitive includes.
int main(void)
{
    // steady_clock is monotonic, which is what interval measurements need;
    // high_resolution_clock is allowed to be an alias of system_clock.
    using Clock = std::chrono::steady_clock;
    using Millis = std::chrono::duration<double, std::milli>;

    const int count = HEIGHT * WIDTH;

    // The source data lives on the heap: a 1,000,000-byte automatic array
    // would take up almost all of the 1 MB default stack used by MSVC.
    std::unique_ptr<uint8_t[]> matrix = std::make_unique<uint8_t[]>(count);
    std::unique_ptr<uint8_t[]> matrixSmartPtr = std::make_unique<uint8_t[]>(count);

    // make_unique<T[]> value-initialises (zeroes) its buffer, so that memory
    // has already been written once before the timed loops start. Plain
    // new[] leaves the buffer untouched, which would put the first-touch cost
    // inside the timed raw-pointer loop only. The trailing () value-initialises
    // the raw buffer too, so both destinations start in the same state.
    uint8_t* matrixPtr = new uint8_t[count]();

    // Fill the source with pseudo-random bytes in the range [0, 254].
    for (int i = 0; i < count; ++i)
    {
        matrix[i] = static_cast<uint8_t>(rand() % 255);
    }
    const uint8_t* source = matrix.get();

    // --- Assignment through the raw pointer ---
    const auto t1 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        matrixPtr[i] = source[i];
    }
    const auto t2 = Clock::now();

    // --- Assignment through the smart pointer ---
    // get() is deliberately evaluated on every iteration: that access path is
    // what the benchmark compares against the raw pointer.
    const auto t3 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        matrixSmartPtr.get()[i] = source[i];
    }
    const auto t4 = Clock::now();

    const Millis ms_assign_n = t2 - t1;
    const Millis ms_assign_s = t4 - t3;
    std::cout << "Assign Ptr time = " << ms_assign_n.count() << "ms" << '\n';
    std::cout << "Assign Smart ptr time = " << ms_assign_s.count() << "ms" << '\n';

    // --- Sum through the raw pointer ---
    int sumA = 0;
    const auto t5 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        sumA += matrixPtr[i];
    }
    const auto t6 = Clock::now();
    std::cout << "Sum Ptr = " << sumA << '\n';

    // --- Sum through the smart pointer ---
    int sumB = 0;
    const auto t7 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        sumB += matrixSmartPtr.get()[i];
    }
    const auto t8 = Clock::now();
    std::cout << "Sum Smart Ptr = " << sumB << '\n';

    const Millis ms_sum_n = t6 - t5;
    const Millis ms_sum_s = t8 - t7;
    std::cout << "Sum Ptr time = " << ms_sum_n.count() << "ms" << '\n';
    std::cout << "Sum Smart ptr time = " << ms_sum_s.count() << "ms" << '\n';

    // The raw buffer has no owner object, so it must be released by hand.
    delete[] matrixPtr;

    // std::endl flushes, so the prompt is visible before blocking on input.
    std::cout << "Press enter to finish" << std::endl;
    std::getchar(); // Keep the console window open until the user presses enter
    return 0;
}
我不明白为什么在发布模式下,智能指针的赋值速度更快,而求和速度却相近甚至更慢。
为什么智能指针在调试模式下更差?
与原始指针相比,智能指针存在劣势。
您可以在调试模式下看到这一点。
在发布模式下,编译器会进行优化,把 matrixSmartPtr.get() 的结果缓存起来,而不是在每次循环迭代中重新调用。
在发布模式下,您看到的差异只是噪音。unique_ptr 会被编译器完全优化掉。
您可以从编译生成的汇编代码中看到这一点(以下是赋值部分的 g++ 输出):
原始指针:
mov edx, 1000000
mov rsi, rsp
mov rdi, r13
mov rbp, rax
call memcpy
unique_ptr:
mov edx, 1000000
mov rsi, rsp
mov rdi, r12
mov r14, rax
call memcpy
甚至 unique_ptr 的创建也只是被编译为:
call operator new[](unsigned long)
最后,还有一个调用:
call operator delete[](void*)
注意:正如 HolyBlackCat 在评论中提到的,在某些情况下 unique_ptr 会更慢。
在性能方面,使用智能指针与常规指针相比有优势还是劣势?
我运行了下面的代码,并用 VS2019 分别以 Release 和 Debug 模式编译。
以下是 Release(发布)模式的结果:
Assign Ptr time = 0.3285ms
Assign Smart ptr time = 0.101ms
Sum Ptr = 126756464
Sum Smart Ptr = 126756464
Sum Ptr time = 0.2124ms
Sum Smart ptr time = 0.2912ms
以下是 Debug(调试)模式的结果:
Assign Ptr time = 1.8149ms
Assign Smart ptr time = 15.8177ms
Sum Ptr = 126756464
Sum Smart Ptr = 126756464
Sum Ptr time = 1.8392ms
Sum Smart ptr time = 15.9617ms
代码
#include <iostream>
#include <cstdio> // getchar
#include <chrono>
#define HEIGHT 1000
#define WIDTH 1000
// Micro-benchmark: copies and then sums a HEIGHT x WIDTH byte matrix once
// through a raw owning pointer and once through a std::unique_ptr<uint8_t[]>,
// printing the elapsed time of each pass in milliseconds.
//
// NOTE(review): std::unique_ptr / std::make_unique are declared in <memory>,
// rand() in <cstdlib> and uint8_t in <cstdint>; include those headers
// explicitly instead of relying on transitive includes.
int main(void)
{
    // steady_clock is monotonic, which is what interval measurements need;
    // high_resolution_clock is allowed to be an alias of system_clock.
    using Clock = std::chrono::steady_clock;
    using Millis = std::chrono::duration<double, std::milli>;

    const int count = HEIGHT * WIDTH;

    // The source data lives on the heap: a 1,000,000-byte automatic array
    // would take up almost all of the 1 MB default stack used by MSVC.
    std::unique_ptr<uint8_t[]> matrix = std::make_unique<uint8_t[]>(count);
    std::unique_ptr<uint8_t[]> matrixSmartPtr = std::make_unique<uint8_t[]>(count);

    // make_unique<T[]> value-initialises (zeroes) its buffer, so that memory
    // has already been written once before the timed loops start. Plain
    // new[] leaves the buffer untouched, which would put the first-touch cost
    // inside the timed raw-pointer loop only. The trailing () value-initialises
    // the raw buffer too, so both destinations start in the same state.
    uint8_t* matrixPtr = new uint8_t[count]();

    // Fill the source with pseudo-random bytes in the range [0, 254].
    for (int i = 0; i < count; ++i)
    {
        matrix[i] = static_cast<uint8_t>(rand() % 255);
    }
    const uint8_t* source = matrix.get();

    // --- Assignment through the raw pointer ---
    const auto t1 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        matrixPtr[i] = source[i];
    }
    const auto t2 = Clock::now();

    // --- Assignment through the smart pointer ---
    // get() is deliberately evaluated on every iteration: that access path is
    // what the benchmark compares against the raw pointer.
    const auto t3 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        matrixSmartPtr.get()[i] = source[i];
    }
    const auto t4 = Clock::now();

    const Millis ms_assign_n = t2 - t1;
    const Millis ms_assign_s = t4 - t3;
    std::cout << "Assign Ptr time = " << ms_assign_n.count() << "ms" << '\n';
    std::cout << "Assign Smart ptr time = " << ms_assign_s.count() << "ms" << '\n';

    // --- Sum through the raw pointer ---
    int sumA = 0;
    const auto t5 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        sumA += matrixPtr[i];
    }
    const auto t6 = Clock::now();
    std::cout << "Sum Ptr = " << sumA << '\n';

    // --- Sum through the smart pointer ---
    int sumB = 0;
    const auto t7 = Clock::now();
    for (int i = 0; i < count; ++i)
    {
        sumB += matrixSmartPtr.get()[i];
    }
    const auto t8 = Clock::now();
    std::cout << "Sum Smart Ptr = " << sumB << '\n';

    const Millis ms_sum_n = t6 - t5;
    const Millis ms_sum_s = t8 - t7;
    std::cout << "Sum Ptr time = " << ms_sum_n.count() << "ms" << '\n';
    std::cout << "Sum Smart ptr time = " << ms_sum_s.count() << "ms" << '\n';

    // The raw buffer has no owner object, so it must be released by hand.
    delete[] matrixPtr;

    // std::endl flushes, so the prompt is visible before blocking on input.
    std::cout << "Press enter to finish" << std::endl;
    std::getchar(); // Keep the console window open until the user presses enter
    return 0;
}
我不明白为什么在发布模式下,智能指针的赋值速度更快,而求和速度却相近甚至更慢。
为什么智能指针在调试模式下更差?
与原始指针相比,智能指针存在劣势。您可以在调试模式下看到这一点。在发布模式下,编译器会进行优化,把 matrixSmartPtr.get() 的结果缓存起来,而不是在每次循环迭代中重新调用。
在发布模式下,您看到的差异只是噪音。unique_ptr 会被编译器完全优化掉。
您可以从编译生成的汇编代码中看到这一点(以下是赋值部分的 g++ 输出):
原始指针:
mov edx, 1000000
mov rsi, rsp
mov rdi, r13
mov rbp, rax
call memcpy
unique_ptr:
mov edx, 1000000
mov rsi, rsp
mov rdi, r12
mov r14, rax
call memcpy
甚至 unique_ptr 的创建也只是被编译为:
call operator new[](unsigned long)
最后,还有一个调用:
call operator delete[](void*)
注意:正如 HolyBlackCat 在评论中提到的,在某些情况下 unique_ptr 会更慢。