C++ 模运算符 Vs。移位运算符,哪个更快,为什么?
C++ Modulus operator Vs. Shift operator, which is faster and why?
我正在编写查找素数的代码,在工作期间我开始好奇 C++ 中的 % 运算在低级别下究竟是如何工作的。
首先,我编写了一些代码来比较“%”运算符和“>>”运算符各自的运行时间。
#include <iostream>
#include <chrono>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
using namespace std;
bool remainder1(int x);
bool remainder2(int y);
void timeCompare(bool(*f)(int), bool(*g)(int));
// I want to check which one is faster, x % 2 Vs. (x >> 1) & 1
int main()
{
srand(time(NULL));
for (int i = 0; i < 10; i++) {
timeCompare(remainder1, remainder2);
}
return 0;
}
// % 2 operation
bool remainder1(int x) {
if (x % 128) return true;
else return false;
}
bool remainder2(int x) {
if ((x >> 7) & 1) return true;
else return false;
}
void timeCompare(bool(*f)(int), bool(*g)(int)) {
srand(time(NULL));
auto start = chrono::steady_clock::now();
for (int i = 0; i < 10000000; i++) {
int x = rand();
f(x);
}
auto end = chrono::steady_clock::now();
cout << "Elapsed time in nanoseconds : "
<< chrono::duration_cast<chrono::nanoseconds>(end - start).count()
<< " ns";
auto start2 = chrono::steady_clock::now();
for (int i = 0; i < 10000000; i++) {
int x = rand();
g(x);
}
auto end2 = chrono::steady_clock::now();
cout << " Vs. "
<< chrono::duration_cast<chrono::nanoseconds>(end2 - start2).count()
<< " ns" << endl;
}
输出是这样的:
Elapsed time in nanoseconds : 166158000 ns Vs. 218736000 ns
Elapsed time in nanoseconds : 151776000 ns Vs. 214823000 ns
Elapsed time in nanoseconds : 162193000 ns Vs. 217248000 ns
Elapsed time in nanoseconds : 151338000 ns Vs. 211793000 ns
Elapsed time in nanoseconds : 150346000 ns Vs. 211295000 ns
Elapsed time in nanoseconds : 155799000 ns Vs. 215265000 ns
Elapsed time in nanoseconds : 148801000 ns Vs. 212839000 ns
Elapsed time in nanoseconds : 149813000 ns Vs. 226175000 ns
Elapsed time in nanoseconds : 152324000 ns Vs. 213338000 ns
Elapsed time in nanoseconds : 149353000 ns Vs. 216809000 ns
所以看起来移位操作在求余数方面比较慢。我猜是因为 shift 版本比 '%' 版本需要多比较一次...我说得对吗?
我真的很想知道“%”在较低级别是如何工作的!
I really want to know how '%' works in lower level!
如果你问它是如何实现的,那么答案是 你正在使用的 CPU 有一个 单一 指令对于模 (%
)。例如,采用此 C++ 代码:
int main()
{
int x = 100;
int mod = x % 128;
int shift = x >> 7;
return 0;
}
为其生成的x86汇编代码(Clang 6.0.0)为:
main:
push rbp
mov rbp, rsp
xor eax, eax
mov ecx, 128
mov dword ptr [rbp - 4], 0
mov dword ptr [rbp - 8], 100
mov edx, dword ptr [rbp - 8] # Start of modulo boilerplater
mov dword ptr [rbp - 20], eax
mov eax, edx
cdq
idiv ecx # Modulo CPU instruction
mov dword ptr [rbp - 12], edx # End of modulo sequence
mov ecx, dword ptr [rbp - 8] # Start of shift boilerplate
sar ecx, 7 # Shift CPU instruction
mov dword ptr [rbp - 16], ecx # End of shift sequence
mov ecx, dword ptr [rbp - 20]
mov eax, ecx
pop rbp
ret
idiv
instruction 称为 有符号除法 ,它将商放在 EAX/RAX 中,将余数放在 EDX/RDX 中,因为 (x86/x64相应地)。
I guessed the reason is that shift version needs one more comparison
than '%' version... Is my correct?
在这种情况下没有进行比较,因为它是一条指令。
我正在编写查找素数的代码,在工作期间我开始好奇 C++ 中的 % 运算在低级别下究竟是如何工作的。
首先,我编写了一些代码来比较“%”运算符和“>>”运算符各自的运行时间。
#include <iostream>
#include <chrono>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
using namespace std;
bool remainder1(int x);
bool remainder2(int y);
void timeCompare(bool(*f)(int), bool(*g)(int));
// I want to check which one is faster, x % 2 Vs. (x >> 1) & 1
int main()
{
srand(time(NULL));
for (int i = 0; i < 10; i++) {
timeCompare(remainder1, remainder2);
}
return 0;
}
// % 2 operation
bool remainder1(int x) {
if (x % 128) return true;
else return false;
}
bool remainder2(int x) {
if ((x >> 7) & 1) return true;
else return false;
}
void timeCompare(bool(*f)(int), bool(*g)(int)) {
srand(time(NULL));
auto start = chrono::steady_clock::now();
for (int i = 0; i < 10000000; i++) {
int x = rand();
f(x);
}
auto end = chrono::steady_clock::now();
cout << "Elapsed time in nanoseconds : "
<< chrono::duration_cast<chrono::nanoseconds>(end - start).count()
<< " ns";
auto start2 = chrono::steady_clock::now();
for (int i = 0; i < 10000000; i++) {
int x = rand();
g(x);
}
auto end2 = chrono::steady_clock::now();
cout << " Vs. "
<< chrono::duration_cast<chrono::nanoseconds>(end2 - start2).count()
<< " ns" << endl;
}
输出是这样的:
Elapsed time in nanoseconds : 166158000 ns Vs. 218736000 ns
Elapsed time in nanoseconds : 151776000 ns Vs. 214823000 ns
Elapsed time in nanoseconds : 162193000 ns Vs. 217248000 ns
Elapsed time in nanoseconds : 151338000 ns Vs. 211793000 ns
Elapsed time in nanoseconds : 150346000 ns Vs. 211295000 ns
Elapsed time in nanoseconds : 155799000 ns Vs. 215265000 ns
Elapsed time in nanoseconds : 148801000 ns Vs. 212839000 ns
Elapsed time in nanoseconds : 149813000 ns Vs. 226175000 ns
Elapsed time in nanoseconds : 152324000 ns Vs. 213338000 ns
Elapsed time in nanoseconds : 149353000 ns Vs. 216809000 ns
所以看起来移位操作在求余数方面比较慢。我猜是因为 shift 版本比 '%' 版本需要多比较一次...我说得对吗?
我真的很想知道“%”在较低级别是如何工作的!
I really want to know how '%' works in lower level!
如果你问它是如何实现的,那么答案是 你正在使用的 CPU 有一个 单一 指令对于模 (%
)。例如,采用此 C++ 代码:
int main()
{
int x = 100;
int mod = x % 128;
int shift = x >> 7;
return 0;
}
为其生成的x86汇编代码(Clang 6.0.0)为:
main:
push rbp
mov rbp, rsp
xor eax, eax
mov ecx, 128
mov dword ptr [rbp - 4], 0
mov dword ptr [rbp - 8], 100
mov edx, dword ptr [rbp - 8] # Start of modulo boilerplater
mov dword ptr [rbp - 20], eax
mov eax, edx
cdq
idiv ecx # Modulo CPU instruction
mov dword ptr [rbp - 12], edx # End of modulo sequence
mov ecx, dword ptr [rbp - 8] # Start of shift boilerplate
sar ecx, 7 # Shift CPU instruction
mov dword ptr [rbp - 16], ecx # End of shift sequence
mov ecx, dword ptr [rbp - 20]
mov eax, ecx
pop rbp
ret
idiv
instruction 称为 有符号除法 ,它将商放在 EAX/RAX 中,将余数放在 EDX/RDX 中,因为 (x86/x64相应地)。
I guessed the reason is that shift version needs one more comparison than '%' version... Is my correct?
在这种情况下没有进行比较,因为它是一条指令。