Cuda C++奇偶排序实现
Cuda c++ odd even sort implementation
这是我的奇偶排序代码:
这段代码正在编译,并且 运行ning 没问题,但我猜不是排序。请帮我
我目前在 visual studio 2019 年使用 CUDA 11.3。
我的想法是创建奇函数和偶函数,然后 运行 它一个接一个地出现。
我正在对每个函数的过程进行多线程处理。换句话说 if(arr[2k]>arr[2k+1]) swap(arr[2k],arr[2 k+1]).
#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
using namespace std;
using namespace std::chrono;
__global__ void Even(int *arr, int n) {
int index = threadIdx.x;
index = index * 2;
if (index < n-1) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index+ 1];
arr[index + 1] = temp;
}
}
}
__global__ void Odd(int* arr, int n) {
int index = threadIdx.x;
index = index * 2+1;
if (index <= n - 2) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index + 1];
arr[index+ 1] = temp;
}
}
}
#define n 10
int main(){
int *a;
int* ptr;
const int Size = sizeof(int) * n;
cudaMalloc((void**)&ptr, Size);
a = (int*)malloc(n * Size);
srand(time(NULL));
for(int i =0 ;i<n;i++){
a[i] = rand()%n;
}
for (int i = 0; i < n; i++) {
std:: cout << a[i] << " ";
}
std::cout << endl;
cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);
auto starttime = high_resolution_clock::now();
for (int i = 0; i < n / 2; i++) {
Even<<<1,n >>>(a, n);
Odd<<<1,n >>>(a, n);
}
cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);
auto stoptime = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stoptime-starttime);
std::cout<<" time : " <<duration.count()<<"ms"<<endl;
for (int i = 0; i < n; i++) {
std::cout << a[i] << " ";
}
std::cout << endl;
free(a);
cudaFree(ptr);
return 0;
}
我怀疑有两个问题。
首先,每次 运行 Odd()
都会覆盖数组中的第一个值。您应该删除行 arr[0] = 0;
以解决此问题。
其次,您将主机指针 a
而不是设备指针 ptr
传递给内核。您应该传递 ptr
。
经过这些(未经测试的)编辑,代码如下所示:
#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
using namespace std;
using namespace std::chrono;
__global__ void Even(int *arr, int n) {
int index = threadIdx.x;
index = index * 2;
if (index < n-1) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index+ 1];
arr[index + 1] = temp;
}
}
}
__global__ void Odd(int* arr, int n) {
int index = threadIdx.x;
index = index * 2+1;
// no longer setting a[0] = 0
if (index <= n - 2) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index + 1];
arr[index+ 1] = temp;
}
}
}
#define n 10
int main(){
int *a;
int* ptr;
const int Size = sizeof(int) * n;
cudaMalloc((void**)&ptr, Size);
a = (int*)malloc(n * Size);
srand(time(NULL));
for(int i =0 ;i<n;i++){
a[i] = rand()%n;
}
for (int i = 0; i < n; i++) {
std:: cout << a[i] << " ";
}
std::cout << endl;
cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);
auto starttime = high_resolution_clock::now();
for (int i = 0; i < n / 2; i++) {
Even<<<1,n >>>(ptr, n); // ptr instead of a
Odd<<<1,n >>>(ptr, n); // ptr instead of a
}
cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);
auto stoptime = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stoptime-starttime);
std::cout<<" time : " <<duration.count()<<"ms"<<endl;
for (int i = 0; i < n; i++) {
std::cout << a[i] << " ";
}
std::cout << endl;
free(a);
cudaFree(ptr);
return 0;
}
这是我的奇偶排序代码: 这段代码正在编译,并且 运行ning 没问题,但我猜不是排序。请帮我 我目前在 visual studio 2019 年使用 CUDA 11.3。 我的想法是创建奇函数和偶函数,然后 运行 它一个接一个地出现。 我正在对每个函数的过程进行多线程处理。换句话说 if(arr[2k]>arr[2k+1]) swap(arr[2k],arr[2 k+1]).
#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
using namespace std;
using namespace std::chrono;
__global__ void Even(int *arr, int n) {
int index = threadIdx.x;
index = index * 2;
if (index < n-1) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index+ 1];
arr[index + 1] = temp;
}
}
}
__global__ void Odd(int* arr, int n) {
int index = threadIdx.x;
index = index * 2+1;
if (index <= n - 2) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index + 1];
arr[index+ 1] = temp;
}
}
}
#define n 10
int main(){
int *a;
int* ptr;
const int Size = sizeof(int) * n;
cudaMalloc((void**)&ptr, Size);
a = (int*)malloc(n * Size);
srand(time(NULL));
for(int i =0 ;i<n;i++){
a[i] = rand()%n;
}
for (int i = 0; i < n; i++) {
std:: cout << a[i] << " ";
}
std::cout << endl;
cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);
auto starttime = high_resolution_clock::now();
for (int i = 0; i < n / 2; i++) {
Even<<<1,n >>>(a, n);
Odd<<<1,n >>>(a, n);
}
cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);
auto stoptime = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stoptime-starttime);
std::cout<<" time : " <<duration.count()<<"ms"<<endl;
for (int i = 0; i < n; i++) {
std::cout << a[i] << " ";
}
std::cout << endl;
free(a);
cudaFree(ptr);
return 0;
}
我怀疑有两个问题。
首先,每次 运行 Odd()
都会覆盖数组中的第一个值。您应该删除行 arr[0] = 0;
以解决此问题。
其次,您将主机指针 a
而不是设备指针 ptr
传递给内核。您应该传递 ptr
。
经过这些(未经测试的)编辑,代码如下所示:
#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
using namespace std;
using namespace std::chrono;
__global__ void Even(int *arr, int n) {
int index = threadIdx.x;
index = index * 2;
if (index < n-1) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index+ 1];
arr[index + 1] = temp;
}
}
}
__global__ void Odd(int* arr, int n) {
int index = threadIdx.x;
index = index * 2+1;
// no longer setting a[0] = 0
if (index <= n - 2) {
if (arr[index ] > arr[index + 1]) {
int temp = arr[index];
arr[index] = arr[index + 1];
arr[index+ 1] = temp;
}
}
}
#define n 10
int main(){
int *a;
int* ptr;
const int Size = sizeof(int) * n;
cudaMalloc((void**)&ptr, Size);
a = (int*)malloc(n * Size);
srand(time(NULL));
for(int i =0 ;i<n;i++){
a[i] = rand()%n;
}
for (int i = 0; i < n; i++) {
std:: cout << a[i] << " ";
}
std::cout << endl;
cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);
auto starttime = high_resolution_clock::now();
for (int i = 0; i < n / 2; i++) {
Even<<<1,n >>>(ptr, n); // ptr instead of a
Odd<<<1,n >>>(ptr, n); // ptr instead of a
}
cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);
auto stoptime = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stoptime-starttime);
std::cout<<" time : " <<duration.count()<<"ms"<<endl;
for (int i = 0; i < n; i++) {
std::cout << a[i] << " ";
}
std::cout << endl;
free(a);
cudaFree(ptr);
return 0;
}