【发布时间】:2016-02-29 23:23:46
【问题描述】:
你好,我是 cuda 编程的初学者。我使用 lock.lock() 函数等待之前的线程完成工作。这是我的代码:
#include "book.h"
#include <cuda.h>
#include <conio.h>
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <stdio.h>
#include <math.h>
#include <fstream>
#include <string>
#include <curand.h>
#include <curand_kernel.h>
#include "lock.h"
#define pop 10
#define gen 10
#define pg pop*gen
using namespace std;
__global__ void hold(Lock lock,float* a )
{
__shared__ int cache[gen];
int tid=blockIdx.x * blockDim.x+threadIdx.x;
int cacheIndex = threadIdx.x;
if(tid<gen)
{
a[tid]=7;//this number example but in my chase this random number
}
else
{
//cache[cacheIndex]=a[tid];
int temp;
if(tid%gen==0)
{
a[tid]=tid+4;//this example number but in my chase this random number if tid==tid%gen
temp=a[tid];
tid+=blockIdx.x*gridDim.x;
}
else
{
__syncthreads();
a[tid]=temp+1;//this must a[tid]=a[tid-1]+1;
temp=a[tid];
tid+=blockIdx.x*gridDim.x;
}
cache[cacheIndex]=temp;
__syncthreads();
for (int i=0;i<gen;i++)
{
if(cacheIndex==i)
{
lock. lock();
cache[cacheIndex]=temp;
lock.unlock();
}
}
}
}
int main()
{
float time;
float* a=new float [pg];
float *dev_a;
HANDLE_ERROR( cudaMalloc( (void**)&dev_a,pg *sizeof(int) ) );
Lock lock;
cudaEvent_t start, stop;
HANDLE_ERROR( cudaEventCreate(&start) );
HANDLE_ERROR( cudaEventCreate(&stop) );
HANDLE_ERROR( cudaEventRecord(start, 0) );
hold<<<pop,gen>>>(lock,dev_a);
HANDLE_ERROR( cudaMemcpy( a, dev_a,pg * sizeof(float),cudaMemcpyDeviceToHost ) );
HANDLE_ERROR( cudaEventRecord(stop, 0) );
HANDLE_ERROR( cudaEventSynchronize(stop) );
HANDLE_ERROR( cudaEventElapsedTime(&time, start, stop) );
for(int i=0;i<pop;i++)
{
for(int j=0;j<gen;j++)
{
cout<<a[(i*gen)+j]<<" ";
}
cout<<endl;
}
printf("hold: %3.1f ms \n", time);
HANDLE_ERROR(cudaFree(dev_a));
HANDLE_ERROR( cudaEventDestroy( start ) );
HANDLE_ERROR( cudaEventDestroy( stop ) );
system("pause");
return 0;
}
结果如下:
7 7 7 7 7 7 7 7 7 7
14 0 0 0 0 0 0 0 0 0
24 0 0 0 0 0 0 0 0 0
34 0 0 0 0 0 0 0 0 0
44 0 0 0 0 0 0 0 0 0
54 0 0 0 0 0 0 0 0 0
64 0 0 0 0 0 0 0 0 0
74 0 0 0 0 0 0 0 0 0
84 0 0 0 0 0 0 0 0 0
94 0 0 0 0 0 0 0 0 0
我的预期结果:
7 7 7 7 7 7 7 7 7 7
14 15 16 17 18 19 20 21 22 23
24 25 26 27 28 29 23 31 32 33
34 35 36 37 38 39 40 41 42 43
44 45 46 47 48 49 50 51 52 53
54 55 56 57 58 59 60 61 62 63
64 65 66 67 68 69 70 71 72 73
74 75 76 77 78 79 80 81 82 83
84 85 86 87 88 89 90 91 92 93
94 95 96 97 98 99 100 101 102 103
请任何人帮助我更正我的代码。谢谢
【问题讨论】:
标签: cuda critical-section