【发布时间】:2014-03-29 17:30:11
【问题描述】:
我想测量一个被多次启动的内核所花费的总时间。下面这段代码得到的是各个流中内核执行的总耗时，还是说需要把返回的时间乘以启动次数？
// Accumulates the GPU time spent between the start/stop events over every
// iteration of the two-stream pipeline and prints the total in milliseconds.
//
// A single start/stop pair that is re-recorded inside the loop only keeps the
// timestamps of its most recent recording, and cudaEventElapsedTime() must not
// be queried before the stop event has completed. Each iteration is therefore
// measured on its own and added to a running total.
cudaEvent_t start, stop;
gpuErrchk(cudaEventCreate(&start));
gpuErrchk(cudaEventCreate(&stop));
float elapsedTime = 0.0f;  // running total over all iterations, in ms
for(x=0; x<SIZE; x+=N*2){
    // Stage the two input chunks and the lookup array, one copy set per stream.
    gpuErrchk(cudaMemcpyAsync(data_d0, data_h+x, N*sizeof(char), cudaMemcpyHostToDevice, stream0));
    gpuErrchk(cudaMemcpyAsync(data_d1, data_h+x+N, N*sizeof(char), cudaMemcpyHostToDevice, stream1));
    gpuErrchk(cudaMemcpyAsync(array_d0, array_h, wrap->size*sizeof(node_r), cudaMemcpyHostToDevice, stream0));
    gpuErrchk(cudaMemcpyAsync(array_d1, array_h, wrap->size*sizeof(node_r), cudaMemcpyHostToDevice, stream1));

    // NOTE(review): both events are recorded in stream 0. With legacy
    // default-stream semantics this brackets the work of stream0 and stream1
    // together; confirm the build does not use per-thread default streams.
    gpuErrchk(cudaEventRecord(start, 0));
    GPU<<<N/512,512,0,stream0>>>(array_d0, data_d0, out_d0 );
    GPU<<<N/512,512,0,stream1>>>(array_d1, data_d1, out_d1);
    // Kernel launches return no status; pick up launch-configuration errors here.
    gpuErrchk(cudaGetLastError());
    gpuErrchk(cudaEventRecord(stop, 0));

    // Queue the result copies before blocking on the host so they are already
    // enqueued while we wait for the stop event.
    gpuErrchk(cudaMemcpyAsync(out_h+x, out_d0 , N * sizeof(int), cudaMemcpyDeviceToHost, stream0));
    gpuErrchk(cudaMemcpyAsync(out_h+x+N, out_d1 ,N * sizeof(int), cudaMemcpyDeviceToHost, stream1));

    // The events are reused next iteration, so read this iteration's interval
    // now. This host-side wait is the cost of reusing one event pair.
    gpuErrchk(cudaEventSynchronize(stop));
    float iterationMs = 0.0f;
    gpuErrchk(cudaEventElapsedTime(&iterationMs, start, stop));
    elapsedTime += iterationMs;
}
gpuErrchk(cudaEventDestroy(start));
gpuErrchk(cudaEventDestroy(stop));
printf("Time %f ms\n", elapsedTime);
【问题讨论】: