【发布时间】:2021-11-28 11:29:45
【问题描述】:
我正在尝试在 NVIDIA GPU 上运行以下代码,但每次都得到不同的结果。据我所知,问题不在于自旋锁本身(它正确地强制锁定本地内存中的变量),而在于atomicFunc 调用后的屏障损坏。我尝试使用 1 个大小为 256 的工作组运行此示例。该问题仅在 NVIDIA GPU 上观察到。
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
int baseFunc(private int x)
{
return (x + 1);
}
int atomicFunc(__local int* localAccMutex, __local int* x)
{
int oldValue;
bool flag = 1;
while (flag) {
int old = atom_xchg(&localAccMutex[0], 1);
if (old == 0) {
oldValue = *x;
*x = baseFunc(*x);
atom_xchg(&localAccMutex[0], 0);
flag = 0;
};
barrier(CLK_LOCAL_MEM_FENCE);
};
return oldValue;
}
__kernel void kernel(__global int* result)
{
__local int localAcc[1];
__local int localAccMutex[1];
if (get_local_id(0) == 0) {
localAccMutex[0] = 0;
};
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) == 0) {
localAcc[0] = 0;
};
barrier(CLK_LOCAL_MEM_FENCE);
atomicFunc(localAccMutex, &localAcc[0]);
// warps are ignoring this barrier
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) == 0) {
result[0] = localAcc[0];
};
}
如果有任何帮助,我将不胜感激。
【问题讨论】:
标签: synchronization opencl nvidia spinlock