【发布时间】:2020-07-27 23:21:18
【问题描述】:
我正在尝试展开一个嵌套循环,该循环向 C++ 中动态分配的二维内存写入数据。不过,我不太确定该怎么做。这是展开前的原始循环:
int steps[1];
Ipp32f* vectx = ippiMalloc_32f_C1(size0, size1, &(steps[0]));
for (int i = 0; i < size0; i++){
for (int j = 0; j < size1; j++){
Ipp32f* pointer = (Ipp32f*)((Ipp8u*)vectx + steps[0]*j + sizeof(Ipp32f)*i);
*pointer = datax[i];
}
}
在我的程序中,datax 是一个存放数值的数组,size0 = 30,size1 = 10000。我尝试了以下方法,但不幸的是,各个位置上得到的值与原始循环的结果并不相同。有人可以帮我吗?
for (int i = 0; i < size0; i+=4) {
for (int j = 0; j < size1; j+=4) {
*((Ipp32f*)((Ipp8u*)vectx+ (steps[0] * j +0)+ (sizeof(Ipp32f) * i ))) = datax[i];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 1) + (sizeof(Ipp32f) * i ))) = datax[i ];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 2) + (sizeof(Ipp32f) * i ))) = datax[i ];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 3) + (sizeof(Ipp32f) * i ))) = datax[i ];
}
for (int j = 0; j < size1; j += 4) {
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 0) + (sizeof(Ipp32f) * i+1))) = datax[i+1];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 1) + (sizeof(Ipp32f) * i+1))) = datax[i+1];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 2) + (sizeof(Ipp32f) * i+1))) = datax[i+1];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 3) + (sizeof(Ipp32f) * i+1))) = datax[i+1];
}
for (int j = 0; j < size1; j += 4) {
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 0) + (sizeof(Ipp32f) * i + 2))) = datax[i + 2];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 1) + (sizeof(Ipp32f) * i + 2))) = datax[i + 2];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 2) + (sizeof(Ipp32f) * i + 2))) = datax[i + 2];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 3) + (sizeof(Ipp32f) * i + 2))) = datax[i + 2];
}
for (int j = 0; j < size1; j += 4) {
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 0) + (sizeof(Ipp32f) * i + 3))) = datax[i + 3];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 1) + (sizeof(Ipp32f) * i + 3))) = datax[i + 3];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 2) + (sizeof(Ipp32f) * i + 3))) = datax[i + 3];
*((Ipp32f*)((Ipp8u*)vectx + (steps[0] * j + 3) + (sizeof(Ipp32f) * i + 3))) = datax[i + 3];
}
}
【问题讨论】:
-
从 ssteps[0] 来看,这段代码甚至无法通过编译。
标签: c++ intel-ipp loop-unrolling