【发布时间】:2015-05-03 06:17:54
【问题描述】:
我用 C 编写了一个程序,执行高斯消元并返回矩阵的 L2 范数。程序的调用方式为 ./exec n k,其中 n 是 n×n 矩阵的大小,k 是执行程序所用的线程数(最多 4 个)。当我以 ./gauss 30 4 这样的方式运行程序时,会出现段错误。
使用 GDB 调试后,我发现退出的线程数量比预期的多。我猜测是因为线程是在 for 循环中创建的,所以它们被反复地创建和连接(join),从而导致了段错误。有人可以给我指个正确的方向吗?
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <omp.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
// File-scope state shared by main and the worker threads.
// NOTE(review): none of these objects is protected by a lock, and all of them
// are visible to every thread. Code that runs on a worker thread should use
// its own local loop counters instead of i, j, k or z.
//
// a                  - array of row pointers for the matrix being reduced
// vect, bvect, temp  - double buffers
// scalar, ratio, sum, delta - scalar scratch values
double **a, *vect, *bvect, scalar, ratio, sum, delta, *temp;
// File-scope loop counters (i, j, k, z) and a row index (ptr).
int i,j,k,ptr, z;
// z is declared a second time here; both lines are tentative definitions of
// the same object, which C permits at file scope.
int y,z;
int bvectcount = 0;
// Index used for worker-thread bookkeeping.
int threadcount;
// Handles of the worker threads. The array holds 4 entries, so no more than
// 4 threads may be started at once.
pthread_t workerThreads[4];
// Argument record describing one elimination step.
typedef struct threader {
int counter; // index of the pivot row/column
int matrixl; // order n of the n-by-n matrix
} threader;
// Wall-clock timestamps filled in with gettimeofday.
struct timeval start, end;
// Receives the thread return value from pthread_join.
void *retval;
// Forward declaration; the definition appears further down in this file.
int checkargs(int argc, char* argv[]);
// a is matrix, b is vector, x is the solution vector, and n is the size
double L2(double **a, double *bvect, double *vect, int matrixSize) {
double sum;
double res[matrixSize];
int i, j;
for (i=0; i < matrixSize; i++) {
sum = (double) 0;
for (j=0; j < matrixSize; j++) {
sum += a[i][j] * vect[j];
}
res[i] = sum;
}
for (i=0; i < matrixSize; i++) {
res[i] -= vect[i];
}
double squaresum = (double) 0;
for (i=0; i < matrixSize; i++) {
squaresum += res[i] * res[i];
}
return sqrt(squaresum);
}
/**
 * Validate the number of command-line arguments.
 *
 * @param argc argument count as passed to main; must be exactly 3
 *             (program name, matrix size, thread count).
 * @param argv argument vector; not inspected.
 * @return 0 when argc is 3. Otherwise prints a usage message to stderr and
 *         terminates the process with exit status 1 (does not return).
 */
int checkargs(int argc, char* argv[]){
    (void)argv;

    if (argc != 3) {
        fprintf(stderr, "Error: Usage is size threadNum\n" );
        exit(1);
    }
    /* The function is declared int, so it must return a value on success. */
    return 0;
}
void *parallelstuff(void *args){
threader temp = *((threader *)args);
int i, matrixSize;
i = temp.counter;
matrixSize = temp.matrixl;
double temp2;
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
int main(int argc, char* argv[]){
//check for args
checkargs(argc, argv);
int matrixSize = atoi(argv[1]);
int threadNum = atoi(argv[2]);
//memory allocation
a = (double**)malloc(matrixSize*sizeof(double*));
for(i = 0; i < matrixSize ; i++)
a[i] = (double*)malloc(matrixSize*sizeof(double) * matrixSize);
vect = (double*)malloc(matrixSize*sizeof(double));
bvect = (double*)malloc(matrixSize*sizeof(double));
temp = (double*)malloc(matrixSize*sizeof(double));
for(i = 0; i < matrixSize; ++i){
for(j = 0; j < matrixSize + 1; ++j){
a[i][j] = drand48();
}
}
j = 0;
j += matrixSize;
for(i = 0; i < matrixSize; ++i){
bvect[i] = a[i][j];
}
//generation of scalar matrix (diagonal vector)
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
ptr = i;
//find largest number in column and row number of it
for(k = i+1; k < matrixSize; k++){
if(fabs(scalar) < fabs(a[k][i])){
//k is row of scalar, while
scalar = a[k][i];
ptr = k;
}
}
//swaping the elements of diagonal row and row containing largest no
for(j = 0; j <= matrixSize; j++)
{
temp[0] = a[i][j];
a[i][j]= a[ptr][j];
a[ptr][j] = temp[0];
}
ratio = a[i][i];
for(k = 0; k < matrixSize + 1; k++){
a[i][k] = a[i][k] / ratio;
}
threader stuff;
stuff.counter = i;
stuff.matrixl = matrixSize;
//MAKE EACH THREAD DO SOMETHING DIFF
//
// parallelstuff(int i, int matrixSize, double **a){
for(threadcount = 0; threadcount < threadNum; threadcount++){
if(pthread_create (&workerThreads[threadcount], NULL, parallelstuff, (void *) &stuff ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
}
while(threadcount != 0){
if(pthread_join (workerThreads[threadcount-1], &retval ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
threadcount--;
}
//create matrix of n size
//backward substitution method
for(i=matrixSize-1; i >=0; i--){
for(k = i; k > 0; k--){
a[k-1][matrixSize] -= a[k-1][i] * a[i][matrixSize];
a[k-1][i] -= a[k-1][i] * a[i][i];
}
}
for(i = 0; i < matrixSize; ++i){
vect[i] = a[i][matrixSize];
}
double l2Norm;
l2Norm = L2(a, bvect, vect, matrixSize);
printf("THIS IS L2 NORM: %f\n", l2Norm);
gettimeofday(&end, NULL);
delta = ((end.tv_sec - start.tv_sec) * 1000000u +
end.tv_usec - start.tv_usec) / 1.e6;
printf("end time: %f\n", delta);
}
}
终端返回:
(gdb) run 3 4
Starting program: (filepath removed but its there lol)
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x7ffff72fe700 (LWP 32612)]
[Thread 0x7ffff72fe700 (LWP 32612) exited]
[New Thread 0x7ffff6afd700 (LWP 32613)]
[New Thread 0x7ffff62fc700 (LWP 32614)]
[Thread 0x7ffff6afd700 (LWP 32613) exited]
[Thread 0x7ffff62fc700 (LWP 32614) exited]
[New Thread 0x7ffff5afb700 (LWP 32615)]
[Thread 0x7ffff5afb700 (LWP 32615) exited]
[New Thread 0x7ffff72fe700 (LWP 32616)]
[Thread 0x7ffff72fe700 (LWP 32616) exited]
[New Thread 0x7ffff6afd700 (LWP 32617)]
[Thread 0x7ffff6afd700 (LWP 32617) exited]
[New Thread 0x7ffff62fc700 (LWP 32618)]
[Thread 0x7ffff62fc700 (LWP 32618) exited]
[New Thread 0x7ffff5afb700 (LWP 32619)]
[Thread 0x7ffff5afb700 (LWP 32619) exited]
[New Thread 0x7ffff72fe700 (LWP 32620)]
[Thread 0x7ffff72fe700 (LWP 32620) exited]
[New Thread 0x7ffff6afd700 (LWP 32621)]
[Thread 0x7ffff6afd700 (LWP 32621) exited]
[New Thread 0x7ffff62fc700 (LWP 32622)]
[Thread 0x7ffff62fc700 (LWP 32622) exited]
[New Thread 0x7ffff5afb700 (LWP 32623)]
[Thread 0x7ffff5afb700 (LWP 32623) exited]
THIS IS L2 NORM: 0.000000
end time: 0.009503
[Inferior 1 (process 32611) exited normally]
(gdb)
编辑:我在 GDB 中发现了一些有趣的现象,也许能说明不少问题:程序会一直循环,最后出现段错误。有人可以帮我理解这是怎么回事吗?
【问题讨论】:
-
2 种口味?我以前有一个 checkargs 的原型,但我把 checkargs 移到了 main 上面,只是忘了把原型拿出来
-
哎呀,我错过了
; -
首先,去掉程序中所有的强制类型转换(cast)。它们完全没用,还可能掩盖错误。其次,不要把指针数组当作假的矩阵来用。C 语言内置了更容易处理的二维数组,像
double (*A)[n] = malloc(sizeof(double[n][n]));这样的写法就能轻松完成这项工作。你的问题很可能就出在复杂的内存分配方案上。 -
另外,请正确缩进你的代码,这很难读。
-
我已经缩进了不合适的行!