【问题标题】:Matrix Multiplication MPI + OMP(矩阵乘法 MPI + OMP)
【发布时间】:2015-04-03 03:39:58
【问题描述】:

我在使用 MPI 和 OMP 进行矩阵乘法时遇到了问题。代码可以正常编译,但结果是错误的:矩阵 c(在 matmul 函数中)中的值非常大,而矩阵 C(在 main 中)甚至没有得到 matmul 函数的计算结果。如果有人知道如何解决,请帮帮我。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <omp.h>
#include <mpi.h>

// State shared between main and matmul:
//   offset       - start of the index range a process works on in matmul
//   rows         - length of that range
//   br_elemenata - per-process share, computed in matmul as N / ukupno rounded up
//   cvor_id      - loop counter over the other processes in matmul
//   cvor         - rank of this process (filled in by MPI_Comm_rank in main)
//   ukupno       - number of processes (filled in by MPI_Comm_size in main)
int offset,rows,br_elemenata,cvor_id,cvor,ukupno;
// Status object handed to every MPI_Recv call in matmul.
MPI_Status status;

// Returns the time reported by gettimeofday as a double number of seconds,
// with the microsecond field added as the fractional part.
// The return value of gettimeofday itself is not inspected.
double gettime(void) {
   struct timeval now;
   double seconds;
   double micros;

   gettimeofday(&now, NULL);
   seconds = now.tv_sec;
   micros = now.tv_usec;
   return seconds + 1e-6 * micros;
}

// Sets every element of the N x N matrix stored in mat to val.
//   N   - matrix dimension; nothing is written when N <= 0
//   mat - buffer of at least N * N doubles, element (i, j) at mat[i * N + j]
//   val - value stored in each element
void matfill(long N, double *mat, double val) {
   for (long row = 0; row < N; ++row) {
      double *line = mat + row * N;

      for (long col = 0; col < N; ++col)
         line[col] = val;
   }
}

// Multiplies a by b into c, with the work split across the MPI processes.
// Process 0 sends each other process an offset, a row count, part of a and all
// of b; every process then runs the triple loop over its own range, and the
// other processes send their part of c back to process 0.
//
//   N    - matrix dimension (main allocates each matrix as N * N doubles)
//   a, b - input matrices
//   c    - output; accumulated with +=, so it is expected to arrive zeroed
//          (main fills it with 0.0 before calling)
//
// Reads the globals cvor and ukupno; writes offset, rows, br_elemenata,
// cvor_id and status.
void matmul(long N, double *a, double *b, double *c) {
   long i, j, k;

  br_elemenata = N / ukupno;            // determine the number of elements per node

  if (N % ukupno != 0) br_elemenata++;      // round up so that no element is left out

  if (cvor == 0){
    for (cvor_id=1;cvor_id<ukupno;cvor_id++){
      offset = cvor_id * br_elemenata;
      rows = N - offset;
      if (rows > br_elemenata)
    rows = br_elemenata;
      // send the data from node 0 to the other nodes
      // NOTE(review): all four messages to a given node use tag 0, so they are
      // told apart only by the order in which they are sent and received.
      MPI_Send(&offset, 1, MPI_INT, cvor_id, 0, MPI_COMM_WORLD);
      MPI_Send(&rows, 1, MPI_INT, cvor_id, 0, MPI_COMM_WORLD);
      // NOTE(review): rows*N elements are sent, i.e. offset is treated as a row
      // number, but a+offset advances by elements, not by rows of N elements
      // (matfill addresses element (i, j) as mat[i * N + j]).
      MPI_Send(a+offset, rows*N, MPI_DOUBLE, cvor_id, 0, MPI_COMM_WORLD);
      MPI_Send(b, N*N, MPI_DOUBLE, cvor_id, 0, MPI_COMM_WORLD);
  }
  // node 0 keeps the first slice for itself
  offset = 0;
  rows = br_elemenata;
  } else {
    // receive the data from node 0
    MPI_Recv(&offset, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(&rows, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
    // NOTE(review): same unscaled a+offset as on the sending side.
    MPI_Recv(a+offset, rows*N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
    MPI_Recv(b, N*N, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);
}

MPI_Barrier(MPI_COMM_WORLD);

// Each process multiplies its own range [offset, offset + rows).
// NOTE(review): c[i + j] and a[i + k] leave out the row stride N, unlike
// b[k * N + j], so different (i, j) pairs with the same sum hit the same
// element of c.
#pragma omp parallel for shared(a,b,c) private(i,j,k)
   for (i = offset; i < offset + rows; i ++)
      for (j = 0; j < N; j ++)
         for (k = 0; k < N; k ++)
            c[i + j] += a[i + k] * b[k * N + j];
  // NOTE(review): i is listed as private on the pragma above; which element
  // this prints after the loop is unclear - confirm.
  printf("Clan: %5.2f\n",c[i]);
  // Collect the partial results on node 0 (tag 1 for every message).
  if (cvor == 0) {
    for (cvor_id = 1; cvor_id < ukupno; cvor_id++) {
    MPI_Recv(&offset, 1, MPI_INT, cvor_id, 1, MPI_COMM_WORLD, &status);
    MPI_Recv(&rows, 1, MPI_INT, cvor_id, 1, MPI_COMM_WORLD, &status);
    // NOTE(review): c+offset is again an element offset, not a row offset.
    MPI_Recv(c+offset, rows*N, MPI_DOUBLE, cvor_id, 1, MPI_COMM_WORLD, &status);
    }
  } else {
    MPI_Send(&offset, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
    MPI_Send(&rows, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
    MPI_Send(c+offset, rows*N, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
  }  
}

// Program entry point: reads the matrix dimension from argv[1], fills A with
// 1.0, B with 2.0 and C with 0.0, times one call to matmul and prints the
// dimension, the elapsed time, a rate derived from (2N-1)*N*N / time, and C[6].
// Returns 1 when the dimension argument is missing, EXIT_SUCCESS otherwise.
int main(int argc, char **argv) {
   long N;
   double *A, *B, *C, t;


   MPI_Init(&argc,&argv);       // MPI initialization

   MPI_Comm_size(MPI_COMM_WORLD,&ukupno);   // determine the total number of nodes
   MPI_Comm_rank(MPI_COMM_WORLD,&cvor);     // determine this node's rank, which identifies it in communication


   if (argc!=2) {
     if (cvor==0) printf("Morate unijeti dimenziju matrice!");
     MPI_Finalize();                        // no argument was given on invocation: end the program
     return 1;
   }

   // NOTE(review): the atoi result is used unchecked, and so are the three
   // malloc results below.
   N = atoi(argv[1]);
   A = (double *) malloc(N * N * sizeof(double));
   B = (double *) malloc(N * N * sizeof(double));
   C = (double *) malloc(N * N * sizeof(double));
   matfill(N, A, 1.0);
   matfill(N, B, 2.0);
   matfill(N, C, 0.0);




   t = gettime();
   matmul(N, A, B, C);
   t = gettime() - t;

   // The rank-0 guard below is commented out, so every process prints.
   // if (cvor == 0){
      fprintf(stdout, "%ld\t%le\t%le\n", N, t, (2 * N - 1) * N * N / t);
      fflush(stdout);

      // NOTE(review): C[6] lies outside the buffer when N * N <= 6.
      printf("Clan: %f\n",C[6]);
  //  }

   free(A);
   free(B);
   free(C);

   // NOTE(review): MPI_Finalize() is called on the error path above but not
   // before this return.
   return EXIT_SUCCESS;
}

【问题讨论】:

    标签: matrix mpi multiplication openmp


    【解决方案1】:

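    主要问题出在通信操作中使用的 offset:offset 是行号,而矩阵按行连续存储,所以指针偏移应该是 offset*N(例如 a+offset*N),而不是 offset。同理,计算循环中的下标也要带上行跨度 N,即 c[i*N + j] 和 a[i*N + k]。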

    更正的代码:

    #include <limits.h>
    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/time.h>
    #include <omp.h>
    #include <mpi.h>
    
    // State shared between main and matmul:
    //   offset       - first matrix row of the slice a process works on
    //   rows         - number of rows in that slice
    //   br_elemenata - rows per process, computed in matmul as N / ukupno rounded up
    //   cvor_id      - loop counter over the other processes in matmul
    //   cvor         - rank of this process (filled in by MPI_Comm_rank in main)
    //   ukupno       - number of processes (filled in by MPI_Comm_size in main)
    int offset,rows,br_elemenata,cvor_id,cvor,ukupno;
    // Status object handed to every MPI_Recv call in matmul.
    MPI_Status status;
    
    // Wall-clock reading from gettimeofday, returned as seconds in a double
    // (whole seconds plus the microsecond field scaled by 1e-6).
    // The status returned by gettimeofday is not checked.
    double gettime(void) {
        struct timeval stamp;
        gettimeofday(&stamp, NULL);

        double whole = (double)stamp.tv_sec;
        double fraction = 1e-6 * (double)stamp.tv_usec;
        return whole + fraction;
    }
    
    // Stores val in every element of the row-major N x N matrix mat.
    // Writes nothing when N <= 0.
    void matfill(long N, double *mat, double val) {
        long row = 0;

        while (row < N) {
            double *cell = mat + row * N;
            double *row_end = cell + N;

            while (cell < row_end)
                *cell++ = val;
            ++row;
        }
    }
    
    // Prints the row-major N x N matrix mat to stdout: one text line per row,
    // each element formatted with %g and followed by a single space.
    void matprint(long N, double *mat) {
        for (long row = 0; row < N; ++row) {
            const double *line = mat + row * N;

            for (long col = 0; col < N; ++col)
                printf("%g ", line[col]);
            printf("\n");
        }
    }
    
    // Turns the row-major N x N matrix mat into a diagonal matrix whose i-th
    // diagonal entry is i (as a double); every off-diagonal entry becomes 0.
    // The val parameter is accepted but never read.
    void matdiag(long N, double *mat, double val) {
        (void)val;

        for (long row = 0; row < N; ++row) {
            for (long col = 0; col < N; ++col)
                mat[row * N + col] = (row == col) ? (double)row : 0;
        }
    }
    
    /*
     * Computes C += A * B for row-major N x N matrices, splitting the rows of
     * A (and of C) across the MPI processes.
     *
     * Process 0 sends every other process its row slice of a plus the whole of
     * b; each process multiplies its own rows (the loop carries an OpenMP
     * "parallel for" pragma); the other processes then send their rows of c
     * back to process 0.
     *
     *   N    - matrix dimension
     *   a, b - input matrices; every process must pass N * N element buffers,
     *          because a slice is stored at its global row position
     *   c    - output, accumulated with +=, so the caller must zero it first;
     *          only process 0 ends up with the complete product
     *
     * Reads the globals cvor (this rank) and ukupno (process count); writes
     * offset, rows, br_elemenata, cvor_id and status.
     */
    void matmul(long N, double *a, double *b, double *c) {
        long i, j, k;

        /* Rows per process, rounded up so that no row is left out. */
        br_elemenata = N / ukupno;
        if (N % ukupno != 0) br_elemenata++;

        if (cvor == 0){
            for (cvor_id=1;cvor_id<ukupno;cvor_id++){
                offset = cvor_id * br_elemenata;
                /*
                 * Because of the rounding above, the slices of the last
                 * processes can start past the end of the matrix (e.g. N = 5
                 * on 4 processes gives offset 6 for rank 3). Clamp so that
                 * such a process gets zero rows instead of a negative count
                 * and an out-of-range pointer.
                 */
                if (offset > N)
                    offset = (int)N;
                rows = N - offset;
                if (rows > br_elemenata)
                    rows = br_elemenata;
                /* Ship the slice description, the slice of a, and all of b. */
                MPI_Send(&offset, 1, MPI_INT, cvor_id, 0, MPI_COMM_WORLD);
                MPI_Send(&rows, 1, MPI_INT, cvor_id, 1, MPI_COMM_WORLD);
                MPI_Send(a+(offset*N), rows*N, MPI_DOUBLE, cvor_id, 2, MPI_COMM_WORLD);
                MPI_Send(b, N*N, MPI_DOUBLE, cvor_id, 3, MPI_COMM_WORLD);
            }
            /* Process 0 keeps the first slice for itself. */
            offset = 0;
            rows = br_elemenata;
        } else {
            /* Receive this process's slice from process 0. */
            MPI_Recv(&offset, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
            MPI_Recv(&rows, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status);
            MPI_Recv(a+(offset*N), rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, &status);
            MPI_Recv(b, N*N, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, &status);
        }

        MPI_Barrier(MPI_COMM_WORLD);

        /* Multiply the rows [offset, offset + rows) owned by this process. */
    #pragma omp parallel for shared(a,b,c) private(i,j,k)
        for (i = offset; i < offset + rows; i ++)
            for (j = 0; j < N; j ++)
                for (k = 0; k < N; k ++)
                    c[i*N + j] += a[i*N + k] * b[k * N + j];

        /*
         * Debug print of the first element this process computed. The loop
         * counter i must not be used here: it is private to the parallel loop
         * and has no defined value afterwards.
         */
        if (rows > 0)
            printf("Clan: %5.2f\n",c[offset*N]);

        /* Gather the result rows on process 0. */
        if (cvor == 0) {
            for (cvor_id = 1; cvor_id < ukupno; cvor_id++) {
                MPI_Recv(&offset, 1, MPI_INT, cvor_id, 4, MPI_COMM_WORLD, &status);
                MPI_Recv(&rows, 1, MPI_INT, cvor_id, 5, MPI_COMM_WORLD, &status);
                MPI_Recv(c+(N*offset), rows*N, MPI_DOUBLE, cvor_id, 6, MPI_COMM_WORLD, &status);
            }
        } else {
            MPI_Send(&offset, 1, MPI_INT, 0, 4, MPI_COMM_WORLD);
            MPI_Send(&rows, 1, MPI_INT, 0, 5, MPI_COMM_WORLD);
            MPI_Send(c+(N*offset), rows*N, MPI_DOUBLE, 0, 6, MPI_COMM_WORLD);
        }
    }
    
    /*
     * Program entry point: reads the matrix dimension N from argv[1], builds
     * A (diagonal matrix from matdiag) and B (all 2.0), times one call to
     * matmul, and lets process 0 print the timing line followed by A, B and C.
     *
     * Returns 1 when the dimension argument is missing or invalid and
     * EXIT_SUCCESS otherwise; aborts the whole MPI job if allocation fails.
     */
    int main(int argc, char **argv) {
        long N;
        double *A, *B, *C, t;
        char *end;

        MPI_Init(&argc,&argv);                   // initialize MPI

        MPI_Comm_size(MPI_COMM_WORLD,&ukupno);   // total number of processes
        MPI_Comm_rank(MPI_COMM_WORLD,&cvor);     // rank that identifies this process

        if (argc!=2) {
            // no dimension argument was given: report it once and stop
            if (cvor==0) printf("Morate unijeti dimenziju matrice!");
            MPI_Finalize();
            return 1;
        }

        // strtol (unlike atoi) lets non-numeric or trailing input be rejected.
        // matmul passes N * N to MPI as an int element count, so N * N has to
        // fit in an int; the same bound keeps N * N from overflowing below.
        N = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || N < 1 || N > INT_MAX / N) {
            if (cvor==0) fprintf(stderr, "Neispravna dimenzija matrice: %s\n", argv[1]);
            MPI_Finalize();
            return 1;
        }

        // calloc checks the count * size multiplication for overflow
        A = calloc((size_t)(N * N), sizeof *A);
        B = calloc((size_t)(N * N), sizeof *B);
        C = calloc((size_t)(N * N), sizeof *C);
        if (A == NULL || B == NULL || C == NULL) {
            fprintf(stderr, "Nema dovoljno memorije za matrice %ldx%ld\n", N, N);
            free(A);
            free(B);
            free(C);
            // the other processes would otherwise block inside matmul
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            return EXIT_FAILURE;
        }

        // matdiag writes every element of A, so A needs no separate fill
        matdiag(N,A, 1) ;
        matfill(N, B, 2.0);
        matfill(N, C, 0.0);      // matmul accumulates into C with +=

        t = gettime();
        matmul(N, A, B, C);
        t = gettime() - t;

        if (cvor == 0){
            // operation count computed in double so it cannot overflow a long
            fprintf(stdout, "%ld\t%le\t%le\n", N, t, (2.0 * N - 1.0) * N * N / t);
            fflush(stdout);
            // C[6] only exists when the matrix has more than six elements
            if (N * N > 6)
                printf("Clan: %f\n",C[6]);
            printf("A\n");
            matprint(N, A) ;
            printf("B\n");
            matprint(N, B) ;
            printf("C\n");
            matprint(N, C) ;
        }

        free(A);
        free(B);
        free(C);
        MPI_Finalize();
        return EXIT_SUCCESS;
    }
    

    编译:mpicc -fopenmp main.c -o main(基于 GCC 的 mpicc 需要加 -fopenmp,否则 OpenMP 指令会被忽略) 运行:mpirun -np 4 main 8(程序要求把矩阵维数作为唯一参数传入,这里以 8 为例)

    如果想进一步改进,可以了解 MPI_Bcast() 函数,它把同一份数据发送给所有进程。MPI_Scatter() 和 MPI_Gather() 则可用于把矩阵分发到各个进程,或把结果收集回指定的进程。

    此外,BLAS 的 dgemm() 函数可用于加速每个进程内部的计算。

    为了减少内存占用,(除进程 0 之外)A 和 C 的分配大小可以缩减到只容纳 br_elemenata 行……不过这样一来,偏移量又得再改一次!

    【讨论】:

      猜你喜欢
      • 2013-12-23
      • 2018-09-08
      • 2017-10-30
      • 2012-11-04
      • 2014-04-21
      • 2017-10-12
      • 2016-01-23
      • 2013-03-05
      • 2017-07-18
      相关资源
      最近更新 更多