【问题标题】:Scatter a Matrix - MPI分散矩阵 - MPI
【发布时间】:2011-06-09 19:08:05
【问题描述】:

我尝试将矩阵逐行分散到所有处理器,但导致分段错误..我不知道我做错了什么..这是我的代码

      /* NOTE(review): per the accepted answer, this fragment has two bugs:
       * (1) A is only allocated/assigned on rank 0, yet &A[0][0] below is
       *     evaluated on EVERY rank — non-root ranks dereference an
       *     uninitialized pointer, which is the reported segfault;
       * (2) each row is malloc'ed separately, so neither A nor strip_A is
       *     guaranteed to be one contiguous buffer, which the single
       *     vector-typed MPI_Scatter below requires. */
      if(rank == 0) {
                    A_row = 10;
                    A_col = 10;
                    /* calculate the strip size */
                    strip_size = A_row / size;

                    /* genarate Matrix A */
                    /* NOTE(review): row-by-row malloc — rows need not be
                     * contiguous in memory (bug (2) above). */
                    A = (double **)malloc(sizeof(double*) * 10);
                    int k = 0;
                    for(i = 0; i < 10; i++) {
                            A[i] = (double*)malloc(sizeof(double) * 10);
                            for(j = 0; j < 10; j++) {
                                    A[i][j] = k;
                                    k++;
                                    printf("%lf  ", A[i][j]);
                            }
                            printf("\n");
                    }
            }

            /* Broadcasting the row, column size of Matrix A as well as strip size and Matrix B*/
            MPI_Bcast(&A_row, 1, MPI_INT, 0, MPI_COMM_WORLD);
            MPI_Bcast(&A_col, 1, MPI_INT, 0, MPI_COMM_WORLD);
            MPI_Bcast(&strip_size, 1, MPI_INT, 0, MPI_COMM_WORLD);

            /* defining a datatype for sub-matrix */
            MPI_Type_vector(strip_size, A_col, A_col, MPI_DOUBLE, &strip);
            MPI_Type_commit(&strip);

            /* NOTE(review): strip_A has the same non-contiguity problem as A. */
            strip_A = (double **)malloc(sizeof(double*)*strip_size);
            for(i= 0; i< strip_size; i++) {
                    strip_A[i] = (double*)malloc(sizeof(double)*A_col);
            }

            /* NOTE(review): &A[0][0] is evaluated on all ranks, but A is
             * uninitialized except on rank 0 — bug (1), the segfault. */
            MPI_Scatter(&A[0][0], 1, strip, &strip_A[0][0], 1, strip, 0, MPI_COMM_WORLD);

            for(i = 0; i < strip_size; i++) {
                    if(i == 0) {
                            printf("rank = %d\n", rank);
                    }
                    for(j = 0; j < A_col; j++) {
                            printf("%lf  ", strip_A[i][j]);
                    }
                    printf("\n");
            }

谁能告诉我怎么了...

这是我运行时的错误

mpirun -np 2 ./a.out



 0.000000  1.000000  2.000000  3.000000  4.000000  5.000000  6.000000  7.000000  8.000000  9.000000
 10.000000  11.000000  12.000000  13.000000  14.000000  15.000000  16.000000  17.000000  18.000000  19.000000
 20.000000  21.000000  22.000000  23.000000  24.000000  25.000000  26.000000  27.000000  28.000000  29.000000
 30.000000  31.000000  32.000000  33.000000  34.000000  35.000000  36.000000  37.000000  38.000000  39.000000
 40.000000  41.000000  42.000000  43.000000  44.000000  45.000000  46.000000  47.000000  48.000000  49.000000
 50.000000  51.000000  52.000000  53.000000  54.000000  55.000000  56.000000  57.000000  58.000000  59.000000
 60.000000  61.000000  62.000000  63.000000  64.000000  65.000000  66.000000  67.000000  68.000000  69.000000
 70.000000  71.000000  72.000000  73.000000  74.000000  75.000000  76.000000  77.000000  78.000000  79.000000
 80.000000  81.000000  82.000000  83.000000  84.000000  85.000000  86.000000  87.000000  88.000000  89.000000
 90.000000  91.000000  92.000000  93.000000  94.000000  95.000000  96.000000  97.000000  98.000000  99.000000 

 rank = 1
 42.000000  43.000000  44.000000  45.000000  46.000000  47.000000  48.000000  49.000000  0.000000  0.000000
 52.000000  53.000000  54.000000  55.000000  56.000000  57.000000  58.000000  59.000000  0.000000  0.000000
 0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
 0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
 0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 
[seismicmstm:07338] *** Process received signal *** [seismicmstm:07338] Signal:
 Segmentation fault (11)
 [seismicmstm:07338] Signal code: 
 (128) [seismicmstm:07338] Failing at
 address: (nil)
 -------------------------------------------------------------------------- mpirun noticed that process rank 1 with PID 7338 on node seismicmstm.cluster exited on signal 11 (Segmentation fault).
 --------------------------------------------------------------------------

【问题讨论】:

  • 对于初学者,你在哪一行得到了段错误?

标签: matrix mpi


【解决方案1】:

这里发生了几件事。好消息是最困难的事情——创建 mpi 数据类型,以及 MPI_Scatter 调用的基本结构——是正确的。

第一个问题是:MPI_Scatter 那一行使用了 &(A[0][0])——但除了 0 号进程(rank 0)之外,其他进程根本没有让 A 指向任何有效内存!于是这些进程对一个未初始化的(随机)指针做了两层解引用,这就是段错误的来源。

正如 suszterpatt 所建议的,一个更微妙的问题是:不能保证你逐行 malloc 出来的各行内存是连续的,所以即使修复了上述问题,分散操作也可能无法正常工作。你试图把 A 中某处连续的 strip_size * A_col 个 double 发送到 strip_A,但 strip_A 可能并不包含那么多连续的 double——它可能是 A_col 个 double、接着一段填充、然后又是 A_col 个 double;实际上各行甚至可能散落在内存各处。三种修复方法按(恕我直言)由简到繁的顺序是:(a) 先分配一整块连续内存,再让二维 C 数组的行指针指向其中正确的位置,使数据在内存中连续;(b) 一次只发送/接收一行;或 (c) 创建一个真正反映你的数据在内存中(可能是任意)布局方式的 MPI 数据类型。

使用 (a) 的方法似乎有效(无论如何,对于 A_row 按大小均分)如下所示:

#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>

/* Scatter a 10x10 matrix row-strip-wise from rank 0 to all ranks.
 * Fixes vs. the posted version:
 *  - pointers are NULL-initialized, so the unconditional free() calls at
 *    the end are safe on ranks that never allocate A/Adata (free(NULL)
 *    is a no-op; the original freed uninitialized pointers on rank != 0);
 *  - MPI_Finalize() is called before returning, as the MPI standard
 *    requires of every MPI program. */
int main(int argc, char** argv) {
  int rank, size;
  int strip_size, A_row, A_col;
  double **A = NULL, **strip_A = NULL;
  double *Adata = NULL, *stripdata = NULL;
  MPI_Datatype strip;
  int i, j;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  if (rank == 0) {
    A_row = 10;
    A_col = 10;
    /* rows handed to each rank; assumes size divides A_row evenly */
    strip_size = A_row / size;

    /* Allocate the matrix as ONE contiguous buffer plus an array of row
     * pointers into it, so a single vector-typed scatter can address it. */
    Adata = malloc(sizeof(double) * A_row * A_col);
    A = malloc(sizeof(double *) * A_row);
    for (i = 0; i < A_row; i++) {
      A[i] = &Adata[i * A_col];
    }

    int k = 0;
    for (i = 0; i < A_row; i++) {
      for (j = 0; j < A_col; j++) {
        A[i][j] = k;
        k++;
      }
    }
  }

  /* every rank needs the dimensions before it can size its buffers */
  MPI_Bcast(&A_row, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&A_col, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&strip_size, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* one strip = strip_size rows of A_col contiguous doubles */
  MPI_Type_vector(strip_size, A_col, A_col, MPI_DOUBLE, &strip);
  MPI_Type_commit(&strip);

  /* receive buffer, likewise contiguous with row pointers on top */
  stripdata = malloc(sizeof(double) * strip_size * A_col);
  strip_A = malloc(sizeof(double *) * strip_size);
  for (i = 0; i < strip_size; i++) {
    strip_A[i] = &stripdata[i * A_col];
  }

  MPI_Scatter(Adata, 1, strip, &(strip_A[0][0]), 1, strip, 0, MPI_COMM_WORLD);

  for (i = 0; i < strip_size; i++) {
    if (i == 0) {
      printf("rank = %d\n", rank);
    }
    for (j = 0; j < A_col; j++) {
      printf("%lf  ", strip_A[i][j]);
    }
    printf("\n");
  }

  MPI_Type_free(&strip);
  free(strip_A);
  free(stripdata);
  /* NULL on ranks != 0, so these are harmless no-ops there */
  free(Adata);
  free(A);

  MPI_Finalize();   /* was missing in the original */
  return 0;
}

【讨论】:

  • 我认为这段代码在最后几行会出现段错误,因为每个进程都会尝试释放原始矩阵的内存(而它只在 rank 0 上分配过)。可以改成 if(rank == 0) { free(Adata); free(A); }
【解决方案2】:

我认为最终,您做错的是将矩阵存储为数组数组。我想你会发现,如果你将它存储在单个数组中(按行优先或列优先顺序,随你的喜好而定),事情会变得容易得多。

【讨论】:

  • 那么,我不能将矩阵分散为数组的数组吗......难道不能实现它......
  • 仅当您可以确保数据在内存中是连续的,因为这是 Scatter 的要求。
  • ..而不仅仅是为了分散!在创建向量类型时,您声明该类型由 A_cols*strip_size 连续双精度数组成(A_cols 双精度数的 strip_size 块,每个块由 A_cols 双精度数分隔)。因此,任何使用该类型的通信 - send、recv、allreduce 等等 - 都需要连续的数据。
【解决方案3】:

只需在你的代码末尾添加 MPI_Finalize(); 即可。;) 参见下面的代码和输出。输出内容是正确的,但由于没有同步屏障(barrier),打印顺序无法保证。你可以使用 MPI_Barrier(),或者改用 MPI_Isend() 和 MPI_Irecv()。Enjoy!

#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>

/* Scatter a 10x10 matrix row-strip-wise from rank 0 to all ranks, then
 * finalize MPI. Fixes vs. the posted version: the `strip` datatype and the
 * stripdata/strip_A buffers are created on EVERY rank, so they must also be
 * released on every rank — the original freed them only on rank 0, leaking
 * the datatype and buffers on all other ranks. A/Adata are NULL-initialized
 * so they can be freed unconditionally (free(NULL) is a no-op). */
int main(int argc, char** argv) {
int rank, size;
int strip_size, A_row, A_col;
double **A = NULL, **strip_A = NULL;
double *Adata = NULL, *stripdata = NULL;
MPI_Datatype strip;
int i, j;

MPI_Init(&argc, &argv);

MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);

  if (rank == 0) {
        A_row = 10;
        A_col = 10;
        /* rows per rank; assumes size divides A_row evenly */
        strip_size = A_row / size;

        /* one contiguous allocation plus row pointers into it, so a
         * single vector-typed scatter can address the whole matrix */
        Adata = malloc(sizeof(double) * A_row * A_col);
        A = malloc(sizeof(double *) * A_row);
        for (i = 0; i < A_row; i++) {
                A[i] = &Adata[i * A_col];
        }
        int k = 0;
        for (i = 0; i < A_row; i++) {
                for (j = 0; j < A_col; j++) {
                        A[i][j] = k;
                        k++;
                }
        }
  }

  /* every rank needs the dimensions before sizing its buffers */
  MPI_Bcast(&A_row, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&A_col, 1, MPI_INT, 0, MPI_COMM_WORLD);
  MPI_Bcast(&strip_size, 1, MPI_INT, 0, MPI_COMM_WORLD);

  /* one strip = strip_size rows of A_col contiguous doubles */
  MPI_Type_vector(strip_size, A_col, A_col, MPI_DOUBLE, &strip);
  MPI_Type_commit(&strip);

  stripdata = malloc(sizeof(double) * strip_size * A_col);
  strip_A = malloc(sizeof(double *) * strip_size);
  for (i = 0; i < strip_size; i++) {
        strip_A[i] = &stripdata[i * A_col];
  }

  MPI_Scatter(Adata, 1, strip, &(strip_A[0][0]), 1, strip, 0, MPI_COMM_WORLD);

  for (i = 0; i < strip_size; i++) {
        if (i == 0) {
                printf("rank = %d\n", rank);
        }
        for (j = 0; j < A_col; j++) {
                printf("%lf  ", strip_A[i][j]);
        }
        printf("\n");
  }

  /* rank-local resources: release on EVERY rank (original leaked these
   * on ranks != 0 by guarding the cleanup with rank == 0) */
  MPI_Type_free(&strip);
  free(strip_A);
  free(stripdata);
  /* NULL on ranks != 0, so harmless no-ops there */
  free(Adata);
  free(A);

  MPI_Finalize();

  return 0;
}

输出

rank = 0
0.000000  1.000000  2.000000  3.000000  4.000000  5.000000  6.000000  7.000000  8.000000  9.000000
rank = 2
 20.000000  21.000000  22.000000  23.000000  24.000000  25.000000  26.000000  27.000000  28.000000  29.000000  
rank = 6
60.000000  61.000000  62.000000  63.000000  64.000000  65.000000  66.000000  67.000000  68.000000  69.000000  
rank = 1
 10.000000  11.000000  12.000000  13.000000  14.000000  15.000000  16.000000  17.000000  18.000000  19.000000  
rank = 3
30.000000  31.000000  32.000000  33.000000  34.000000  35.000000  36.000000  37.000000  38.000000  39.000000  
rank = 5
50.000000  51.000000  52.000000  53.000000  54.000000  55.000000  56.000000  57.000000  58.000000  59.000000  
rank = 8
80.000000  81.000000  82.000000  83.000000  84.000000  85.000000  86.000000  87.000000  88.000000  89.000000  
rank = 7
70.000000  71.000000  72.000000  73.000000  74.000000  75.000000  76.000000  77.000000  78.000000  79.000000  
rank = 9
90.000000  91.000000  92.000000  93.000000  94.000000  95.000000  96.000000  97.000000  98.000000  99.000000  
rank = 4
40.000000  41.000000  42.000000  43.000000  44.000000  45.000000  46.000000  47.000000  48.000000  49.000000

【讨论】:

  • 这应该是对已接受答案的(有效)评论。
猜你喜欢
  • 2012-09-09
  • 2016-11-12
  • 2013-12-23
  • 2011-11-24
  • 2023-03-21
  • 2015-06-02
  • 2018-09-08
  • 2020-09-28
相关资源
最近更新 更多