如何使用 C++ STL 向量通过 Boost MPI 发送矩阵的列？答案

【问题标题】：How to send columns of a matrix via Boost MPI using C++ STL vectors?如何使用 C++ STL 向量通过 Boost MPI 发送矩阵的列？
【发布时间】：2011-11-21 19:40:58
【问题描述】：

我想发送以 STL 向量形式存储的矩阵的多列

    // 10 x 10 matrix of doubles, stored as a vector of 10 row vectors.
    vector < vector < double > > A ( 10, vector <double> (10));

并且希望通过 Boost MPI 发送时无需先把内容复制到某个缓冲区（因为这里的计算时间至关重要）。

我已经知道如何用纯 MPI 完成此操作。下面的示例代码将 10 x 10 矩阵的第 4、第 5 和第 6 列从一个进程 (rank==0) 发送到另一个进程 (rank==1)。（不过我不明白为什么必须在 MPI_Type_vector 的第三个参数上加“2”。有人知道原因吗？）

    // Sends three adjacent columns of a 10 x 10 float matrix from rank 0 to
    // rank 1 with a strided MPI datatype, so no staging buffer is needed.
    int rank, size;
    MPI_Init (&argc, &argv);        /* starts MPI */
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);  /* get current process id */
    MPI_Comm_size (MPI_COMM_WORLD, &size);  /* get number of processes */

    // fill matrices: A holds the data to send, A_copy is the zeroed receive target
    vector< vector <float> >A(10, vector <float> (10));
    vector< vector <float> >A_copy(10, vector <float> (10));
    for (int i=0; i!=10; i++)
    {
            for (int j=0; j!=10; j++)
            {
                    A[i][j]=j+ i*10;
                    A_copy[i][j]=0.0;
            }
    }

    int dest=1;
    int tag=1;
    // define new type = three columns: one block of 3 floats per matrix row
    MPI_Datatype    newtype;
    MPI_Type_vector(10,     /* # blocks = # rows */
    3,                      /* block length: 3 columns */
    10+2,                   /* stride between block starts, in floats */
    MPI_FLOAT,              /* elements are float */
    &newtype);              /* MPI derived datatype */
    // NOTE(review): the stride is 12 rather than 10 presumably because every
    // row of a vector< vector<float> > is its own heap allocation and the
    // allocator happens to place consecutive rows 12 floats apart here. The
    // language does not guarantee that spacing - confirm on the target
    // platform, or keep the matrix in one contiguous buffer and use stride 10.
    MPI_Type_commit(&newtype);

    if (rank==0)
    {
            MPI_Send(&A[0][4], 1, newtype, dest, tag, MPI_COMM_WORLD);
    }
    if (rank==1)
    {
            MPI_Status status;
            MPI_Recv(&A_copy[0][4], 1, newtype, 0, tag, MPI_COMM_WORLD, &status);
    }
    // Release the committed datatype before shutting MPI down.
    MPI_Type_free(&newtype);
    MPI_Finalize();

在 Boost 网页上，他们声称 MPI_Type_vector “在 Boost.MPI 中自动使用”（http://www.boost.org/doc/libs/1_47_0/doc/html/mpi/tutorial.html#mpi.c_mapping）。

但我找不到如何详细执行此操作的示例。只知道如何使用 Boost 发送整个矩阵或每个元素。

提前谢谢你，

托比亚斯

【问题讨论】：

标签： c++ boost stl matrix mpi

【解决方案1】：

我通过编写自己的类“columns”并为其实现序列化来解决了这个问题。下面是示例代码：

#include <cstddef>
#include<iostream>
#include <stdexcept>
#include<vector>
#include <boost/mpi/environment.hpp>
#include <boost/mpi/communicator.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/complex.hpp>

using namespace std;   
namespace mpi=boost::mpi;

/// Non-owning view of a range of columns of a matrix stored as a vector of
/// row vectors.
///
/// Only a pointer to the matrix is kept, so the matrix must outlive the view.
/// Every constructor initialises every member, so a view can be copied safely
/// (for example when it is stored in a std::vector).
class columns
{
public:
    int Nr;   ///< number of rows of the viewed matrix
    int Nc;   ///< number of columns, taken from the first row (0 if there are no rows)
    int J0;   ///< column start index
    int J1;   ///< column end index; the serialize loop in this file stops when j reaches it
    std::vector < std::vector <double> >* matrix;   ///< viewed matrix, not owned; null when default-constructed

    /// View the columns from j0 up to j1 of A.
    columns(std::vector < std::vector <double> > & A, int j0, int j1)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),   // A[0] on an empty matrix would be undefined
          J0(j0),
          J1(j1),
          matrix(&A)
    {
    }

    /// View all columns of A (J0 = 0, J1 = Nc).
    columns(std::vector < std::vector <double> > & A)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(0),
          J1(Nc),   // Nc is declared before J1, so it is already initialised here
          matrix(&A)
    {
    }

    /// Empty view attached to no matrix.
    columns()
        : Nr(0), Nc(0), J0(0), J1(0), matrix(0)
    {
    }
};

namespace boost {
namespace serialization {

    /// Non-intrusive Boost.Serialization hook for 'columns'.
    ///
    /// Transfers the four header fields (Nr, Nc, J0, J1) and then every matrix
    /// element in rows [0, Nr) and columns [J0, J1), reading from or writing
    /// into the matrix that g points to.
    ///
    /// @throws std::out_of_range if the range is inverted, negative, or does
    ///         not fit the attached matrix.
    /// @throws std::logic_error  if elements must be transferred but no
    ///         matrix is attached.
    template<class Archive>
    void serialize(Archive & ar, columns & g, const unsigned int version)
    {
        (void)version;   // no versioned layout; silences the unused-parameter warning

        ar & g.Nr;
        ar & g.Nc;
        ar & g.J0;
        ar & g.J1;

        // With a loading archive the header now holds the sender's values, so
        // it has to be checked against the local matrix before indexing it.
        if (g.Nr < 0 || g.J0 < 0 || g.J1 < g.J0)
        {
            throw std::out_of_range("columns: invalid row count or column range");
        }
        if (g.Nr == 0 || g.J0 == g.J1)
        {
            return;   // nothing to transfer
        }
        if (g.matrix == 0)
        {
            throw std::logic_error("columns: no matrix attached");
        }
        if (static_cast<std::size_t>(g.Nr) > g.matrix->size())
        {
            throw std::out_of_range("columns: more rows than the attached matrix has");
        }

        for (int i=0; i!=g.Nr; i++)
        {
            std::vector <double> & row = (*g.matrix)[i];
            if (static_cast<std::size_t>(g.J1) > row.size())
            {
                throw std::out_of_range("columns: column range exceeds row length");
            }
            for (int j=g.J0; j!=g.J1; j++)
            {
                ar & row[j];
            }
        }
    }
}
}


int main(int argc, char * argv[])
{
mpi::environment env(argc, argv);
mpi::communicator world;
int myid=world.rank();
int NN=world.size();

int Nl=3;
int Ng=5;

int myStart=myid*Ng/NN;
int myEnd=(myid+1)*Ng/NN;
int myN=myEnd-myStart;

if (myid==0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=n+j;
        }
    }

    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "\t";
        }
        cout << endl;
    }
    cout << "############################" << endl;

    // divide grid for parallization
    vector<int> starts(NN);
    vector<int> ends(NN);
    vector<int> Nwork(NN);
    for (int p=0; p!=NN; p++)
    {
        starts[p]=p*Ng/NN;
        ends[p]=(p+1)*Ng/NN;
        Nwork[p]=ends[p]-starts[p];
    }


    vector<columns> input_columns(NN);
    for (int p=1; p!=NN; p++)
    {
        input_columns[p]=columns(input, starts[p], ends[p]);
    }


    for (int p=1; p!=NN; p++)
    {
        world.send(p, 1, input_columns[p]);
    }
}

if (myid!=0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=0.0;
        }
    }

    columns input_columns  = columns(input, myStart, myEnd);

    world.recv(0, 1, input_columns); 


    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "\t";
        }
        cout << endl;
    }
    cout << "############################" << endl;
}
}

解释：“columns”类包含一个指向矩阵的指针和两个数字，指示列的开始和结束位置。

/// Non-owning view of a range of columns of a matrix stored as a vector of
/// row vectors.
///
/// Only a pointer to the matrix is kept, so the matrix must outlive the view.
/// Every constructor initialises every member, so a view can be copied safely
/// (for example when it is stored in a std::vector).
class columns
{
    public:
    int Nr;              // number of rows in the matrix
    int Nc;              // number of columns in the matrix (0 if there are no rows)
    int J0;              // column start index
    int J1;              // column end index
    std::vector < std::vector <double> >* matrix;   // viewed matrix, not owned; null when default-constructed

    // View the columns from j0 up to j1 of A.
    columns(std::vector < std::vector <double> > & A, int j0, int j1)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),   // A[0] on an empty matrix would be undefined
          J0(j0),
          J1(j1),
          matrix(&A)
    {
    }

    // View all columns of A (J0 = 0, J1 = Nc).
    columns(std::vector < std::vector <double> > & A)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(0),
          J1(Nc),   // Nc is declared before J1, so it is already initialised here
          matrix(&A)
    {
    }

    // Empty view attached to no matrix.
    columns()
        : Nr(0), Nc(0), J0(0), J1(0), matrix(0)
    {
    }
};

下面的代码告诉 boost-serialization 如何序列化这个'columns'-class：

namespace boost {
    namespace serialization {

            // Non-intrusive Boost.Serialization hook for 'columns': transfers
            // the header fields (Nr, Nc, J0, J1) and then every matrix element
            // in rows [0, Nr) and columns [J0, J1).
            // Throws std::out_of_range / std::logic_error instead of indexing
            // outside the attached matrix or through a null pointer.
            template<class Archive>
            void serialize(Archive & ar, columns & g, const unsigned int version)
            {
                    (void)version;   // no versioned layout; silences the unused-parameter warning

                    ar & g.Nr;
                    ar & g.Nc;
                    ar & g.J0;
                    ar & g.J1;

                    // With a loading archive the header now holds the sender's
                    // values, so check it against the local matrix first.
                    if (g.Nr < 0 || g.J0 < 0 || g.J1 < g.J0)
                    {
                            throw std::out_of_range("columns: invalid row count or column range");
                    }
                    if (g.Nr == 0 || g.J0 == g.J1)
                    {
                            return;   // nothing to transfer
                    }
                    if (g.matrix == 0)
                    {
                            throw std::logic_error("columns: no matrix attached");
                    }
                    if (static_cast<std::size_t>(g.Nr) > g.matrix->size())
                    {
                            throw std::out_of_range("columns: more rows than the attached matrix has");
                    }

                    for (int i=0; i!=g.Nr; i++)
                    {
                            std::vector <double> & row = (*g.matrix)[i];
                            if (static_cast<std::size_t>(g.J1) > row.size())
                            {
                                    throw std::out_of_range("columns: column range exceeds row length");
                            }
                            for (int j=g.J0; j!=g.J1; j++)
                            {
                                    ar & row[j];
                            }
                    }
            }
    }
}

然后填充矩阵 'input'：

vector < vector <double> > input (Nl, vector <double>(Ng));
            for (int n=0; n!=Nl; n++)
            {
                    for (int j=0; j!=Ng; j++)
                    {
                            input[n][j]=n+j;
                    }
            }

并初始化 columns 类的对象（它现在包含一个指向矩阵 'input' 的指针）：

vector<columns> input_columns(NN)

并通过下面的调用将其发送到另一个（子）进程：

// Send only the columns object meant for rank p (tag 1), not the whole vector.
world.send(p, 1, input_columns[p]);

最后在接收端通过下面的调用接收：

// Receive the message with tag 1 from rank 0 into input_columns.
world.recv(0, 1, input_columns);

【讨论】：

【解决方案2】：

如果您要在 A 上执行大量列操作，也许您应该存储 A 转置而不是 A。这会将列放在连续的内存位置。这意味着您可以使用 MPI_Send 发送列，而无需执行任何复制操作。此外，列操作会更快。

【讨论】：

感谢您的回答。但我需要在程序的其他地方处理矩阵 A 的行 A[i]，例如将行 A[i] 作为参数传递给一个需要向量的函数。因此，如果不复制整个矩阵，我就无法交换行和列。无论如何，既然用纯 MPI 可以做到，那么用 Boost MPI 也应该有办法做到这一点。