【问题标题】:Segmentation fault (core dumped) with my mex cuda code while exiting MATLAB - 退出 MATLAB 时我的 mex cuda 代码出现分段错误(核心转储)
【发布时间】:2014-03-22 01:17:02
【问题描述】:

我有一个混合的 mex 和 cuda 代码来评估 phi = 1/2*x'*A*x - b'*x,其中 x 和 b 都是 m x 1 向量,A 是 m x m 矩阵。该代码可以编译和执行,它也给了我正确的答案。

但是,当我退出 MATLAB 时,我不断收到错误 Segmentation fault (core dumped)。我在代码中所做的是,我在 MATLAB 中生成 A、b 和 x,使用 mex 函数将它们传递给 cuda。然后我在 GPU 上评估 phi = 1/2*x'*A*x - b'*x(使用 cuda 线性代数库 cublas)并使用 mex 将 phi 传输回 MATLAB。

谁能帮我看看问题出在哪里?提前致谢。

顺便说一句,这是我的编译方式:

nvcc -arch=sm_20 -c test.cu -Xcompiler -fPIC -I/site/local/matlab-r2012a/extern/include/
mex -L/usr/local/cuda/lib64 -lcudart -lcublas test.o

启动 MATLAB 时,需要通过 LD_PRELOAD 预加载 libstdc++ 库:

LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6 matlab

在 MATLAB 中,我做了以下代码测试:

% Build a random test problem: x and b are N-by-1 vectors and A = B'*B is an N-by-N matrix.
N = 500; x = randn(N,1);B = randn(N);A = B'*B; b = randn(N,1);    
% Reference value of 1/2*x'*A*x - b'*x computed directly in MATLAB, timed with tic/toc.
tic, 1/2*x'*A*x-b'*x, toc    
% The same quantity computed by the MEX function, timed the same way.
% NOTE(review): the build commands shown earlier compile test.cu / test.o; confirm the
% resulting MEX file is actually named cublas_mex_test.
tic, phi = cublas_mex_test(x,A,b),toc

下面是我的代码test.cu:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include "cublas_v2.h"
#include "cuda.h"
#include "mex.h"
#include <time.h>
#define IDX2C(i,j,ld) (((j)*(ld))+(i))

// this is the actual function that evaluates phi = 1/2*x'*A*x - b'*x
int PhiEval(double *x, double *A, double *b, double &phi, size_t m)
{ 
    cudaError_t cudaStat; 
    cublasStatus_t stat;
    cublasHandle_t handle;
    clock_t start, end;

    start = clock();
    //host data
    double *bx, *xAx;
    bx = (double*)malloc(1*sizeof(double)); 
    xAx = (double*)malloc(1*sizeof(double)); 

    //device data
    double* A_d, * x_d, *b_d, *Ax_d, *bx_d, *xAx_d;
    cudaStat = cudaMalloc ((void**)&A_d, m*m*sizeof(double));
    cudaStat = cudaMalloc ((void**)&x_d, m*sizeof(double));
    cudaStat = cudaMalloc ((void**)&b_d, m*sizeof(double));
    cudaStat = cudaMalloc ((void**)&Ax_d, m*sizeof(double));
    cudaStat = cudaMalloc ((void**)&bx_d, 1*sizeof(double));
    cudaStat = cudaMalloc ((void**)&xAx_d, 1*sizeof(double));
    if (cudaStat != cudaSuccess) {
        printf ("device memory allocation failed");
        return EXIT_FAILURE;
    }
    end = clock();
    printf ("It takes: %d clicks (%.7f seconds) to allocate the memory.\n",end-start,(double)(end-start)/CLOCKS_PER_SEC);

    start = clock();
    stat = cublasCreate(&handle);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("CUBLAS initialization failed\n");
        return EXIT_FAILURE;
    }

    // copy host data to device
    stat = cublasSetMatrix (m,m, sizeof(double), A, m, A_d, m);
    stat = cublasSetVector (m, sizeof(double), x, 1, x_d, 1);
    stat = cublasSetVector (m, sizeof(double), b, 1, b_d, 1);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("data download failed");
        cudaFree (A_d);
        cudaFree (x_d);
        cudaFree (b_d);
        cudaFree (Ax_d);
        cudaFree(xAx_d);
        cudaFree(bx_d);
        cublasDestroy(handle);
        free(bx); free(xAx);
        return EXIT_FAILURE;
    }
    end = clock();
    printf ("It takes: %d clicks (%.7f seconds) to copy the data to GPU.\n",end-start,(double)(end-start)/CLOCKS_PER_SEC);


    start = clock();
    //calculate A*x and store the result in Ax_d
    double alpha = 1;
    double beta = 0;
    stat = cublasDgemv(handle, CUBLAS_OP_N, m,m, &alpha, A_d, m, x_d, 1, &beta, Ax_d, 1);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("data download failed");
        cudaFree (A_d);
        cudaFree (x_d);
        cudaFree (b_d);
        cudaFree (Ax_d);
        cudaFree(xAx_d);
        cudaFree(bx_d);
        cublasDestroy(handle);
        free(bx); free(xAx);
        return EXIT_FAILURE;
    }

    //calculate x'*A*x and store the result in xAx_d
    stat = cublasDgemv(handle, CUBLAS_OP_T, m,1, &alpha, x_d, m, Ax_d, 1, &beta, xAx_d, 1);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("inner product failed");
        cudaFree (A_d);
        cudaFree (x_d);
        cudaFree (b_d);
        cudaFree (Ax_d);
        cudaFree(xAx_d);
        cudaFree(bx_d);
        cublasDestroy(handle);
        free(bx); free(xAx);
        return EXIT_FAILURE;
    }
    stat = cublasGetVector (1, sizeof(double),xAx_d, 1, xAx, 1); // copy the result x'*A*x to host

    //calculate b'*x and store the result in bx_d
    stat = cublasDgemv(handle, CUBLAS_OP_T, m,1, &alpha, b_d, m, x_d, 1, &beta, bx_d, 1);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("inner product failed");
        cudaFree (A_d);
        cudaFree (x_d);
        cudaFree (b_d);
        cudaFree (Ax_d);
        cudaFree(xAx_d);
        cudaFree(bx_d);
        cublasDestroy(handle);
        free(bx); free(xAx);
        return EXIT_FAILURE;
    }
    stat = cublasGetVector (1, sizeof(double),bx_d, 1, bx, 1);
    end = clock();
    printf ("It takes: %d clicks (%.7f seconds) to call functions in cublas.\n",end-start,(double)(end-start)/CLOCKS_PER_SEC);

    //calculate phi = 1/2*x'*A*x - b'*x
    phi = .5*xAx[0]-bx[0];

    start = clock();
    //free the memory
    cudaFree (A_d);
    cudaFree (x_d);
    cudaFree (b_d);
    cudaFree (Ax_d);
    cudaFree(xAx_d);
    cudaFree(bx_d);
    cublasDestroy(handle);
    free(bx); free(xAx);

    end = clock();
    printf ("It takes: %d clicks (%.7f seconds) to free the memory.\n",end-start,(double)(end-start)/CLOCKS_PER_SEC);

    return EXIT_SUCCESS;
}

/* the gateway function */
void mexFunction( int nlhs, mxArray *plhs[],
                  int nrhs, const mxArray *prhs[])
{
    double phi;
    double *A, *b, *x;

    size_t mrows,ncols;

    /*  check for proper number of arguments */
    if(nrhs!=3) 
        mexErrMsgIdAndTxt( "MATLAB:MinTest:invalidNumInputs",
            "Three inputs required.");
    if(nlhs!=1) 
        mexErrMsgIdAndTxt( "MATLAB:MinTest:invalidNumOutputs",
            "One output required.");


    /*  create a pointer to the input vector x */
    x = mxGetPr(prhs[0]);

    /*  create a pointer to the input matrix A */
    A = mxGetPr(prhs[1]);

    /*  create a pointer to the input vector b */
    b = mxGetPr(prhs[2]);

    /*  get the dimensions of the matrix input A */
    mrows = mxGetM(prhs[1]);
    ncols = mxGetN(prhs[1]);

    if(mrows!=ncols) 
        mexErrMsgIdAndTxt( "MATLAB:MinTest:invalidMatrixInput",
            "A has to be a square matrix");

    /*  call the cpp subroutine */
    PhiEval(x,A,b,phi,mrows);   

    plhs[0] = mxCreateDoubleScalar(phi);
}

【问题讨论】:

  • here 中的建议有帮助吗?(即,在调用 cublas_mex_test 例程之前,先运行 gpuDevice() 之类无害的 GPU 操作)
  • 是的,当我运行 gpuDevice 时,问题就消失了!谢谢!

标签: c++ matlab cuda segmentation-fault mex


【解决方案1】:

显然,许多人已经观察到 MATLAB 的 mex 函数在某些环境中可能会崩溃(在 MATLAB 退出或清除 mex 时)。正如 here 所指出的,这可能是因为已安装的 CUDA 版本与 MATLAB PCT(Parallel Computing Toolbox,如果已安装)所使用的版本不同。

在这种情况下,解决方法是在执行任何 mex 函数之前,先在 MATLAB 中调用一个 GPU 函数,例如 gpuDevice()。

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2022-08-21
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多