2019年5月25日土曜日

octave + cufft + pgi (openacc). !!!!!!!! This does not work !!!!!!!!!!


I tried to mimic the following page to call cuFFT from an Octave oct-file.
However, the PGI compiler (19.4) crashes.


If you want to try it yourself, save the following files and run:

$ sh compile.sh
$ octave callOct.m


============ compile.sh ==============
#!/bin/bash
# Build testFFTGPU.cc into an Octave oct-file using the PGI C++ compiler.
#
# The variables below are placed in mkoctfile's environment for this one
# invocation only:
#   CXX / DL_LD    - use pgc++ for both compiling and linking
#   CXXFLAGS       - optimisation, OpenACC (-acc, -ta=...), accelerator
#                    diagnostics (-Minfo=accel) and the cuFFT library
#                    (-Mcudalib=cufft)
#   XTRA_CXXFLAGS,
#   CPPFLAGS       - set to a single blank
CXX="pgc++" \
CXXFLAGS="-fast -acc -ta=tesla,cuda9.2,managed -Minfo=accel -Mcudalib=cufft" \
XTRA_CXXFLAGS=" " \
CPPFLAGS=" " \
DL_LD="pgc++" \
    mkoctfile --verbose testFFTGPU.cc
============ compile.sh ==============
x


=== testFFTGPU.cc ===
#include<math.h>
#include <octave/oct.h>
#include <octave/parse.h>
#include <complex>
#include <chrono>
#include <cstdio>
#include <fftw3.h>
#include "openacc.h"
#include "cufft.h"

void inv_CUFFT(Complex *in_data, Complex *out_data, int nc, int nr, void *stream)
{
    cufftHandle plan;
    cufftResult ResPlan = cufftPlan2d(&plan, nc,nr, CUFFT_Z2Z);
    cufftSetStream(plan, (cudaStream_t)stream);
    cufftResult ResExec = cufftExecZ2Z(plan,
                                      (cufftDoubleComplex*)in_data,
                                      (cufftDoubleComplex*)out_data,
                                      CUFFT_INVERSE);
    cufftDestroy(plan);
}


/*
 * Octave entry point: testFFTGPU(M)
 *
 * Converts the single argument to a complex matrix, runs a 2-D inverse FFT on
 * it through inv_CUFFT inside an OpenACC data region, and returns the result
 * scaled by 1/(rows*cols) so that it is comparable with Octave's ifft2.
 */
DEFUN_DLD(testFFTGPU, args, ,
            "main body;")
{
   // Exactly one input (the matrix to transform) is expected.
   if (args.length() != 1)
      print_usage();

   ComplexMatrix Matrix(args(0).complex_matrix_value());
   octave_value_list retval;

   // Read the dimensions on every call.  A function-local static would be
   // initialised only once and would go stale when a later call passes a
   // matrix of a different size.
   const dim_vector dv = Matrix.dims();

   ComplexMatrix out(dv);

   const int Nr = static_cast<int>(dv(0));   // number of rows
   const int Nc = static_cast<int>(dv(1));   // number of columns
   const int total = Nr * Nc;

   // Nothing to transform; cuFFT cannot plan a zero-sized FFT.
   if (total == 0)
   {
      retval(0) = out;
      return retval;
   }

   Complex *pmat = Matrix.fortran_vec();
   Complex *pout = out.fortran_vec();

   // The input only needs to go to the device and the output only needs to
   // come back.
#pragma acc data copyin(pmat[0:total]) copyout(pout[0:total])
{
   void *stream = acc_get_cuda_stream(acc_async_sync);
#pragma acc host_data use_device(pmat,pout)
{
   // Octave stores matrices column-major, so rows are the contiguous
   // dimension.  inv_CUFFT forwards (nc, nr) to cufftPlan2d as (nx, ny), where
   // ny is the contiguous dimension: columns go first, rows second.
   inv_CUFFT(pmat,pout,Nc,Nr,stream);
}
}

   // cuFFT transforms are unnormalised; apply the 1/(rows*cols) factor that
   // ifft2 includes.
   const double scale = 1.0 / static_cast<double>(total);
   for (int i = 0; i < total; ++i)
      pout[i] *= scale;

   retval(0) = out;
   return retval;
}
=== end testFFTGPU.cc ===

=== callOct.m ===
% Driver script: run the same random matrix through the GPU oct-file and
% through Octave's built-in ifft2, then print the element-wise difference.

% Random 100x100 test input.
mat = rand(100, 100);

% Reference result from Octave's ifft2, computed on its own copy of the input.
mat1 = mat;
ifft_mat1 = ifft2(mat1);

% Result from the testFFTGPU oct-file, computed on a separate copy.
mat2 = mat;
fftGPU = testFFTGPU(mat2);

% No trailing semicolon: the difference matrix is printed.
fftGPU - ifft_mat1
=== end callOct.m ===

0 件のコメント:

コメントを投稿