I tried to follow the page below to call cuFFT from an Octave "octfile".
However, the PGI compiler (19.4) crashes while building it.
To reproduce the problem, save the following files and run:
$ sh compile.sh
$ octave callOct.m
============ compile.sh ==============
#!/bin/bash
# Build testFFTGPU.oct with the PGI C++ compiler through mkoctfile.
#
# CXXFLAGS enables OpenACC (-acc) for a Tesla target using CUDA 9.2 with
# managed memory, prints accelerator diagnostics (-Minfo=accel) and pulls in
# cuFFT (-Mcudalib=cufft). The whole value is kept on ONE line: a line break
# inside the quoted string would embed a newline in the flags handed to
# mkoctfile.
#
# NOTE(review): the link step uses pgc++ (DL_LD) but receives none of the
# -acc/-ta/-Mcudalib flags here; if linking fails with unresolved OpenACC or
# cuFFT symbols, they probably have to be passed at link time too - confirm
# against the mkoctfile variables of the installed Octave version.
env CXX="pgc++" \
CXXFLAGS="-fast -acc -ta=tesla,cuda9.2,managed -Minfo=accel -Mcudalib=cufft" \
XTRA_CXXFLAGS=" " \
CPPFLAGS=" " \
DL_LD="pgc++" \
mkoctfile --verbose testFFTGPU.cc
============ compile.sh ==============
x
x
=== testFFTGPU.cc ===
#include <math.h>
#include <chrono>
#include <complex>
#include <cstdio>
#include <octave/oct.h>
#include <octave/parse.h>
#include <fftw3.h>
#include "openacc.h"
#include "cufft.h"
// Runs one out-of-place, double-precision complex-to-complex inverse 2-D FFT
// with cuFFT on the given CUDA stream.
//
//   in_data  - input samples; handed straight to cufftExecZ2Z, so it must be
//              an address cuFFT can access on the device.
//   out_data - receives the transform; same addressing requirement.
//   nc, nr   - transform extents, forwarded in this order as the nx and ny
//              arguments of cufftPlan2d (cuFFT treats nx as the slowest- and
//              ny as the fastest-varying dimension of the data).
//   stream   - CUDA stream (passed as void* and cast to cudaStream_t) on
//              which the transform is enqueued.
//
// cuFFT does not normalize: the result is NOT divided by nc*nr.
// The function returns void, so a cuFFT failure is reported on stderr; in
// that case out_data is left unwritten or incomplete.
void inv_CUFFT(Complex *in_data, Complex *out_data, int nc,
               int nr, void *stream)
{
  cufftHandle plan;

  cufftResult status = cufftPlan2d(&plan, nc, nr, CUFFT_Z2Z);
  if (status != CUFFT_SUCCESS)
    {
      // No plan was created, so there is nothing to destroy.
      std::fprintf(stderr, "inv_CUFFT: cufftPlan2d failed (cufftResult %d)\n",
                   static_cast<int>(status));
      return;
    }

  status = cufftSetStream(plan, (cudaStream_t)stream);
  if (status != CUFFT_SUCCESS)
    {
      std::fprintf(stderr,
                   "inv_CUFFT: cufftSetStream failed (cufftResult %d)\n",
                   static_cast<int>(status));
    }
  else
    {
      status = cufftExecZ2Z(plan,
                            (cufftDoubleComplex*)in_data,
                            (cufftDoubleComplex*)out_data,
                            CUFFT_INVERSE);
      if (status != CUFFT_SUCCESS)
        std::fprintf(stderr,
                     "inv_CUFFT: cufftExecZ2Z failed (cufftResult %d)\n",
                     static_cast<int>(status));
    }

  // Release the plan on every path that managed to create it.
  cufftDestroy(plan);
}
// Octave entry point: testFFTGPU (M)
//
// Converts the single argument to a complex matrix, moves it to the GPU
// through an OpenACC data region, runs inv_CUFFT on the device copies and
// returns the transformed matrix. The result is whatever inv_CUFFT produces,
// i.e. an inverse 2-D FFT WITHOUT the 1/(rows*columns) scaling that Octave's
// ifft2 applies.
DEFUN_DLD(testFFTGPU, args, ,
"main body;")
{
  octave_value_list retval;

  // Exactly one input is expected.
  if (args.length() != 1)
    {
      print_usage();
      return retval;
    }

  ComplexMatrix Matrix(args(0).complex_matrix_value());
  ComplexMatrix out(Matrix.dims());

  // Deliberately NOT static: a static dim_vector would be initialised on the
  // first call only, so later calls with a differently sized matrix would
  // transfer and transform with stale dimensions.
  const dim_vector dv = Matrix.dims();
  const int n_rows = dv(0);
  const int n_cols = dv(1);
  const int n_total = n_rows * n_cols;

  // Nothing to transform; also avoids asking cuFFT for a zero-sized plan.
  if (n_total == 0)
    {
      retval(0) = out;
      return retval;
    }

  Complex *pmat = Matrix.fortran_vec();
  Complex *pout = out.fortran_vec();

  // The data clauses must sit on the same line as the directive. The input
  // only needs to travel to the device and the output only back to the host.
#pragma acc data copyin(pmat[0:n_total]) copyout(pout[0:n_total])
  {
    // CUDA stream behind OpenACC's synchronous queue, so the FFT is ordered
    // with the data region's own transfers.
    void *stream = acc_get_cuda_stream(acc_async_sync);

    // Inside host_data, pmat/pout name the device copies.
#pragma acc host_data use_device(pmat, pout)
    {
      // Octave stores matrices column-major: the row index varies fastest.
      // inv_CUFFT forwards its two extents as cuFFT's (nx, ny) = (slowest,
      // fastest), so the column count goes first and the row count second.
      inv_CUFFT(pmat, pout, n_cols, n_rows, stream);
    }
  }

  retval(0) = out;
  return retval;
}
=== end testFFTGPU.cc ==
=== callOct.m ===
% Check the GPU inverse FFT from testFFTGPU against Octave's built-in ifft2.
mat = rand(100,100);
mat1 = mat;
mat2 = mat;
fftGPU = testFFTGPU(mat2);
ifft_mat1 = ifft2(mat1);
% cuFFT transforms are unnormalized, whereas ifft2 divides by the number of
% elements, so scale the GPU result before comparing.
fftGPU_scaled = fftGPU / numel(mat);
% Print one summary number instead of the full 100x100 difference matrix;
% it should be close to machine precision.
max_abs_err = max(abs(fftGPU_scaled(:) - ifft_mat1(:)))
=== end callOct.m ===
0 件のコメント:
コメントを投稿