I succeeded in running cuFFT + OpenACC + Octave with GCC-8.
Waiting for a response from PGI.
$ sudo apt-get install gcc-8-offload-nvptx
$ sudo apt-get install g++-8
$ cat Makefile
# Builds testFFTGPU.oct, an Octave extension compiled with g++-8 and OpenACC
# offloading (-fopenacc) and linked against cuFFT and the Octave libraries.

# Compile and link drivers. -fopenacc is given to both so that the offloaded
# code is generated at compile time and linked into the final shared object.
CC = g++-8 -fopenacc
LD = g++-8 -fopenacc

C_OPT = -fPIC
C_INC = -I/usr/local/cuda-9.2/include
CUDA_LIBS = -L/usr/local/cuda-9.2/lib64 -lcufft
OCT_INC = -I/usr/include/octave-4.2.2/octave/.. -I/usr/include/octave-4.2.2/octave
OCT_LIBS = -L/usr/lib/x86_64-linux-gnu/octave/4.2.2 -loctinterp -loctave

OBJS = \
    testFFTGPU.o
TARGET = testFFTGPU.oct

# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:
# Disable the built-in suffix rules; the only compile rule is the one below.
.SUFFIXES:
# These names are commands, not files.
.PHONY: all run clean

all: $(TARGET)

%.o: %.cc
	$(CC) $(C_INC) $(OCT_INC) $(C_OPT) -c $< -o $@

# Libraries are listed after the objects that reference them: the linker
# resolves symbols left to right, and a library named before its users can be
# skipped (notably when the toolchain links with --as-needed).
$(TARGET): $(OBJS)
	$(LD) -shared -Wl,-Bsymbolic -Wl,-Bsymbolic-functions -o $@ $(OBJS) $(OCT_LIBS) $(CUDA_LIBS)

# run.sh starts Octave on callOct.m, which loads the extension, so make sure
# it is built first. The script is run through sh so that it does not need
# the execute bit.
run: $(TARGET)
	sh run.sh

clean:
	rm -f *.o *.d *.mod $(TARGET) *~
	rm -f work.pc work.pcl *.optrpt log.txt
	rm -f *.gpu *.ptx *.oct
$ cat run.sh
# Start Octave on callOct.m with the CUDA runtime and cuFFT shared libraries
# preloaded for that one command only.
# NOTE(review): the two libraries come from different PGI trees
# (linux86-64-nollvm vs linux86-64-llvm) - confirm that this is intended.
cudart_lib=/opt/pgi/linux86-64-nollvm/2019/cuda/9.2/lib64/libcudart.so.9.2
cufft_lib=/opt/pgi/linux86-64-llvm/2019/cuda/9.2/lib64/libcufft.so.9.2
LD_PRELOAD="${cudart_lib}:${cufft_lib}" octave callOct.m
$ cat callOct.m
% Feed the same random 10x10 matrix to the GPU routine (testFFTGPU) and to
% Octave's ifft2, then display the element-wise difference. The last
% expression deliberately has no semicolon so that the result is printed.
src = rand(10, 10);
gpu_result = testFFTGPU(src);
ref_result = ifft2(src);
gpu_result - ref_result
$ cat testFFTGPU.cc
#include <math.h>
#include <chrono>
#include <complex>
#include <cstdio>

#include <octave/oct.h>
#include <octave/parse.h>
#include <fftw3.h>

#include "openacc.h"
#include "cufft.h"
// Runs one out-of-place, double-precision complex 2-D inverse FFT with cuFFT.
//
//   in_data, out_data  Buffers handed directly to cufftExecZ2Z. The caller in
//                      this file passes device addresses obtained inside an
//                      OpenACC host_data region.
//   nc, nr             Forwarded to cufftPlan2d as its first and second size
//                      arguments, in that order. Per the cuFFT documentation
//                      the first is the slowest-changing dimension and the
//                      second the contiguous one.
//   stream             CUDA stream the transform is issued on; cast to
//                      cudaStream_t.
//
// The output is not scaled here; the caller divides by the number of elements.
// Every cuFFT status is checked. On failure a message is written to stderr
// and the remaining steps are skipped; the plan is destroyed whenever it was
// successfully created.
void inv_CUFFT(Complex *in_data, Complex *out_data, int nc, int nr, void *stream)
{
    cufftHandle plan;
    cufftResult status = cufftPlan2d(&plan, nc, nr, CUFFT_Z2Z);
    if (status != CUFFT_SUCCESS) {
        // No valid plan exists, so there is nothing to execute or destroy.
        fprintf(stderr, "inv_CUFFT: cufftPlan2d failed (cufftResult %d)\n", (int)status);
        return;
    }

    status = cufftSetStream(plan, (cudaStream_t)stream);
    if (status != CUFFT_SUCCESS) {
        fprintf(stderr, "inv_CUFFT: cufftSetStream failed (cufftResult %d)\n", (int)status);
    } else {
        status = cufftExecZ2Z(plan,
                              (cufftDoubleComplex*)in_data,
                              (cufftDoubleComplex*)out_data,
                              CUFFT_INVERSE);
        if (status != CUFFT_SUCCESS) {
            fprintf(stderr, "inv_CUFFT: cufftExecZ2Z failed (cufftResult %d)\n", (int)status);
        }
    }

    cufftDestroy(plan);
}
// Octave entry point: testFFTGPU (M)
//
// Converts the single argument to a complex matrix, computes its 2-D inverse
// FFT on the GPU through inv_CUFFT, divides the result by the number of
// elements and returns it as a complex matrix of the same dimensions.
// The OpenACC device type and the number of NVIDIA devices are printed to
// stderr on every call.
DEFUN_DLD(testFFTGPU, args, ,
"main body;")
{
    octave_value_list retval;

    // Exactly one input is required; print_usage() raises an Octave error.
    if (args.length() != 1) {
        print_usage();
        return retval;
    }

    fprintf(stderr,"device type: %d\n", acc_get_device_type());
    fprintf(stderr,"Num devices: %d\n", acc_get_num_devices(acc_device_nvidia));

    ComplexMatrix Matrix(args(0).complex_matrix_value());
    ComplexMatrix out(Matrix.dims());

    // Read the dimensions on every call. A function-local static would keep
    // the size of the first matrix ever passed in.
    const dim_vector dv = Matrix.dims();
    const int nrows = dv(0);
    const int ncols = dv(1);
    // Each complex element is addressed below as two consecutive doubles.
    const int nvals = 2 * nrows * ncols;

    // Nothing to transform; also avoids asking cuFFT for a zero-sized plan.
    if (nvals == 0) {
        retval(0) = out;
        return retval;
    }

    double *pmat = reinterpret_cast<double *>(Matrix.fortran_vec());
    double *pout = reinterpret_cast<double *>(out.fortran_vec());
    const double scale = double(nrows) * double(ncols);

    // The input is only read on the device and the output only produced
    // there, so copy each buffer in one direction only.
    #pragma acc data copyin(pmat[0:nvals]) copyout(pout[0:nvals])
    {
        void *stream = acc_get_cuda_stream(acc_async_sync);

        // Inside host_data, pmat and pout evaluate to device addresses.
        #pragma acc host_data use_device(pmat,pout)
        {
            // Octave matrices are stored column-major, so the row index is
            // the contiguous one. inv_CUFFT forwards its two sizes to
            // cufftPlan2d in order (slowest dimension first), hence columns
            // first and rows second. For square input the order is immaterial.
            inv_CUFFT((Complex*)pmat,(Complex*)pout,ncols,nrows,stream);
        }

        // "parallel loop" shares the iterations among the gangs. A bare
        // "parallel" region would have every gang run the whole loop, which
        // is unsafe for an in-place update.
        #pragma acc parallel loop present(pout[0:nvals])
        for (int i = 0; i < nvals; i++) {
            pout[i] = pout[i] / scale;
        }
    }

    retval(0) = out;
    return retval;
}
$ sh run.sh
octave: X11 DISPLAY environment variable not set
octave: disabling GUI features
device type: 5
Num devices: 4
ans =
Columns 1 and 2:
0.0000e+00 + 0.0000e+00i 3.4694e-18 + 2.6021e-18i
6.9389e-18 - 8.6736e-18i 0.0000e+00 - 6.9389e-18i
-1.9082e-17 - 1.5613e-17i -1.7347e-18 - 3.4694e-18i
3.4694e-18 + 1.0408e-17i -5.4210e-18 + 3.4694e-18i
-2.7756e-17 - 1.0408e-17i 0.0000e+00 + 0.0000e+00i
4.5103e-17 + 0.0000e+00i 6.0715e-18 + 0.0000e+00i
-2.7756e-17 + 1.0408e-17i 0.0000e+00 + 0.0000e+00i
3.4694e-18 - 1.0408e-17i -2.3852e-18 - 4.3368e-18i
-1.9082e-17 + 1.5613e-17i -2.6021e-18 - 3.4694e-18i
6.9389e-18 + 8.6736e-18i 3.4694e-18 + 5.2042e-18i
Columns 3 and 4:
-1.5613e-17 + 0.0000e+00i -5.2042e-18 + 6.9389e-18i
1.0408e-17 - 3.4694e-18i 6.9389e-18 + 3.4694e-18i
0.0000e+00 - 6.7221e-18i 0.0000e+00 - 6.9389e-18i
-6.9389e-18 + 7.8063e-18i -6.9389e-18 + 0.0000e+00i
-3.4694e-18 + 6.9389e-18i -1.1276e-17 + 4.4452e-18i
-3.4694e-18 + 3.4694e-18i -3.4694e-18 + 0.0000e+00i
-3.9031e-18 + 0.0000e+00i -3.4694e-18 + 0.0000e+00i
6.9389e-18 + 0.0000e+00i -1.0408e-17 - 3.3314e-18i
0.0000e+00 - 3.4694e-18i 1.7347e-18 + 1.7347e-18i
-6.9389e-18 + 1.0408e-17i 0.0000e+00 - 3.4694e-18i
Columns 5 and 6:
-1.0408e-17 - 9.3241e-18i -3.9899e-17 + 0.0000e+00i
0.0000e+00 - 3.4694e-18i -5.6379e-18 + 3.8489e-18i
6.9389e-18 + 6.9389e-18i 6.9389e-18 + 1.1926e-18i
8.6736e-18 - 3.4694e-18i 0.0000e+00 + 8.6736e-18i
1.0408e-17 - 1.0408e-17i 6.9389e-18 + 1.7347e-17i
-1.0408e-17 + 1.0408e-17i -5.2042e-18 + 0.0000e+00i
0.0000e+00 + 0.0000e+00i 6.9389e-18 - 1.7347e-17i
0.0000e+00 + 8.6736e-18i 0.0000e+00 - 8.6736e-18i
3.4694e-18 + 3.4694e-18i 6.9389e-18 - 1.1926e-18i
0.0000e+00 + 3.4694e-18i -5.6379e-18 - 3.8489e-18i
Columns 7 and 8:
-1.0408e-17 + 9.3241e-18i -5.2042e-18 - 6.9389e-18i
0.0000e+00 - 3.4694e-18i 0.0000e+00 + 3.4694e-18i
3.4694e-18 - 3.4694e-18i 1.7347e-18 - 1.7347e-18i
0.0000e+00 - 8.6736e-18i -1.0408e-17 + 3.3314e-18i
0.0000e+00 + 0.0000e+00i -3.4694e-18 + 0.0000e+00i
-1.0408e-17 - 1.0408e-17i -3.4694e-18 + 0.0000e+00i
1.0408e-17 + 1.0408e-17i -1.1276e-17 - 4.4452e-18i
8.6736e-18 + 3.4694e-18i -6.9389e-18 + 0.0000e+00i
6.9389e-18 - 6.9389e-18i 0.0000e+00 + 6.9389e-18i
0.0000e+00 + 3.4694e-18i 6.9389e-18 - 3.4694e-18i
Columns 9 and 10:
-1.5613e-17 + 0.0000e+00i 3.4694e-18 - 2.6021e-18i
-6.9389e-18 - 1.0408e-17i 3.4694e-18 - 5.2042e-18i
0.0000e+00 + 3.4694e-18i -2.6021e-18 + 3.4694e-18i
6.9389e-18 + 0.0000e+00i -2.3852e-18 + 4.3368e-18i
-3.9031e-18 + 0.0000e+00i 0.0000e+00 + 0.0000e+00i
-3.4694e-18 - 3.4694e-18i 6.0715e-18 + 0.0000e+00i
-3.4694e-18 - 6.9389e-18i 0.0000e+00 + 0.0000e+00i
-6.9389e-18 - 7.8063e-18i -5.4210e-18 - 3.4694e-18i
0.0000e+00 + 6.7221e-18i -1.7347e-18 + 3.4694e-18i
1.0408e-17 + 3.4694e-18i 0.0000e+00 + 6.9389e-18i
octave: X11 DISPLAY environment variable not set
octave: disabling GUI features
device type: 5
Num devices: 4
ans =
Columns 1 and 2:
0.0000e+00 + 0.0000e+00i 3.4694e-18 + 2.6021e-18i
6.9389e-18 - 8.6736e-18i 0.0000e+00 - 6.9389e-18i
-1.9082e-17 - 1.5613e-17i -1.7347e-18 - 3.4694e-18i
3.4694e-18 + 1.0408e-17i -5.4210e-18 + 3.4694e-18i
-2.7756e-17 - 1.0408e-17i 0.0000e+00 + 0.0000e+00i
4.5103e-17 + 0.0000e+00i 6.0715e-18 + 0.0000e+00i
-2.7756e-17 + 1.0408e-17i 0.0000e+00 + 0.0000e+00i
3.4694e-18 - 1.0408e-17i -2.3852e-18 - 4.3368e-18i
-1.9082e-17 + 1.5613e-17i -2.6021e-18 - 3.4694e-18i
6.9389e-18 + 8.6736e-18i 3.4694e-18 + 5.2042e-18i
Columns 3 and 4:
-1.5613e-17 + 0.0000e+00i -5.2042e-18 + 6.9389e-18i
1.0408e-17 - 3.4694e-18i 6.9389e-18 + 3.4694e-18i
0.0000e+00 - 6.7221e-18i 0.0000e+00 - 6.9389e-18i
-6.9389e-18 + 7.8063e-18i -6.9389e-18 + 0.0000e+00i
-3.4694e-18 + 6.9389e-18i -1.1276e-17 + 4.4452e-18i
-3.4694e-18 + 3.4694e-18i -3.4694e-18 + 0.0000e+00i
-3.9031e-18 + 0.0000e+00i -3.4694e-18 + 0.0000e+00i
6.9389e-18 + 0.0000e+00i -1.0408e-17 - 3.3314e-18i
0.0000e+00 - 3.4694e-18i 1.7347e-18 + 1.7347e-18i
-6.9389e-18 + 1.0408e-17i 0.0000e+00 - 3.4694e-18i
Columns 5 and 6:
-1.0408e-17 - 9.3241e-18i -3.9899e-17 + 0.0000e+00i
0.0000e+00 - 3.4694e-18i -5.6379e-18 + 3.8489e-18i
6.9389e-18 + 6.9389e-18i 6.9389e-18 + 1.1926e-18i
8.6736e-18 - 3.4694e-18i 0.0000e+00 + 8.6736e-18i
1.0408e-17 - 1.0408e-17i 6.9389e-18 + 1.7347e-17i
-1.0408e-17 + 1.0408e-17i -5.2042e-18 + 0.0000e+00i
0.0000e+00 + 0.0000e+00i 6.9389e-18 - 1.7347e-17i
0.0000e+00 + 8.6736e-18i 0.0000e+00 - 8.6736e-18i
3.4694e-18 + 3.4694e-18i 6.9389e-18 - 1.1926e-18i
0.0000e+00 + 3.4694e-18i -5.6379e-18 - 3.8489e-18i
Columns 7 and 8:
-1.0408e-17 + 9.3241e-18i -5.2042e-18 - 6.9389e-18i
0.0000e+00 - 3.4694e-18i 0.0000e+00 + 3.4694e-18i
3.4694e-18 - 3.4694e-18i 1.7347e-18 - 1.7347e-18i
0.0000e+00 - 8.6736e-18i -1.0408e-17 + 3.3314e-18i
0.0000e+00 + 0.0000e+00i -3.4694e-18 + 0.0000e+00i
-1.0408e-17 - 1.0408e-17i -3.4694e-18 + 0.0000e+00i
1.0408e-17 + 1.0408e-17i -1.1276e-17 - 4.4452e-18i
8.6736e-18 + 3.4694e-18i -6.9389e-18 + 0.0000e+00i
6.9389e-18 - 6.9389e-18i 0.0000e+00 + 6.9389e-18i
0.0000e+00 + 3.4694e-18i 6.9389e-18 - 3.4694e-18i
Columns 9 and 10:
-1.5613e-17 + 0.0000e+00i 3.4694e-18 - 2.6021e-18i
-6.9389e-18 - 1.0408e-17i 3.4694e-18 - 5.2042e-18i
0.0000e+00 + 3.4694e-18i -2.6021e-18 + 3.4694e-18i
6.9389e-18 + 0.0000e+00i -2.3852e-18 + 4.3368e-18i
-3.9031e-18 + 0.0000e+00i 0.0000e+00 + 0.0000e+00i
-3.4694e-18 - 3.4694e-18i 6.0715e-18 + 0.0000e+00i
-3.4694e-18 - 6.9389e-18i 0.0000e+00 + 0.0000e+00i
-6.9389e-18 - 7.8063e-18i -5.4210e-18 - 3.4694e-18i
0.0000e+00 + 6.7221e-18i -1.7347e-18 + 3.4694e-18i
1.0408e-17 + 3.4694e-18i 0.0000e+00 + 6.9389e-18i
0 件のコメント:
コメントを投稿