2020年11月20日金曜日

FDM for the time dependent wave equation in Julia

I am writing a test code of our new time-dependent wave equation in Julia.
For now, it is equivalent to the normal wave equation and it looks fine.

I am not sure Julia is that fast and easy to write, but one of the merits is that the visualization functions can be called within Julia's framework.



 

2020年11月17日火曜日

Leetcode: Unique Paths

 To make some practice on DP, "Unique Paths"@Leetcode is solved.

Probably a typical one, and super easy.
https://leetcode.com/problems/unique-paths


import numpy as np

class Solution:
def uniquePaths(self, m: int, n: int) -> int:
dp = np.zeros((m,n),np.int32)
dp[0,:]=1
dp[:,0]=1
for j in range(1,m):
for i in range(1,n):
dp[j,i] = dp[j-1,i] + dp[j,i-1]
return dp[m-1][n-1]

2020年11月13日金曜日

Sudoku solver @LeetCode

 Not elegant at all, but readable sudoku solver.

https://leetcode.com/problems/sudoku-solver/

class Solution(object):
    
    def checkNew(selfi,j,k,board):
        
        for jj in range(9):
            if board[i][jj] == str(k):
                return False
        for ii in range(9):
            if board[ii][j] == str(k):
                return False
        
        iB = i//3
        jB = j//3
        b = []
        for ii in range(3):
            for jj in range(3):
                if board[3*iB+ii][3*jB+jj] == str(k):
                    return False
        
        
        return True
    
    def backtrack(selfboard):
# Find a blank element. The search direction does not a matter,
# but row-oriented here.
        for i in range(9):
            for j in range(9):
                if board[i][j] == ".":
# If blank, try numbers from 1 to 9
                    for k in range(9):
                        if self.checkNew(i,j,k+1,board): # Check if k is OK            
                            board[i][j]=str(k+1)
                            if self.backtrack(board): # Try next blank until OK
                                return True
                            else:
                                board[i][j] = "." # if inconsistent, step back
                    else:
                        return False
        return True
    
    
    def solveSudoku(selfboard):
        """
        :type board: List[List[str]]
        :rtype: None Do not return anything, modify board in-place instead.
        """

        self.backtrack(board)

Unique Paths III @LeetCode

Just to wake up; 

980Unique Paths III
https://leetcode.com/problems/unique-paths-iii/

class Solution:
    
    def checkIfReachable(self,curPoint,walkablegrid):
        (i,j) = curPoint
        #print(curPoint)
        if i < 0 or j < 0 or i >= len(grid) or j >= len(grid[0]):
            return 0
        
        if grid[i][j]==2:
            if len(walkable)==0:
                return 1
            else:
                return 0
        
        if curPoint not in walkable:
            return 0
        
        walkable.remove(curPoint)
        
        num1 = self.checkIfReachable((i+1,j  ),walkable.copy(),grid)
        num2 = self.checkIfReachable((i-1,j  ),walkable.copy(),grid)
        num3 = self.checkIfReachable((i  ,j+1),walkable.copy(),grid)
        num4 = self.checkIfReachable((i  ,j-1),walkable.copy(),grid)
        
        return num1+num2+num3+num4
    
    def uniquePathsIII(selfgrid: List[List[int]]) -> int:
        
        walkable = []
        
        for i in range(len(grid)):
            for j in range(len(grid[0])):
                if grid[i][j] == 0:
                    walkable.append((i,j))
                elif grid[i][j] == 1:
                    walkable.append((i,j))
                    start = (i,j)
        
        numPath = self.checkIfReachable(start,walkable,grid)
        return numPath

An alternative to Google Photo: Moments@Synology NAS

 Since Google photo will not be free after 1 June, I started looking for an alternative.

One of the candidates for me is "Moments" on my Synology NAS (DS218play).

DS Photo might be the first option for photo management, but it does not handle 360-deg photos properly (it just show as a big flat one).

Basically, Moments looks nice, but it seems it spins up HDDs from hibernate (not quite sure. It could be the same before).

2020年11月9日月曜日

TDPC C

 というわけで、TDPC C。

考え方は簡単で、
(1) 自分の対戦相手が今まで生き残ってくる確率
(2) 自分が勝てる確率
(3) 自分が今まで生き残って来れた確率

を掛け算して足しあわせると現在自分が勝ち上がれるかどうかの確率になる
(かどうか、いまいち直感的でないんだけど、答えはそれであってる)。

どっちかというと対戦相手を見つけてくるループが面倒で、私は予めリストアップした(groupがそれ)

https://atcoder.jp/contests/tdpc/submissions/18020128

import numpy as np

def calcProb(P,Q,R):
   RP = R[P]
   RQ = R[Q]

   prob = 1.0 / (1.0+10.0**((RQ-RP)/400.0))

   return prob

def calcConsequtiveProb(iP,iG,R,dp,iBat):
   prob = 0.0
   for i in iG:
      prob += calcProb(iP,i,R)*dp[iBat-1,i]

   return prob

N = int(input())
numPerson = 2**N
R = []
for i in range(numPerson):
   R.append(float(input()))

group = []
group.append([ [i] for i in range(numPerson)])
for i in range(1,N+1):
   locGroup = []
   prevGroup = group[i-1]
   for j in range(len(prevGroup)//2):

      lg1 = prevGroup[2*j  ]
      lg2 = prevGroup[2*j+1]
      locGroup.append(lg1+lg2)
   group.append(locGroup)

dp = np.zeros((N+1,numPerson),np.float)

dp[0,:] = 1.0

for iBat in range(1,N+1):
   locGrp = group[iBat-1]

   for iGrp in range(len(locGrp)//2):
      lg1 = locGrp[iGrp*2  ]
      lg2 = locGrp[iGrp*2+1]

      for iPerson in lg1:
         prob = calcConsequtiveProb(iPerson,lg2,R,dp,iBat)
         dp[iBat,iPerson] = dp[iBat-1,iPerson]*prob
      for iPerson in lg2:
         prob = calcConsequtiveProb(iPerson,lg1,R,dp,iBat)
         dp[iBat,iPerson] = dp[iBat-1,iPerson]*prob

for i in range(numPerson):
   print(f'{dp[N,i]:.8f}')

2020年11月6日金曜日

Quantum Fourier Transform (QFT) on Q# (3 qubits) Part2

 QFT with three qubits is implemented in Q#.
The last post was to initialize the original state (entangled over three qubits).
I feel the measurement part should be as this post (the last one is incorrect).

Formulation is;
https://dojo.qulacs.org/ja/latest/notebooks/2.3_quantum_Fourier_transform.html#

The result is;
|000>: 1000, others: 0.
This agrees with the result of the above site :)!

namespace QFT_naive {
    open Microsoft.Quantum.Canon;
    open Microsoft.Quantum.Intrinsic;
    open Microsoft.Quantum.Convert;
    open Microsoft.Quantum.Arrays;
    

    @EntryPoint()
    operation SayHello() : Unit {
        Message("Hello quantum world!");
        mutable counts = new Int[8];
        mutable nres0 = 0;
        mutable nres1 = 0;
        mutable nres2 = 0;

        for(trian in 1..1000){
        using ( (input) = (Qubit[3] ) ){
            // 1/sqrt(8) (|000> + |001> + ,,, + |111>)
            H(input[0]);
            H(input[1]);
            H(input[2]);

            // Star QFT
            // R1 = Z, R2=S, R3=T
            H(input[0]);
            (Controlled S)([input[1]],input[0]);
            (Controlled T)([input[2]],input[0]);

            H(input[1]);
            (Controlled S)([input[2]],input[1]);

            H(input[2]);

            SWAP(input[2],input[0]);
            //CNOT(input[0], input[2]);
            //CNOT(input[2], input[0]);
            //CNOT(input[0], input[2]);

            // Start Measuring
            let res0 = M(input[0]);
            let res1 = M(input[1]);
            let res2 = M(input[2]);
            if (res0 == Zero){
                set nres0 = 0;
            }else{
                set nres0 = 1;
            }
            if (res1 == Zero){
                set nres1 = 0;
            }else{
                set nres1 = 1;
            }
            if (res2 == Zero){
                set nres2 = 0;
            }else{
                set nres2 = 1;
            }
            let bit = nres0 + nres1*2 + nres2*4;
            set counts w/= bit <- counts[bit]+ 1;
            ResetAll(input);
        }
        }
        for (j in 0..7) {
           Message(IntAsString(counts[j]));
        }
    }
}

TDPC(Typical DP Contest) A

 二次元配列にしないで解いてる人もいるみたいだけど私にはさっぱりわからないのでとりあえず二次元で。
”これまでに作成可能な点数であることが確認できれば、現在確認中のスコアを足した点数も作成可能である”ことを二次元配列で表現。

最終的には、最後の行を見れば作れる点数の個数がわかる。

https://atcoder.jp/contests/tdpc/tasks/tdpc_contest

import numpy as np

N = int(input())
line = input().split()
p = []
p.append(0)
for i in line:
   p.append(int(i))

dp = np.zeros((len(p),sum(p)+1),np.int8)

dp[0,0]=1
for j in range(1,len(p)):
   for i in range(0,sum(p)+1):
      if dp[j-1,i]:
         dp[j,i+p[j]] = 1
         dp[j,i] = 1

#print(dp)
print(sum(dp[len(p)-1,:]))

2020年11月2日月曜日

ABC129 C

DP の勉強のために下記ブログの問題リストを解いてみている。
https://qiita.com/drken/items/dc53c683d6de8aeacf5a

DPだとわかっているからできるけど、自分でいきなりはまだ思いつけなさそう。
https://atcoder.jp/contests/abc129/tasks/abc129_c

import sys

line = input().split()
N = int(line[0])
M = int(line[1])

dp = [0] * (N+1)
NA = [1] * (N+1)

for i in range(M):
   NA[int(input())] = 0

for i in range(N):
   if NA[i+1] == 0 and NA[i]==0:
      print(0)
      sys.exit(0)

dp[0] = 1
dp[1] = 1

for i in range(2,N+1):
   if NA[i] == 0:
      dp[i] = dp[i-1]
   elif NA[i-1] == 0:
      dp[i] = dp[i-1]
   else:
      dp[i] = dp[i-1]*NA[i-1] + dp[i-2]*NA[i-2]

print(dp[N]%1000000007)

 

2020年10月23日金曜日

Paiza Rank-A

After several attempts, I reached Rank-A in Paiza. 
Hope this helps my job hunting :)


Quantum Fourier Transform (QFT) Part 1

I'm trying to implement Quantum Fourier Transform (QFT), not from "Quantum Algorithm Implementations for Beginners" but Quantum Dojo (https://dojo.qulacs.org/ja/latest/notebooks/2.3_quantum_Fourier_transform.html# ).
That's because "Beginners" does not explain the algorithm in detail.

Quantum Dojo's one needs the computation of  1/sqrt(8)(|000>+|001>+,,,+|111> (in 3 qubits case), which is not clear in Q#.

I think this could be formed by applying the Adamard Gate for all qubits, and in order to make sure that I am correct, I have implemented the following code.

The results are:117, 107, 125, 138, 128, 144, 133, 108.
Although there are errors because of random numbers, in general looks OK.

namespace QFT_naive {
    open Microsoft.Quantum.Canon;
    open Microsoft.Quantum.Intrinsic;
    open Microsoft.Quantum.Convert;
    open Microsoft.Quantum.Arrays;
    

    @EntryPoint()
    operation SayHello() : Unit {
        Message("Hello quantum world!");
        mutable counts = new Int[8];
        mutable nres0 = 0;
        mutable nres1 = 0;
        mutable nres2 = 0;

        for(trian in 1..1000){
        using ( (input) = (Qubit[3] ) ){
            // 1/sqrt(8) (|000> + |001> + ,,, + |111>)
            H(input[0]);
            H(input[1]);
            H(input[2]);

            let res0 = M(input[0]);
            let res1 = M(input[1]);
            let res2 = M(input[2]);
            if (res0 == Zero){
                set nres0 = 1;
            }else{
                set nres0 = 0;
            }
            if (res1 == Zero){
                set nres1 = 1;
            }else{
                set nres1 = 0;
            }
            if (res2 == Zero){
                set nres2 = 1;
            }else{
                set nres2 = 0;
            }
            let bit = nres0 + nres1*2 + nres2*4;
            set counts w/= bit <- counts[bit]+ 1;
            ResetAll(input);
        }
        }
        for (j in 0..7) {
           Message(IntAsString(counts[j]));
        }
    }
}

2020年10月16日金曜日

With NEW SDK "Rewriting sample codes in Quantum Algorithm Implementations for Beginners (Bell state)"

 Since the new SDK does not use Driver.cs, the Bell state example needs to be rewritten without the C# part.

Now you only need to write Program.qs, and the example looks like,
Result is |00> 500, |01> 0, |10> 0, |11> 500. Looks still valid.

namespace QFT_naive {
    open Microsoft.Quantum.Canon;
    open Microsoft.Quantum.Intrinsic;
    open Microsoft.Quantum.Convert;
    

    @EntryPoint()
    operation SayHello() : Unit {
        Message("Hello quantum world!");
        mutable n00 = 0;
        mutable n01 = 0;
        mutable n10 = 0;
        mutable n11 = 0;

        for(trian in 1..1000){
        using ( (qubit0, qubit1) = (Qubit(), Qubit()) ){
            H(qubit0);
            CNOT(qubit0,qubit1);

            let res0 = M(qubit0);
            let res1 = M(qubit1);

            if(res0 == Zero){
                if(res1 == Zero){
                    set n00 += 1;
                }else{
                    set n01 += 1;
                }
            }else{
                if(res1 == Zero){
                    set n10 += 1;
                }else{
                    set n11 += 1;
                }

            }
            Reset(qubit0);
            Reset(qubit1);
        }
        }
        Message(IntAsString(n00));
        Message(IntAsString(n01));
        Message(IntAsString(n10));
        Message(IntAsString(n11));
    }
}

2020年8月13日木曜日

FPGA + OpenCL on Coursera (exercise in Week3)

 Again, someone who has no experience may need some help;


SimpleKernel.cl

1
2
3
4
5
6
7
8
9
//ACL Kernel
__kernel void SimpleKernel (__global float* restrict x, __global float* restrict y, __global float* restrict z, uint vectorSize)
{
    int i;

    for(i=0; i<vectorSize; i++){
       z[i] =x[i]*y[i];
    }
}


main.cpp
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include <math.h>
#include <fstream>
#include <stdio.h>
#include <string>

#include "CL/cl.hpp"
#include "utility.h"

static const cl_uint vectorSize = 4096; //must be evenly divisible by workSize
static const cl_uint workSize = 256;

//#define EXERCISE1

int main(void)
{
	cl_int err;

	//Setup Platform
	//Get Platform ID
	std::vector<cl::Platform> PlatformList;

	////////////// Exercise 1 Step 2.3
	err = cl::Platform::get(&PlatformList);
	checkErr(err, "Get Platform List");
	checkErr(PlatformList.size()>=1 ? CL_SUCCESS : -1, "cl::Platform::get");
	print_platform_info(&PlatformList);
	//Look for Fast Emulation Platform
	uint current_platform_id=get_platform_id_with_string(&PlatformList, "Emulation");
	printf("Using Platform: %d\n\n", current_platform_id);
	
	//Setup Device
	//Get Device ID
	std::vector<cl::Device> DeviceList;

	////////////// Exercise 1 Step 2.5
	err = PlatformList[current_platform_id].getDevices(CL_DEVICE_TYPE_ALL, &DeviceList);
	checkErr(err, "Get Devices");
	print_device_info(&DeviceList);
	
	//Create Context
	////////////// Exercise 1 Step 2.6 
	cl::Context mycontext = cl::Context(DeviceList);

	checkErr(err, "Context Constructor");
	
	//Create Command queue
	////////////// Exercise 1 Step 2.7
	cl::CommandQueue myqueue = cl::CommandQueue(mycontext , DeviceList[0], 0, &err);
	checkErr(err, "Queue Constructor");

	//Create Buffers for input and output
	////////////// Exercise 1 Step 2.8
	cl::Buffer Buffer_In (mycontext, CL_MEM_READ_ONLY ,  vectorSize * sizeof(cl_float));
	cl::Buffer Buffer_In2(mycontext, CL_MEM_READ_ONLY ,  vectorSize * sizeof(cl_float));
	cl::Buffer Buffer_Out(mycontext, CL_MEM_WRITE_ONLY,  vectorSize * sizeof(cl_float));

	//Inputs and Outputs to Kernel, X and Y are inputs, Z is output
	//The aligned attribute is used to ensure alignment
	//so that DMA could be used if we were working with a real FPGA board
	cl_float X[vectorSize]  __attribute__ ((aligned (64)));
	cl_float Y[vectorSize]  __attribute__ ((aligned (64)));
	cl_float Z[vectorSize]  __attribute__ ((aligned (64)));

	//Allocates memory with value from 0 to 1000
	cl_float LO= 0;   cl_float HI=1000;
	fill_generate(X, Y, Z, LO, HI, vectorSize);

	//Write data to device
	////////////// Exercise 1 Step 2.9
	err = myqueue.enqueueWriteBuffer(Buffer_In , true, 0, vectorSize * sizeof(cl_float), X);
	checkErr(err, "WriteBuffer");
	err = myqueue.enqueueWriteBuffer(Buffer_In2, true, 0, vectorSize * sizeof(cl_float), Y);
	checkErr(err, "WriteBuffer 2");
	myqueue.finish();

#ifndef EXERCISE1
	// create the kernel
	const char *kernel_name = "SimpleKernel";

	//Read in binaries from file
	std::ifstream aocx_stream("../SimpleKernel.aocx", std::ios::in|std::ios::binary);
	checkErr(aocx_stream.is_open() ? CL_SUCCESS:-1, "SimpleKernel.aocx");
	std::string prog(std::istreambuf_iterator<char>(aocx_stream), (std::istreambuf_iterator<char>()));
	cl::Program::Binaries mybinaries (1, std::make_pair(prog.c_str(), prog.length()));

	// Create the Program from the AOCX file.
	////////////////////// Exercise 2 Step 2.3    ///////////////////
	cl::Program myprogram(mycontext, DeviceList, mybinaries);
	checkErr(err, "Program Constructor");

	// build the program
	//////////////      Compile the Kernel.... For Intel FPGA, nothing is done here, but this conforms to the standard
	//////////////       Exercise 2   Step 2.4    ///////////////////
	err= myprogram.build(DeviceList,NULL);
	checkErr(err, "Build Program");

	// create the kernel
	//////////////       Find Kernel in Program
	//////////////       Exercise 2   Step 2.5    ///////////////////
	cl::Kernel mykernel(myprogram, kernel_name,&err);
	checkErr(err, "Kernel Creation");

	//////////////     Set Arguments to the Kernels
	//////////////       Exercise 2   Step 2.6    ///////////////////
	err = mykernel.setArg(0, Buffer_In);
	checkErr(err, "Arg 0");
	err = mykernel.setArg(1, Buffer_In2);
	checkErr(err, "Arg 1");
	err = mykernel.setArg(2, Buffer_Out);
	checkErr(err, "Arg 2");
	err = mykernel.setArg(3, vectorSize);
	checkErr(err, "Arg 3");


	printf("\nLaunching the kernel...\n");
	
	// Launch Kernel
	//////////////       Exercise 2   Step 2.7    ///////////////////
	err= myqueue.enqueueNDRangeKernel(mykernel,cl::NullRange,cl::NDRange(1),cl::NullRange);
	checkErr(err, "Kernel Execution");

	// read the output
	//////////////       Exercise 2   Step 2.8    ///////////////////
	err= myqueue.enqueueReadBuffer(Buffer_Out , true, 0, vectorSize * sizeof(cl_float), Z);
	checkErr(err, "Read Buffer");

	err=myqueue.finish();
	checkErr(err, "Finish Queue");
	
	float CalcZ[vectorSize];

	for (uint i=0; i<vectorSize; i++)
	{
		//////////////  Equivalent Code running on CPUs
		//////////////       Exercise 2   Step 2.9    ///////////////////
		CalcZ[i] = X[i]*Y[i];
				
	}

	//Print Performance Results
	verification (X, Y, Z, CalcZ, vectorSize);

#endif

    return 1;
}

FPGA + OpenCL on Coursera (exercise in Week2)

 To be honest, the exercise is a bit inconsiderate, and so, I would like to show mine here.


  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include <math.h>
#include <fstream>
#include <stdio.h>
#include <string>

#include "CL/cl.hpp"
#include "utility.h"

static const cl_uint vectorSize = 4096; //must be evenly divisible by workSize
static const cl_uint workSize = 256;

#define EXERCISE1

int main(void)
{
	cl_int err;

	//Setup Platform
	//Get Platform ID
	std::vector<cl::Platform> PlatformList;

	////////////// Exercise 1 Step 2.3
	err = cl::Platform::get(&PlatformList);
	checkErr(err, "Get Platform List");
	checkErr(PlatformList.size()>=1 ? CL_SUCCESS : -1, "cl::Platform::get");
	print_platform_info(&PlatformList);
	//Look for Fast Emulation Platform
	uint current_platform_id=get_platform_id_with_string(&PlatformList, "Emulation");
	printf("Using Platform: %d\n\n", current_platform_id);
	
	//Setup Device
	//Get Device ID
	std::vector<cl::Device> DeviceList;

	////////////// Exercise 1 Step 2.5
	err = PlatformList[current_platform_id].getDevices(CL_DEVICE_TYPE_ALL, &DeviceList);
	checkErr(err, "Get Devices");
	print_device_info(&DeviceList);
	
	//Create Context
	////////////// Exercise 1 Step 2.6 
	cl::Context mycontext = cl::Context(DeviceList);

	checkErr(err, "Context Constructor");
	
	//Create Command queue
	////////////// Exercise 1 Step 2.7
	cl::CommandQueue myqueue = cl::CommandQueue(mycontext , DeviceList[0], 0, &err);
	checkErr(err, "Queue Constructor");

	//Create Buffers for input and output
	////////////// Exercise 1 Step 2.8
	cl::Buffer Buffer_In (mycontext, CL_MEM_READ_ONLY ,  vectorSize * sizeof(cl_float));
	cl::Buffer Buffer_In2(mycontext, CL_MEM_READ_ONLY ,  vectorSize * sizeof(cl_float));
	cl::Buffer Buffer_Out(mycontext, CL_MEM_WRITE_ONLY,  vectorSize * sizeof(cl_float));

	//Inputs and Outputs to Kernel, X and Y are inputs, Z is output
	//The aligned attribute is used to ensure alignment
	//so that DMA could be used if we were working with a real FPGA board
	cl_float X[vectorSize]  __attribute__ ((aligned (64)));
	cl_float Y[vectorSize]  __attribute__ ((aligned (64)));
	cl_float Z[vectorSize]  __attribute__ ((aligned (64)));

	//Allocates memory with value from 0 to 1000
	cl_float LO= 0;   cl_float HI=1000;
	fill_generate(X, Y, Z, LO, HI, vectorSize);

	//Write data to device
	////////////// Exercise 1 Step 2.9
	err = myqueue.enqueueWriteBuffer(Buffer_In , true, 0, vectorSize * sizeof(cl_float), X);
	checkErr(err, "WriteBuffer");
	err = myqueue.enqueueWriteBuffer(Buffer_In2, true, 0, vectorSize * sizeof(cl_float), Y);
	checkErr(err, "WriteBuffer 2");
	myqueue.finish();

#ifndef EXERCISE1
	// create the kernel
	const char *kernel_name = "SimpleKernel";

	//Read in binaries from file
	std::ifstream aocx_stream("../SimpleKernel.aocx", std::ios::in|std::ios::binary);
	checkErr(aocx_stream.is_open() ? CL_SUCCESS:-1, "SimpleKernel.aocx");
	std::string prog(std::istreambuf_iterator<char>(aocx_stream), (std::istreambuf_iterator<char>()));
	cl::Program::Binaries mybinaries (1, std::make_pair(prog.c_str(), prog.length()));

	// Create the Program from the AOCX file.
	////////////////////// Exercise 2 Step 2.3    ///////////////////
	cl::Program
	checkErr(err, "Program Constructor");

	// build the program
	//////////////      Compile the Kernel.... For Intel FPGA, nothing is done here, but this conforms to the standard
	//////////////       Exercise 2   Step 2.4    ///////////////////
	err=
	checkErr(err, "Build Program");

	// create the kernel
	//////////////       Find Kernel in Program
	//////////////       Exercise 2   Step 2.5    ///////////////////
	cl::Kernel
	checkErr(err, "Kernel Creation");

	//////////////     Set Arguments to the Kernels
	//////////////       Exercise 2   Step 2.6    ///////////////////
	err =
	checkErr(err, "Arg 0");
	err =
	checkErr(err, "Arg 1");
	err =
	checkErr(err, "Arg 2");
	err =
	checkErr(err, "Arg 3");


	printf("\nLaunching the kernel...\n");
	
	// Launch Kernel
	//////////////       Exercise 2   Step 2.7    ///////////////////
	err=
	checkErr(err, "Kernel Execution");

	// read the output
	//////////////       Exercise 2   Step 2.8    ///////////////////
	err=
	checkErr(err, "Read Buffer");

	err=myqueue.finish();
	checkErr(err, "Finish Queue");
	
	float CalcZ[vectorSize];

	for (uint i=0; i<vectorSize; i++)
	{
		//////////////  Equivalent Code running on CPUs
		//////////////       Exercise 2   Step 2.9    ///////////////////
		CalcZ[i] =
				
	}

	//Print Performance Results
	verification (X, Y, Z, CalcZ, vectorSize);

#endif

    return 1;
}

FPGA + OpenCL environment building

 An environment should be built by myself, and I am writing down what I have done so far to get it.


(1) Install Centos 7 (this is the requirement by Intel's SDK)
(2) Download SDK from here. Version 19.1 might be the best for the course. (30GB!)
(3) Install everything under /root. Root access is necessary for the emulator.
(4) Install eclipse. Download from here. The latest version might be OK.

I tried to use an eclipse included in SDK, but there was an issue in a menu window.
Actually, no need to stick to the bundled one, at least for the exercise in Week2.

Coursera's FPGA + OpenCL course

As a part of my training duties, I have just started a course on Coursera.
It seems, it is actually just a copy of Intel's one but OK to me.


https://www.coursera.org/learn/opencl-fpga-introduction

2020年8月5日水曜日

ハンガリーで結婚式に参加した

妻の同僚のハンガリー人が結婚式に私も招待してくれたので、参加してきました。
ブダペストから更に来るまで1時間半もかかるようなところなので、
前泊(正確には前々日に移動だけ)して、ブダペスト観光しつつ、当日は新郎新婦の友人に車で送ってもらいました(外国人には公共交通機関での移動は無理っぽい)。

式が14時始まりで、披露宴が終わるのが4AMという死にそうな予定で、もちろん我々は途中で諦めましたが、それでも1時。翌日は新郎新婦の車に乗せてもらってウィーンまで帰ったのですが、途中で逃げた我々より新郎新婦のほうがしっかりしていた。体力の違いか、若さか。

写真は観光に行った、Gerbeaudという喫茶店、Matthias Church, 世界遺産になってるメトロ(M1線)、鎖橋、結婚式の式次第に式の様子。








2020年8月4日火曜日

List of gates available in Q#

I always lose the list of Q#, and so, I would like to put it as memo here.

https://docs.microsoft.com/en-us/qsharp/api/qsharp/microsoft.quantum.intrinsic

Grover's algorithm (Algorithm 1, in Section 2)

Continuation of Q# practice (Bell state).
Grover's algorithm described in Section 2, Algorithm 1.
In this example, Oracle is expressed just with Toffoli (CCNOT) gate, and this is not a general implementation, as far as I understand.

In the literature, they constructed CCNOT with natively supported gates, but Q# has it, and I just used it (control and ancillary are a bit confusing to me though).

By running my sample code the numbers of measurements observed in 1000 trials are;
00:0, 01:0, 10:0, 11:1000
Unlike the previous one, the result is completely the same as the theory.

I feel I should split Q# and C# parts in a better way.
In any case, I am getting used to it, hopefully.

Sample codes are;
Q# (Program.qs)
/// # Summary
/// 
namespace Grover {

    open Microsoft.Quantum.Canon;
    open Microsoft.Quantum.Intrinsic;
    

    operation Grover() : (Int,Int,Int,Int) {
        Message("This is Q# implementation of Algorithm 1 in Quantum Algorithm Implementaions for Beginners");

        mutable n00 = 0;
        mutable n01 = 0;
        mutable n10 = 0;
        mutable n11 = 0;

        for(trial in 1..1000){
        using ( (qubit0, qubit1, qubit2) = (Qubit(), Qubit(),Qubit()) ){
            // qubit0: ancillar
            // qubit1, 2: x1, x2

            //Initializaiton
            X(qubit0);
            H(qubit0);
            H(qubit1);
            H(qubit2);
            // qubit0 is called ancillar, looking at the following site and the article, it should be the target
            // https://quantumcomputing.stackexchange.com/questions/3943/how-do-you-implement-the-toffoli-gate-using-only-single-qubit-and-cnot-gates
            CCNOT(qubit1,qubit2,qubit0); //Toffoli gate

            // Grover diffusion operator
            H(qubit1);
            X(qubit1);
            H(qubit1);

            H(qubit2);
            X(qubit2);

            CNOT(qubit2,qubit1);

            H(qubit1);
            X(qubit1);
            H(qubit1);

            X(qubit2);
            H(qubit2);

            let res0 = M(qubit1);
            let res1 = M(qubit2);
            if(res0 == Zero){
                if(res1 == Zero){
                    set n00 += 1;
                }else{
                    set n01 += 1;
                }
            }else{
                if(res1 == Zero){
                    set n10 += 1;
                }else{
                    set n11 += 1;
                }

            }
            Reset(qubit0);
            Reset(qubit1);
            Reset(qubit2);
        }
        }
        return(n00,n01,n10,n11);
    }
}



C# (Driver.cs)
using System;

using Microsoft.Quantum.Simulation.Core;
using Microsoft.Quantum.Simulation.Simulators;

namespace Grover
{
    class Driver
    {
        static void Main(string[] args)
        {
            
            using (var qsim = new QuantumSimulator())
            {
                long a,b,c,d;
                (a,b,c,d) = Grover.Run(qsim).Result;
                Console.WriteLine("{0},{1},{2},{3}",a,b,c,d);
            }
        }
    }
}