Skip to content

Commit

Permalink
cuda: add comments to array-add.cu
Browse files Browse the repository at this point in the history
  • Loading branch information
danbev committed Feb 7, 2025
1 parent f4a4481 commit eb7f2f8
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 11 deletions.
1 change: 1 addition & 0 deletions gpu/cuda/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ info
wmma
streams
graphs
array-add.ptx
8 changes: 6 additions & 2 deletions gpu/cuda/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ info: src/info.cu
nvcc -o $@ $<

wmma: src/wmma.cu
# GeForce RTX 4080 has compute capability 8.9 (https://developer.nvidia.com/cuda-gpus)
# GeForce RTX 4070 has compute capability 8.9 (https://developer.nvidia.com/cuda-gpus)
nvcc -arch=sm_89 -o $@ $<

hello-world-ptx: src/hello-world.cu
Expand All @@ -27,7 +27,11 @@ graphs: src/graphs.cu
dump-array-add: array-add
cuobjdump $<

array-add-ptx:
nvcc -ptx src/array-add.cu
@cat array-add.ptx


.PHONY: clean
clean:
${RM} hello-world threads inc hello-world.ptx minimal wmma streams graphs array-add
@${RM} threads inc hello-world.ptx info wmma streams graphs array-add
20 changes: 11 additions & 9 deletions gpu/cuda/src/array-add.cu
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#include <stdio.h>

__global__ void add_arrays(int *a, int *b, int *c, int size) {
// Kernel function that runs on the GPU
__global__ void add_arrays(int* a, int* b, int* c, int size) {
printf("blockIdx.x = %d, blockDim.x = %d, threadIdx.x = %d\n", blockIdx.x, blockDim.x, threadIdx.x);
int i = blockIdx.x * blockDim.x + threadIdx.x;
printf("GPU: adding i = %d\n", i);
if (i < size) {
c[i] = a[i] + b[i];
// Calculate the array index that this thread will process.
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < size) {
c[idx] = a[idx] + b[idx];
printf("[GPU] array index [%d]: adding %d + %d = %d\n", idx, a[idx], b[idx], c[idx]);
}
}

Expand Down Expand Up @@ -42,9 +44,9 @@ int main() {
cudaMemcpy(d_a, h_a, size, cudaMemcpyHostToDevice);
cudaMemcpy(d_b, h_b, size, cudaMemcpyHostToDevice);

dim3 grid(1);
dim3 blocks(N);
add_arrays<<<grid, blocks>>>(d_a, d_b, d_c, N);
dim3 blocks(2); // blocks per grid
dim3 threads(3); // threads per block
add_arrays<<<blocks, threads>>>(d_a, d_b, d_c, N);

cudaDeviceSynchronize();

Expand All @@ -54,7 +56,7 @@ int main() {
return 1;
}

// Copy the array that the device has incremented back to the host
// Copy the array that the device has computed back to the host
cudaMemcpy(h_c, d_c, size, cudaMemcpyDeviceToHost);

printf("Added on GPU:\n");
Expand Down

0 comments on commit eb7f2f8

Please sign in to comment.