forked from gcielniak/OpenCL-Tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
Saving the following diffs for reference before merging:
- Tutorial 1:
--- "src/Tutorial 1.cpp" 2025-06-22 21:06:37.057787000 -0400
+++ "Tutorial 1.cpp" 2025-06-22 21:06:37.050783000 -0400
@@ -54,7 +54,7 @@
throw err;
}
- //Part 4 - memory allocation
+ //Part 3 - memory allocation
//host - input
std::vector<int> A = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; //C++11 allows this type of initialisation
std::vector<int> B = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
@@ -70,13 +70,13 @@
cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, vector_size);
cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, vector_size);
- //Part 5 - device operations
+ //Part 4 - device operations
- //5.1 Copy arrays A and B to device memory
+ //4.1 Copy arrays A and B to device memory
queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, vector_size, &A[0]);
queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, vector_size, &B[0]);
- //5.2 Setup and execute the kernel (i.e. device code)
+ //4.2 Setup and execute the kernel (i.e. device code)
cl::Kernel kernel_add = cl::Kernel(program, "add");
kernel_add.setArg(0, buffer_A);
kernel_add.setArg(1, buffer_B);
@@ -84,7 +84,7 @@
queue.enqueueNDRangeKernel(kernel_add, cl::NullRange, cl::NDRange(vector_elements), cl::NullRange);
- //5.3 Copy the result from device to host
+ //4.3 Copy the result from device to host
queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, vector_size, &C[0]);
std::cout << "A = " << A << std::endl;
- Tutorial 2:
--- "src/Tutorial 2.cpp" 2025-06-22 21:06:37.068775000 -0400
+++ "Tutorial 2.cpp" 2025-06-22 21:06:37.061813000 -0400
@@ -82,12 +82,12 @@
// queue.enqueueWriteBuffer(dev_convolution_mask, CL_TRUE, 0, convolution_mask.size()*sizeof(float), &convolution_mask[0]);
//4.2 Setup and execute the kernel (i.e. device code)
- cl::Kernel kernel = cl::Kernel(program, "identityND");
+ cl::Kernel kernel = cl::Kernel(program, "identity");
kernel.setArg(0, dev_image_input);
kernel.setArg(1, dev_image_output);
// kernel.setArg(2, dev_convolution_mask);
- queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(image_input.width(), image_input.height(), image_input.spectrum()), cl::NullRange);
+ queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(image_input.size()), cl::NullRange);
vector<unsigned char> output_buffer(image_input.size());
//4.3 Copy the result from device to host
diff -ru kernels/my_kernels.cl src/kernels/my_kernels.cl
--- kernels/my_kernels.cl 2025-06-22 21:06:37.066781000 -0400
+++ src/kernels/my_kernels.cl 2025-06-22 21:06:37.070804000 -0400
@@ -9,7 +9,6 @@
int image_size = get_global_size(0)/3; //each image consists of 3 colour channels
int colour_channel = id / image_size; // 0 - red, 1 - green, 2 - blue
- //this is just a copy operation, modify to filter out the individual colour channels
B[id] = A[id];
}
@@ -30,7 +29,7 @@
}
//2D averaging filter
-kernel void avg_filterND(global const uchar* A, global uchar* B) {
+kernel void avg_filter2D(global const uchar* A, global uchar* B) {
int width = get_global_size(0); //image width in pixels
int height = get_global_size(1); //image height in pixels
int image_size = width*height; //image size in pixels
@@ -42,24 +41,19 @@
int id = x + y*width + c*image_size; //global id in 1D space
- uint result = 0;
+ ushort result = 0;
- //simple boundary handling - just copy the original pixel
- if ((x == 0) || (x == width-1) || (y == 0) || (y == height-1)) {
- result = A[id];
- } else {
- for (int i = (x-1); i <= (x+1); i++)
- for (int j = (y-1); j <= (y+1); j++)
- result += A[i + j*width + c*image_size];
+ for (int i = (x-1); i <= (x+1); i++)
+ for (int j = (y-1); j <= (y+1); j++)
+ result += A[i + j*width + c*image_size];
- result /= 9;
- }
+ result /= 9;
B[id] = (uchar)result;
}
//2D 3x3 convolution kernel
-kernel void convolutionND(global const uchar* A, global uchar* B, constant float* mask) {
+kernel void convolution2D(global const uchar* A, global uchar* B, constant float* mask) {
int width = get_global_size(0); //image width in pixels
int height = get_global_size(1); //image height in pixels
int image_size = width*height; //image size in pixels
@@ -71,16 +65,11 @@
int id = x + y*width + c*image_size; //global id in 1D space
- float result = 0;
+ ushort result = 0;
- //simple boundary handling - just copy the original pixel
- if ((x == 0) || (x == width-1) || (y == 0) || (y == height-1)) {
- result = A[id];
- } else {
- for (int i = (x-1); i <= (x+1); i++)
- for (int j = (y-1); j <= (y+1); j++)
- result += A[i + j*width + c*image_size]*mask[i-(x-1) + j-(y-1)];
- }
+ for (int i = (x-1); i <= (x+1); i++)
+ for (int j = (y-1); j <= (y+1); j++)
+ result += A[i + j*width + c*image_size]*mask[i-(x-1) + j-(y-1)];
B[id] = (uchar)result;
}
\ No newline at end of file
- Tutorial 3:
diff -ru kernels/my_kernels.cl src/kernels/my_kernels.cl
--- kernels/my_kernels.cl 2025-06-22 21:06:37.110794000 -0400
+++ src/kernels/my_kernels.cl 2025-06-22 21:06:37.114780000 -0400
@@ -195,7 +195,7 @@
kernel void scan_add_atomic(global int* A, global int* B) {
int id = get_global_id(0);
int N = get_global_size(0);
- for (int i = id+1; i < N && id < N; i++)
+ for (int i = id+1; i < N; i++)
atomic_add(&B[i], A[id]);
}
Metadata
Assignees
Labels
No labels