Skip to content

diffs between multi_os and originals in the upstream repo #3

@smokhov

Description

@smokhov

Saving the diffs between multi_os and the upstream originals here for reference before merging:

  • Tutorial 1:
--- "src/Tutorial 1.cpp"	2025-06-22 21:06:37.057787000 -0400
+++ "Tutorial 1.cpp"	2025-06-22 21:06:37.050783000 -0400
@@ -54,7 +54,7 @@
 			throw err;
 		}
 
-		//Part 4 - memory allocation
+		//Part 3 - memory allocation
 		//host - input
 		std::vector<int> A = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; //C++11 allows this type of initialisation
 		std::vector<int> B = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 };
@@ -70,13 +70,13 @@
 		cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, vector_size);
 		cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, vector_size);
 
-		//Part 5 - device operations
+		//Part 4 - device operations
 
-		//5.1 Copy arrays A and B to device memory
+		//4.1 Copy arrays A and B to device memory
 		queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, vector_size, &A[0]);
 		queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, vector_size, &B[0]);
 
-		//5.2 Setup and execute the kernel (i.e. device code)
+		//4.2 Setup and execute the kernel (i.e. device code)
 		cl::Kernel kernel_add = cl::Kernel(program, "add");
 		kernel_add.setArg(0, buffer_A);
 		kernel_add.setArg(1, buffer_B);
@@ -84,7 +84,7 @@
 
 		queue.enqueueNDRangeKernel(kernel_add, cl::NullRange, cl::NDRange(vector_elements), cl::NullRange);
 
-		//5.3 Copy the result from device to host
+		//4.3 Copy the result from device to host
 		queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, vector_size, &C[0]);
 
 		std::cout << "A = " << A << std::endl;
  • Tutorial 2:
--- "src/Tutorial 2.cpp"	2025-06-22 21:06:37.068775000 -0400
+++ "Tutorial 2.cpp"	2025-06-22 21:06:37.061813000 -0400
@@ -82,12 +82,12 @@
 //		queue.enqueueWriteBuffer(dev_convolution_mask, CL_TRUE, 0, convolution_mask.size()*sizeof(float), &convolution_mask[0]);
 
 		//4.2 Setup and execute the kernel (i.e. device code)
-		cl::Kernel kernel = cl::Kernel(program, "identityND");
+		cl::Kernel kernel = cl::Kernel(program, "identity");
 		kernel.setArg(0, dev_image_input);
 		kernel.setArg(1, dev_image_output);
 //		kernel.setArg(2, dev_convolution_mask);
 
-		queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(image_input.width(), image_input.height(), image_input.spectrum()), cl::NullRange);
+		queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(image_input.size()), cl::NullRange);
 
 		vector<unsigned char> output_buffer(image_input.size());
 		//4.3 Copy the result from device to host
diff -ru kernels/my_kernels.cl src/kernels/my_kernels.cl
--- kernels/my_kernels.cl	2025-06-22 21:06:37.066781000 -0400
+++ src/kernels/my_kernels.cl	2025-06-22 21:06:37.070804000 -0400
@@ -9,7 +9,6 @@
 	int image_size = get_global_size(0)/3; //each image consists of 3 colour channels
 	int colour_channel = id / image_size; // 0 - red, 1 - green, 2 - blue
 
-	//this is just a copy operation, modify to filter out the individual colour channels
 	B[id] = A[id];
 }
 
@@ -30,7 +29,7 @@
 }
 
 //2D averaging filter
-kernel void avg_filterND(global const uchar* A, global uchar* B) {
+kernel void avg_filter2D(global const uchar* A, global uchar* B) {
 	int width = get_global_size(0); //image width in pixels
 	int height = get_global_size(1); //image height in pixels
 	int image_size = width*height; //image size in pixels
@@ -42,24 +41,19 @@
 
 	int id = x + y*width + c*image_size; //global id in 1D space
 
-	uint result = 0;
+	ushort result = 0;
 
-	//simple boundary handling - just copy the original pixel
-	if ((x == 0) || (x == width-1) || (y == 0) || (y == height-1)) {
-		result = A[id];	
-	} else {
-		for (int i = (x-1); i <= (x+1); i++)
-		for (int j = (y-1); j <= (y+1); j++) 
-			result += A[i + j*width + c*image_size];
+	for (int i = (x-1); i <= (x+1); i++)
+	for (int j = (y-1); j <= (y+1); j++) 
+		result += A[i + j*width + c*image_size];
 
-		result /= 9;
-	}
+	result /= 9;
 
 	B[id] = (uchar)result;
 }
 
 //2D 3x3 convolution kernel
-kernel void convolutionND(global const uchar* A, global uchar* B, constant float* mask) {
+kernel void convolution2D(global const uchar* A, global uchar* B, constant float* mask) {
 	int width = get_global_size(0); //image width in pixels
 	int height = get_global_size(1); //image height in pixels
 	int image_size = width*height; //image size in pixels
@@ -71,16 +65,11 @@
 
 	int id = x + y*width + c*image_size; //global id in 1D space
 
-	float result = 0;
+	ushort result = 0;
 
-	//simple boundary handling - just copy the original pixel
-	if ((x == 0) || (x == width-1) || (y == 0) || (y == height-1)) {
-		result = A[id];	
-	} else {
-		for (int i = (x-1); i <= (x+1); i++)
-		for (int j = (y-1); j <= (y+1); j++) 
-			result += A[i + j*width + c*image_size]*mask[i-(x-1) + j-(y-1)];
-	}
+	for (int i = (x-1); i <= (x+1); i++)
+	for (int j = (y-1); j <= (y+1); j++) 
+		result += A[i + j*width + c*image_size]*mask[i-(x-1) + j-(y-1)];
 
 	B[id] = (uchar)result;
 }
\ No newline at end of file
  • Tutorial 3:
diff -ru kernels/my_kernels.cl src/kernels/my_kernels.cl
--- kernels/my_kernels.cl	2025-06-22 21:06:37.110794000 -0400
+++ src/kernels/my_kernels.cl	2025-06-22 21:06:37.114780000 -0400
@@ -195,7 +195,7 @@
 kernel void scan_add_atomic(global int* A, global int* B) {
 	int id = get_global_id(0);
 	int N = get_global_size(0);
-	for (int i = id+1; i < N && id < N; i++)
+	for (int i = id+1; i < N; i++)
 		atomic_add(&B[i], A[id]);
 }

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions