// // Arquiteturas de Alto Desempenho 2025/2026 // // OpenCL utilities // #ifndef AAD_OCL_UTILITIES #define AAD_OCL_UTILITIES #define CL_TARGET_OPENCL_VERSION 120 #include // // data type used to store all OpenCL related stuff // #define MAX_N_ARGUMENTS 4 #define RECOMMENDED_OCL_WORK_GROUP_SIZE 128 typedef struct { // input data int platform_number; // number of the platform to use int device_number; // number of the device to initialize char *kernel_file_name; // name of the kernel file to load char *kernel_name; // name of the OpenCL kernel to load u32_t data_size[2]; // the number of bytes of the two data arrays to allocate on the host and on the device (0 if not needed) // persistent data cl_platform_id platform; // the platform handle cl_device_id device; // the device handle char device_name[256]; // the device name cl_context context; // the device context cl_command_queue queue; // the command queue cl_program program; // the compiled program cl_kernel kernel; // the kernel handle void *host_data[2]; // the pointers to the host data cl_mem device_data[2]; // the device memory objects // launch kernel data size_t global_work_size; // total number of work items size_t local_work_size; // number of work items per work group int n_kernel_arguments; // number of kernel arguments } ocl_data_t; // // CL_CALL --- macro to call an OpenCL function and test its return value // #define CL_CALL(f_name, args) \ do \ { \ cl_int e = f_name args; \ if(e != CL_SUCCESS) \ { \ fprintf(stderr,"" # f_name "() returned %s (file %s, line %d)\n",cl_error_string(e),__FILE__,__LINE__); \ exit(1); \ } \ } \ while(0) // // Error code to string conversion // static const char *cl_error_string(cl_int e) { static char error_string[64]; #define CASE(error_code) case error_code: return "" # error_code; switch(e) { default: sprintf(error_string,"unknown error code (%d)",(int)e); return error_string; CASE(CL_SUCCESS); CASE(CL_DEVICE_NOT_FOUND); CASE(CL_DEVICE_NOT_AVAILABLE); CASE(CL_COMPILER_NOT_AVAILABLE); CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE); CASE(CL_OUT_OF_RESOURCES); CASE(CL_OUT_OF_HOST_MEMORY); CASE(CL_PROFILING_INFO_NOT_AVAILABLE); CASE(CL_MEM_COPY_OVERLAP); CASE(CL_IMAGE_FORMAT_MISMATCH); CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED); CASE(CL_BUILD_PROGRAM_FAILURE); CASE(CL_MAP_FAILURE); CASE(CL_INVALID_VALUE); CASE(CL_INVALID_DEVICE_TYPE); CASE(CL_INVALID_PLATFORM); CASE(CL_INVALID_DEVICE); CASE(CL_INVALID_CONTEXT); CASE(CL_INVALID_QUEUE_PROPERTIES); CASE(CL_INVALID_COMMAND_QUEUE); CASE(CL_INVALID_HOST_PTR); CASE(CL_INVALID_MEM_OBJECT); CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); CASE(CL_INVALID_IMAGE_SIZE); CASE(CL_INVALID_SAMPLER); CASE(CL_INVALID_BINARY); CASE(CL_INVALID_BUILD_OPTIONS); CASE(CL_INVALID_PROGRAM); CASE(CL_INVALID_PROGRAM_EXECUTABLE); CASE(CL_INVALID_KERNEL_NAME); CASE(CL_INVALID_KERNEL_DEFINITION); CASE(CL_INVALID_KERNEL); CASE(CL_INVALID_ARG_INDEX); CASE(CL_INVALID_ARG_VALUE); CASE(CL_INVALID_ARG_SIZE); CASE(CL_INVALID_KERNEL_ARGS); CASE(CL_INVALID_WORK_DIMENSION); CASE(CL_INVALID_WORK_GROUP_SIZE); CASE(CL_INVALID_WORK_ITEM_SIZE); CASE(CL_INVALID_GLOBAL_OFFSET); CASE(CL_INVALID_EVENT_WAIT_LIST); CASE(CL_INVALID_EVENT); CASE(CL_INVALID_OPERATION); CASE(CL_INVALID_GL_OBJECT); CASE(CL_INVALID_BUFFER_SIZE); CASE(CL_INVALID_MIP_LEVEL); CASE(CL_INVALID_GLOBAL_WORK_SIZE); } #undef CASE } // // Read kernel source from file // static char *read_kernel_source(const char *filename, size_t *length) { FILE *fp = fopen(filename, "rb"); if(!fp) { fprintf(stderr, "Failed to open kernel file: %s\n", filename); exit(1); } fseek(fp, 0, SEEK_END); *length = ftell(fp); fseek(fp, 0, SEEK_SET); char *source = (char *)malloc(*length + 1); if(!source) { fprintf(stderr, "Failed to allocate memory for kernel source\n"); fclose(fp); exit(1); } size_t read = fread(source, 1, *length, fp); source[read] = '\0'; fclose(fp); *length = read; return source; } // // Initialize OpenCL // static void initialize_ocl(ocl_data_t *od) { cl_uint num_platforms, num_devices; cl_int err; // Get platform CL_CALL(clGetPlatformIDs, (0, NULL, &num_platforms)); if(od->platform_number >= (int)num_platforms) { fprintf(stderr, "Invalid platform number %d (only %u platforms available)\n", od->platform_number, num_platforms); exit(1); } cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms); CL_CALL(clGetPlatformIDs, (num_platforms, platforms, NULL)); od->platform = platforms[od->platform_number]; free(platforms); // Get device CL_CALL(clGetDeviceIDs, (od->platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices)); if(od->device_number >= (int)num_devices) { fprintf(stderr, "Invalid device number %d (only %u devices available)\n", od->device_number, num_devices); exit(1); } cl_device_id *devices = (cl_device_id *)malloc(sizeof(cl_device_id) * num_devices); CL_CALL(clGetDeviceIDs, (od->platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL)); od->device = devices[od->device_number]; free(devices); // Get device name CL_CALL(clGetDeviceInfo, (od->device, CL_DEVICE_NAME, sizeof(od->device_name), od->device_name, NULL)); printf("initialize_ocl(): OpenCL code running on %s\n", od->device_name); // Create context od->context = clCreateContext(NULL, 1, &od->device, NULL, NULL, &err); if(err != CL_SUCCESS) { fprintf(stderr, "clCreateContext() returned %s\n", cl_error_string(err)); exit(1); } // Create command queue od->queue = clCreateCommandQueue(od->context, od->device, 0, &err); if(err != CL_SUCCESS) { fprintf(stderr, "clCreateCommandQueue() returned %s\n", cl_error_string(err)); exit(1); } // Load and compile kernel size_t source_length; char *source = read_kernel_source(od->kernel_file_name, &source_length); od->program = clCreateProgramWithSource(od->context, 1, (const char **)&source, &source_length, &err); free(source); if(err != CL_SUCCESS) { fprintf(stderr, "clCreateProgramWithSource() returned %s\n", cl_error_string(err)); exit(1); } err = clBuildProgram(od->program, 1, &od->device, "-cl-std=CL1.2", NULL, NULL); if(err != CL_SUCCESS) { fprintf(stderr, "clBuildProgram() returned %s\n", cl_error_string(err)); // Get build log size_t log_size; clGetProgramBuildInfo(od->program, od->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); char *log = (char *)malloc(log_size); clGetProgramBuildInfo(od->program, od->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); fprintf(stderr, "Build log:\n%s\n", log); free(log); exit(1); } // Create kernel od->kernel = clCreateKernel(od->program, od->kernel_name, &err); if(err != CL_SUCCESS) { fprintf(stderr, "clCreateKernel() returned %s\n", cl_error_string(err)); exit(1); } // Allocate host and device memory for(int i = 0; i < 2; i++) { if(od->data_size[i] > 0u) { od->host_data[i] = malloc(od->data_size[i]); if(!od->host_data[i]) { fprintf(stderr, "Failed to allocate host memory\n"); exit(1); } od->device_data[i] = clCreateBuffer(od->context, CL_MEM_READ_WRITE, od->data_size[i], NULL, &err); if(err != CL_SUCCESS) { fprintf(stderr, "clCreateBuffer() returned %s\n", cl_error_string(err)); exit(1); } } else { od->host_data[i] = NULL; } } } // // Terminate OpenCL // static void terminate_ocl(ocl_data_t *od) { for(int i = 0; i < 2; i++) { if(od->data_size[i] > 0u) { free(od->host_data[i]); clReleaseMemObject(od->device_data[i]); } } clReleaseKernel(od->kernel); clReleaseProgram(od->program); clReleaseCommandQueue(od->queue); clReleaseContext(od->context); } // // Copy data between host and device // static void host_to_device_copy(ocl_data_t *od, int idx) { if(idx < 0 || idx > 1 || od->data_size[idx] == 0u) { fprintf(stderr, "host_to_device_copy(): bad idx\n"); exit(1); } CL_CALL(clEnqueueWriteBuffer, (od->queue, od->device_data[idx], CL_TRUE, 0, od->data_size[idx], od->host_data[idx], 0, NULL, NULL)); } static void device_to_host_copy(ocl_data_t *od, int idx) { if(idx < 0 || idx > 1 || od->data_size[idx] == 0u) { fprintf(stderr, "device_to_host_copy(): bad idx\n"); exit(1); } CL_CALL(clEnqueueReadBuffer, (od->queue, od->device_data[idx], CL_TRUE, 0, od->data_size[idx], od->host_data[idx], 0, NULL, NULL)); } // // Set kernel argument // static void set_kernel_arg(ocl_data_t *od, int arg_idx, size_t arg_size, const void *arg_value) { CL_CALL(clSetKernelArg, (od->kernel, arg_idx, arg_size, arg_value)); } // // Launch kernel // static void launch_kernel(ocl_data_t *od) { CL_CALL(clEnqueueNDRangeKernel, (od->queue, od->kernel, 1, NULL, &od->global_work_size, &od->local_work_size, 0, NULL, NULL)); CL_CALL(clFinish, (od->queue)); } #endif