aad-assignment-1/aad_ocl_utilities.h

347 lines
10 KiB
C

//
// Arquiteturas de Alto Desempenho 2025/2026
//
// OpenCL utilities
//
#ifndef AAD_OCL_UTILITIES
#define AAD_OCL_UTILITIES
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
//
// data type used to store all OpenCL related stuff
//
#define MAX_N_ARGUMENTS 4
#define RECOMMENDED_OCL_WORK_GROUP_SIZE 128
typedef struct
{
// input data
int platform_number; // number of the platform to use
int device_number; // number of the device to initialize
char *kernel_file_name; // name of the kernel file to load
char *kernel_name; // name of the OpenCL kernel to load
u32_t data_size[2]; // the number of bytes of the two data arrays to allocate on the host and on the device (0 if not needed)
// persistent data
cl_platform_id platform; // the platform handle
cl_device_id device; // the device handle
char device_name[256]; // the device name
cl_context context; // the device context
cl_command_queue queue; // the command queue
cl_program program; // the compiled program
cl_kernel kernel; // the kernel handle
void *host_data[2]; // the pointers to the host data
cl_mem device_data[2]; // the device memory objects
// launch kernel data
size_t global_work_size; // total number of work items
size_t local_work_size; // number of work items per work group
int n_kernel_arguments; // number of kernel arguments
}
ocl_data_t;
//
// CL_CALL --- macro to call an OpenCL function and test its return value
//
#define CL_CALL(f_name, args) \
do \
{ \
cl_int e = f_name args; \
if(e != CL_SUCCESS) \
{ \
fprintf(stderr,"" # f_name "() returned %s (file %s, line %d)\n",cl_error_string(e),__FILE__,__LINE__); \
exit(1); \
} \
} \
while(0)
//
// Error code to string conversion
//
static const char *cl_error_string(cl_int e)
{
static char error_string[64];
#define CASE(error_code) case error_code: return "" # error_code;
switch(e)
{
default: sprintf(error_string,"unknown error code (%d)",(int)e); return error_string;
CASE(CL_SUCCESS);
CASE(CL_DEVICE_NOT_FOUND);
CASE(CL_DEVICE_NOT_AVAILABLE);
CASE(CL_COMPILER_NOT_AVAILABLE);
CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
CASE(CL_OUT_OF_RESOURCES);
CASE(CL_OUT_OF_HOST_MEMORY);
CASE(CL_PROFILING_INFO_NOT_AVAILABLE);
CASE(CL_MEM_COPY_OVERLAP);
CASE(CL_IMAGE_FORMAT_MISMATCH);
CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
CASE(CL_BUILD_PROGRAM_FAILURE);
CASE(CL_MAP_FAILURE);
CASE(CL_INVALID_VALUE);
CASE(CL_INVALID_DEVICE_TYPE);
CASE(CL_INVALID_PLATFORM);
CASE(CL_INVALID_DEVICE);
CASE(CL_INVALID_CONTEXT);
CASE(CL_INVALID_QUEUE_PROPERTIES);
CASE(CL_INVALID_COMMAND_QUEUE);
CASE(CL_INVALID_HOST_PTR);
CASE(CL_INVALID_MEM_OBJECT);
CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
CASE(CL_INVALID_IMAGE_SIZE);
CASE(CL_INVALID_SAMPLER);
CASE(CL_INVALID_BINARY);
CASE(CL_INVALID_BUILD_OPTIONS);
CASE(CL_INVALID_PROGRAM);
CASE(CL_INVALID_PROGRAM_EXECUTABLE);
CASE(CL_INVALID_KERNEL_NAME);
CASE(CL_INVALID_KERNEL_DEFINITION);
CASE(CL_INVALID_KERNEL);
CASE(CL_INVALID_ARG_INDEX);
CASE(CL_INVALID_ARG_VALUE);
CASE(CL_INVALID_ARG_SIZE);
CASE(CL_INVALID_KERNEL_ARGS);
CASE(CL_INVALID_WORK_DIMENSION);
CASE(CL_INVALID_WORK_GROUP_SIZE);
CASE(CL_INVALID_WORK_ITEM_SIZE);
CASE(CL_INVALID_GLOBAL_OFFSET);
CASE(CL_INVALID_EVENT_WAIT_LIST);
CASE(CL_INVALID_EVENT);
CASE(CL_INVALID_OPERATION);
CASE(CL_INVALID_GL_OBJECT);
CASE(CL_INVALID_BUFFER_SIZE);
CASE(CL_INVALID_MIP_LEVEL);
CASE(CL_INVALID_GLOBAL_WORK_SIZE);
}
#undef CASE
}
//
// Read kernel source from file
//
static char *read_kernel_source(const char *filename, size_t *length)
{
FILE *fp = fopen(filename, "rb");
if(!fp)
{
fprintf(stderr, "Failed to open kernel file: %s\n", filename);
exit(1);
}
fseek(fp, 0, SEEK_END);
*length = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *source = (char *)malloc(*length + 1);
if(!source)
{
fprintf(stderr, "Failed to allocate memory for kernel source\n");
fclose(fp);
exit(1);
}
size_t read = fread(source, 1, *length, fp);
source[read] = '\0';
fclose(fp);
*length = read;
return source;
}
//
// Initialize OpenCL
//
static void initialize_ocl(ocl_data_t *od)
{
cl_uint num_platforms, num_devices;
cl_int err;
// Get platform
CL_CALL(clGetPlatformIDs, (0, NULL, &num_platforms));
if(od->platform_number >= (int)num_platforms)
{
fprintf(stderr, "Invalid platform number %d (only %u platforms available)\n",
od->platform_number, num_platforms);
exit(1);
}
cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
CL_CALL(clGetPlatformIDs, (num_platforms, platforms, NULL));
od->platform = platforms[od->platform_number];
free(platforms);
// Get device
CL_CALL(clGetDeviceIDs, (od->platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices));
if(od->device_number >= (int)num_devices)
{
fprintf(stderr, "Invalid device number %d (only %u devices available)\n",
od->device_number, num_devices);
exit(1);
}
cl_device_id *devices = (cl_device_id *)malloc(sizeof(cl_device_id) * num_devices);
CL_CALL(clGetDeviceIDs, (od->platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL));
od->device = devices[od->device_number];
free(devices);
// Get device name
CL_CALL(clGetDeviceInfo, (od->device, CL_DEVICE_NAME, sizeof(od->device_name),
od->device_name, NULL));
printf("initialize_ocl(): OpenCL code running on %s\n", od->device_name);
// Create context
od->context = clCreateContext(NULL, 1, &od->device, NULL, NULL, &err);
if(err != CL_SUCCESS)
{
fprintf(stderr, "clCreateContext() returned %s\n", cl_error_string(err));
exit(1);
}
// Create command queue
od->queue = clCreateCommandQueue(od->context, od->device, 0, &err);
if(err != CL_SUCCESS)
{
fprintf(stderr, "clCreateCommandQueue() returned %s\n", cl_error_string(err));
exit(1);
}
// Load and compile kernel
size_t source_length;
char *source = read_kernel_source(od->kernel_file_name, &source_length);
od->program = clCreateProgramWithSource(od->context, 1, (const char **)&source,
&source_length, &err);
free(source);
if(err != CL_SUCCESS)
{
fprintf(stderr, "clCreateProgramWithSource() returned %s\n", cl_error_string(err));
exit(1);
}
err = clBuildProgram(od->program, 1, &od->device, "-cl-std=CL1.2", NULL, NULL);
if(err != CL_SUCCESS)
{
fprintf(stderr, "clBuildProgram() returned %s\n", cl_error_string(err));
// Get build log
size_t log_size;
clGetProgramBuildInfo(od->program, od->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *log = (char *)malloc(log_size);
clGetProgramBuildInfo(od->program, od->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
fprintf(stderr, "Build log:\n%s\n", log);
free(log);
exit(1);
}
// Create kernel
od->kernel = clCreateKernel(od->program, od->kernel_name, &err);
if(err != CL_SUCCESS)
{
fprintf(stderr, "clCreateKernel() returned %s\n", cl_error_string(err));
exit(1);
}
// Allocate host and device memory
for(int i = 0; i < 2; i++)
{
if(od->data_size[i] > 0u)
{
od->host_data[i] = malloc(od->data_size[i]);
if(!od->host_data[i])
{
fprintf(stderr, "Failed to allocate host memory\n");
exit(1);
}
od->device_data[i] = clCreateBuffer(od->context, CL_MEM_READ_WRITE,
od->data_size[i], NULL, &err);
if(err != CL_SUCCESS)
{
fprintf(stderr, "clCreateBuffer() returned %s\n", cl_error_string(err));
exit(1);
}
}
else
{
od->host_data[i] = NULL;
}
}
}
//
// Terminate OpenCL
//
static void terminate_ocl(ocl_data_t *od)
{
for(int i = 0; i < 2; i++)
{
if(od->data_size[i] > 0u)
{
free(od->host_data[i]);
clReleaseMemObject(od->device_data[i]);
}
}
clReleaseKernel(od->kernel);
clReleaseProgram(od->program);
clReleaseCommandQueue(od->queue);
clReleaseContext(od->context);
}
//
// Copy data between host and device
//
static void host_to_device_copy(ocl_data_t *od, int idx)
{
if(idx < 0 || idx > 1 || od->data_size[idx] == 0u)
{
fprintf(stderr, "host_to_device_copy(): bad idx\n");
exit(1);
}
CL_CALL(clEnqueueWriteBuffer, (od->queue, od->device_data[idx], CL_TRUE, 0,
od->data_size[idx], od->host_data[idx], 0, NULL, NULL));
}
static void device_to_host_copy(ocl_data_t *od, int idx)
{
if(idx < 0 || idx > 1 || od->data_size[idx] == 0u)
{
fprintf(stderr, "device_to_host_copy(): bad idx\n");
exit(1);
}
CL_CALL(clEnqueueReadBuffer, (od->queue, od->device_data[idx], CL_TRUE, 0,
od->data_size[idx], od->host_data[idx], 0, NULL, NULL));
}
//
// Set kernel argument
//
static void set_kernel_arg(ocl_data_t *od, int arg_idx, size_t arg_size, const void *arg_value)
{
CL_CALL(clSetKernelArg, (od->kernel, arg_idx, arg_size, arg_value));
}
//
// Launch kernel
//
static void launch_kernel(ocl_data_t *od)
{
CL_CALL(clEnqueueNDRangeKernel, (od->queue, od->kernel, 1, NULL,
&od->global_work_size, &od->local_work_size,
0, NULL, NULL));
CL_CALL(clFinish, (od->queue));
}
#endif