347 lines
10 KiB
C
347 lines
10 KiB
C
//
|
|
// Arquiteturas de Alto Desempenho 2025/2026
|
|
//
|
|
// OpenCL utilities
|
|
//
|
|
|
|
#ifndef AAD_OCL_UTILITIES
|
|
#define AAD_OCL_UTILITIES
|
|
|
|
#define CL_TARGET_OPENCL_VERSION 120
|
|
#include <CL/cl.h>
|
|
|
|
//
|
|
// data type used to store all OpenCL related stuff
|
|
//
|
|
|
|
#define MAX_N_ARGUMENTS 4
|
|
#define RECOMMENDED_OCL_WORK_GROUP_SIZE 128
|
|
|
|
typedef struct
|
|
{
|
|
// input data
|
|
int platform_number; // number of the platform to use
|
|
int device_number; // number of the device to initialize
|
|
char *kernel_file_name; // name of the kernel file to load
|
|
char *kernel_name; // name of the OpenCL kernel to load
|
|
u32_t data_size[2]; // the number of bytes of the two data arrays to allocate on the host and on the device (0 if not needed)
|
|
|
|
// persistent data
|
|
cl_platform_id platform; // the platform handle
|
|
cl_device_id device; // the device handle
|
|
char device_name[256]; // the device name
|
|
cl_context context; // the device context
|
|
cl_command_queue queue; // the command queue
|
|
cl_program program; // the compiled program
|
|
cl_kernel kernel; // the kernel handle
|
|
void *host_data[2]; // the pointers to the host data
|
|
cl_mem device_data[2]; // the device memory objects
|
|
|
|
// launch kernel data
|
|
size_t global_work_size; // total number of work items
|
|
size_t local_work_size; // number of work items per work group
|
|
int n_kernel_arguments; // number of kernel arguments
|
|
}
|
|
ocl_data_t;
|
|
|
|
//
|
|
// CL_CALL --- macro to call an OpenCL function and test its return value
|
|
//
|
|
|
|
#define CL_CALL(f_name, args) \
|
|
do \
|
|
{ \
|
|
cl_int e = f_name args; \
|
|
if(e != CL_SUCCESS) \
|
|
{ \
|
|
fprintf(stderr,"" # f_name "() returned %s (file %s, line %d)\n",cl_error_string(e),__FILE__,__LINE__); \
|
|
exit(1); \
|
|
} \
|
|
} \
|
|
while(0)
|
|
|
|
//
|
|
// Error code to string conversion
|
|
//
|
|
|
|
static const char *cl_error_string(cl_int e)
|
|
{
|
|
static char error_string[64];
|
|
#define CASE(error_code) case error_code: return "" # error_code;
|
|
switch(e)
|
|
{
|
|
default: sprintf(error_string,"unknown error code (%d)",(int)e); return error_string;
|
|
CASE(CL_SUCCESS);
|
|
CASE(CL_DEVICE_NOT_FOUND);
|
|
CASE(CL_DEVICE_NOT_AVAILABLE);
|
|
CASE(CL_COMPILER_NOT_AVAILABLE);
|
|
CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
|
|
CASE(CL_OUT_OF_RESOURCES);
|
|
CASE(CL_OUT_OF_HOST_MEMORY);
|
|
CASE(CL_PROFILING_INFO_NOT_AVAILABLE);
|
|
CASE(CL_MEM_COPY_OVERLAP);
|
|
CASE(CL_IMAGE_FORMAT_MISMATCH);
|
|
CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
|
|
CASE(CL_BUILD_PROGRAM_FAILURE);
|
|
CASE(CL_MAP_FAILURE);
|
|
CASE(CL_INVALID_VALUE);
|
|
CASE(CL_INVALID_DEVICE_TYPE);
|
|
CASE(CL_INVALID_PLATFORM);
|
|
CASE(CL_INVALID_DEVICE);
|
|
CASE(CL_INVALID_CONTEXT);
|
|
CASE(CL_INVALID_QUEUE_PROPERTIES);
|
|
CASE(CL_INVALID_COMMAND_QUEUE);
|
|
CASE(CL_INVALID_HOST_PTR);
|
|
CASE(CL_INVALID_MEM_OBJECT);
|
|
CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
|
|
CASE(CL_INVALID_IMAGE_SIZE);
|
|
CASE(CL_INVALID_SAMPLER);
|
|
CASE(CL_INVALID_BINARY);
|
|
CASE(CL_INVALID_BUILD_OPTIONS);
|
|
CASE(CL_INVALID_PROGRAM);
|
|
CASE(CL_INVALID_PROGRAM_EXECUTABLE);
|
|
CASE(CL_INVALID_KERNEL_NAME);
|
|
CASE(CL_INVALID_KERNEL_DEFINITION);
|
|
CASE(CL_INVALID_KERNEL);
|
|
CASE(CL_INVALID_ARG_INDEX);
|
|
CASE(CL_INVALID_ARG_VALUE);
|
|
CASE(CL_INVALID_ARG_SIZE);
|
|
CASE(CL_INVALID_KERNEL_ARGS);
|
|
CASE(CL_INVALID_WORK_DIMENSION);
|
|
CASE(CL_INVALID_WORK_GROUP_SIZE);
|
|
CASE(CL_INVALID_WORK_ITEM_SIZE);
|
|
CASE(CL_INVALID_GLOBAL_OFFSET);
|
|
CASE(CL_INVALID_EVENT_WAIT_LIST);
|
|
CASE(CL_INVALID_EVENT);
|
|
CASE(CL_INVALID_OPERATION);
|
|
CASE(CL_INVALID_GL_OBJECT);
|
|
CASE(CL_INVALID_BUFFER_SIZE);
|
|
CASE(CL_INVALID_MIP_LEVEL);
|
|
CASE(CL_INVALID_GLOBAL_WORK_SIZE);
|
|
}
|
|
#undef CASE
|
|
}
|
|
|
|
//
|
|
// Read kernel source from file
|
|
//
|
|
|
|
static char *read_kernel_source(const char *filename, size_t *length)
|
|
{
|
|
FILE *fp = fopen(filename, "rb");
|
|
if(!fp)
|
|
{
|
|
fprintf(stderr, "Failed to open kernel file: %s\n", filename);
|
|
exit(1);
|
|
}
|
|
|
|
fseek(fp, 0, SEEK_END);
|
|
*length = ftell(fp);
|
|
fseek(fp, 0, SEEK_SET);
|
|
|
|
char *source = (char *)malloc(*length + 1);
|
|
if(!source)
|
|
{
|
|
fprintf(stderr, "Failed to allocate memory for kernel source\n");
|
|
fclose(fp);
|
|
exit(1);
|
|
}
|
|
|
|
size_t read = fread(source, 1, *length, fp);
|
|
source[read] = '\0';
|
|
fclose(fp);
|
|
|
|
*length = read;
|
|
return source;
|
|
}
|
|
|
|
//
|
|
// Initialize OpenCL
|
|
//
|
|
|
|
static void initialize_ocl(ocl_data_t *od)
|
|
{
|
|
cl_uint num_platforms, num_devices;
|
|
cl_int err;
|
|
|
|
// Get platform
|
|
CL_CALL(clGetPlatformIDs, (0, NULL, &num_platforms));
|
|
if(od->platform_number >= (int)num_platforms)
|
|
{
|
|
fprintf(stderr, "Invalid platform number %d (only %u platforms available)\n",
|
|
od->platform_number, num_platforms);
|
|
exit(1);
|
|
}
|
|
|
|
cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
|
|
CL_CALL(clGetPlatformIDs, (num_platforms, platforms, NULL));
|
|
od->platform = platforms[od->platform_number];
|
|
free(platforms);
|
|
|
|
// Get device
|
|
CL_CALL(clGetDeviceIDs, (od->platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices));
|
|
if(od->device_number >= (int)num_devices)
|
|
{
|
|
fprintf(stderr, "Invalid device number %d (only %u devices available)\n",
|
|
od->device_number, num_devices);
|
|
exit(1);
|
|
}
|
|
|
|
cl_device_id *devices = (cl_device_id *)malloc(sizeof(cl_device_id) * num_devices);
|
|
CL_CALL(clGetDeviceIDs, (od->platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL));
|
|
od->device = devices[od->device_number];
|
|
free(devices);
|
|
|
|
// Get device name
|
|
CL_CALL(clGetDeviceInfo, (od->device, CL_DEVICE_NAME, sizeof(od->device_name),
|
|
od->device_name, NULL));
|
|
printf("initialize_ocl(): OpenCL code running on %s\n", od->device_name);
|
|
|
|
// Create context
|
|
od->context = clCreateContext(NULL, 1, &od->device, NULL, NULL, &err);
|
|
if(err != CL_SUCCESS)
|
|
{
|
|
fprintf(stderr, "clCreateContext() returned %s\n", cl_error_string(err));
|
|
exit(1);
|
|
}
|
|
|
|
// Create command queue
|
|
od->queue = clCreateCommandQueue(od->context, od->device, 0, &err);
|
|
if(err != CL_SUCCESS)
|
|
{
|
|
fprintf(stderr, "clCreateCommandQueue() returned %s\n", cl_error_string(err));
|
|
exit(1);
|
|
}
|
|
|
|
// Load and compile kernel
|
|
size_t source_length;
|
|
char *source = read_kernel_source(od->kernel_file_name, &source_length);
|
|
|
|
od->program = clCreateProgramWithSource(od->context, 1, (const char **)&source,
|
|
&source_length, &err);
|
|
free(source);
|
|
if(err != CL_SUCCESS)
|
|
{
|
|
fprintf(stderr, "clCreateProgramWithSource() returned %s\n", cl_error_string(err));
|
|
exit(1);
|
|
}
|
|
|
|
err = clBuildProgram(od->program, 1, &od->device, "-cl-std=CL1.2", NULL, NULL);
|
|
if(err != CL_SUCCESS)
|
|
{
|
|
fprintf(stderr, "clBuildProgram() returned %s\n", cl_error_string(err));
|
|
|
|
// Get build log
|
|
size_t log_size;
|
|
clGetProgramBuildInfo(od->program, od->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
|
char *log = (char *)malloc(log_size);
|
|
clGetProgramBuildInfo(od->program, od->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
|
|
fprintf(stderr, "Build log:\n%s\n", log);
|
|
free(log);
|
|
exit(1);
|
|
}
|
|
|
|
// Create kernel
|
|
od->kernel = clCreateKernel(od->program, od->kernel_name, &err);
|
|
if(err != CL_SUCCESS)
|
|
{
|
|
fprintf(stderr, "clCreateKernel() returned %s\n", cl_error_string(err));
|
|
exit(1);
|
|
}
|
|
|
|
// Allocate host and device memory
|
|
for(int i = 0; i < 2; i++)
|
|
{
|
|
if(od->data_size[i] > 0u)
|
|
{
|
|
od->host_data[i] = malloc(od->data_size[i]);
|
|
if(!od->host_data[i])
|
|
{
|
|
fprintf(stderr, "Failed to allocate host memory\n");
|
|
exit(1);
|
|
}
|
|
|
|
od->device_data[i] = clCreateBuffer(od->context, CL_MEM_READ_WRITE,
|
|
od->data_size[i], NULL, &err);
|
|
if(err != CL_SUCCESS)
|
|
{
|
|
fprintf(stderr, "clCreateBuffer() returned %s\n", cl_error_string(err));
|
|
exit(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
od->host_data[i] = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Terminate OpenCL
|
|
//
|
|
|
|
static void terminate_ocl(ocl_data_t *od)
|
|
{
|
|
for(int i = 0; i < 2; i++)
|
|
{
|
|
if(od->data_size[i] > 0u)
|
|
{
|
|
free(od->host_data[i]);
|
|
clReleaseMemObject(od->device_data[i]);
|
|
}
|
|
}
|
|
clReleaseKernel(od->kernel);
|
|
clReleaseProgram(od->program);
|
|
clReleaseCommandQueue(od->queue);
|
|
clReleaseContext(od->context);
|
|
}
|
|
|
|
//
|
|
// Copy data between host and device
|
|
//
|
|
|
|
static void host_to_device_copy(ocl_data_t *od, int idx)
|
|
{
|
|
if(idx < 0 || idx > 1 || od->data_size[idx] == 0u)
|
|
{
|
|
fprintf(stderr, "host_to_device_copy(): bad idx\n");
|
|
exit(1);
|
|
}
|
|
CL_CALL(clEnqueueWriteBuffer, (od->queue, od->device_data[idx], CL_TRUE, 0,
|
|
od->data_size[idx], od->host_data[idx], 0, NULL, NULL));
|
|
}
|
|
|
|
static void device_to_host_copy(ocl_data_t *od, int idx)
|
|
{
|
|
if(idx < 0 || idx > 1 || od->data_size[idx] == 0u)
|
|
{
|
|
fprintf(stderr, "device_to_host_copy(): bad idx\n");
|
|
exit(1);
|
|
}
|
|
CL_CALL(clEnqueueReadBuffer, (od->queue, od->device_data[idx], CL_TRUE, 0,
|
|
od->data_size[idx], od->host_data[idx], 0, NULL, NULL));
|
|
}
|
|
|
|
//
|
|
// Set kernel argument
|
|
//
|
|
|
|
static void set_kernel_arg(ocl_data_t *od, int arg_idx, size_t arg_size, const void *arg_value)
|
|
{
|
|
CL_CALL(clSetKernelArg, (od->kernel, arg_idx, arg_size, arg_value));
|
|
}
|
|
|
|
//
|
|
// Launch kernel
|
|
//
|
|
|
|
static void launch_kernel(ocl_data_t *od)
|
|
{
|
|
CL_CALL(clEnqueueNDRangeKernel, (od->queue, od->kernel, 1, NULL,
|
|
&od->global_work_size, &od->local_work_size,
|
|
0, NULL, NULL));
|
|
CL_CALL(clFinish, (od->queue));
|
|
}
|
|
|
|
#endif
|