aad-assignment-1/aad_coin_miner_cuda.c

300 lines
7.8 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - CUDA implementation with histograms
//
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include "aad_data_types.h"
#include "aad_utilities.h"
#include "aad_sha1_cpu.h"
#include "aad_cuda_utilities.h"
#include "aad_vault.h"
#define COINS_STORAGE_SIZE 1024u
#define MAX_HISTOGRAM_BINS 100
// Cleared by signal_handler() to ask the mining loop to stop.
// volatile sig_atomic_t is the object type the C standard guarantees can be
// written safely from an asynchronous signal handler.
static volatile sig_atomic_t keep_running = 1;

//
// Signal handler: requests a clean stop by clearing keep_running
//
// signum ... number of the delivered signal (ignored)
//
void signal_handler(int signum)
{
  (void)signum; // the same action is taken whatever the signal is
  keep_running = 0;
}
//
// Histogram data structures
//
// Running summary (count, minimum, maximum, sum) of a stream of samples
//
typedef struct {
  u32_t bins[MAX_HISTOGRAM_BINS]; // cleared by histogram_init(); no function in this group fills it
  u32_t count;                    // number of samples added so far
  double min_value;               // smallest sample seen (meaningful only when count > 0)
  double max_value;               // largest sample seen (meaningful only when count > 0)
  double sum;                     // sum of all samples, used to report the mean
} histogram_t;

//
// Reset a histogram to the empty state
//
static void histogram_init(histogram_t *h)
{
  memset(h->bins, 0, sizeof(h->bins));
  h->count = 0;
  h->min_value = 1e99;
  h->max_value = 0.0;
  h->sum = 0.0;
}

//
// Record one sample: updates the count, the extremes and the running sum
//
static void histogram_add(histogram_t *h, double value)
{
  // The first sample always sets both extremes, so the result does not depend
  // on the placeholder values stored by histogram_init() (negative samples work too)
  if(h->count == 0 || value < h->min_value)
    h->min_value = value;
  if(h->count == 0 || value > h->max_value)
    h->max_value = value;
  h->sum += value;
  h->count++;
  // For now, just accumulate - the samples are not placed in bins
}

//
// Print the summary statistics of a histogram
//
// h ........ histogram to report
// title .... heading printed before the statistics
// n_bins ... requested number of bins; currently unused because no per-bin data is kept
//
static void histogram_print(histogram_t *h, const char *title, int n_bins)
{
  (void)n_bins;
  if(h->count == 0)
  {
    printf("%s: No data\n", title);
    return;
  }
  printf("\n%s:\n", title);
  printf(" Count: %u\n", h->count);
  printf(" Min: %.6f\n", h->min_value);
  printf(" Max: %.6f\n", h->max_value);
  // Arithmetic mean of the samples (not the midpoint of the extremes)
  printf(" Avg: %.6f\n", h->sum / (double)h->count);
}
//
// Coin reconstruction from stored data
//
// stored_data ... first of the 14 consecutive words to read
// coin .......... receives those 14 words, in the same order
//
static void reconstruct_coin(u32_t *stored_data, u32_t coin[14])
{
  u32_t *src = stored_data;
  u32_t *dst = coin;
  u32_t *const dst_end = coin + 14;

  // The storage already holds the complete coin, so a word-for-word copy is enough
  while(dst != dst_end)
    *dst++ = *src++;
}
//
// Mine DETI coins using CUDA
//
// max_attempts ...... stop once at least this many attempts were made (0 means no limit)
// use_scan_kernel ... non-zero selects "mine_deti_coins_scan_kernel", which is accounted
//                     as 256 attempts per thread; zero selects "mine_deti_coins_kernel"
//
// Runs kernel launches until the limit is reached or keep_running is cleared.
// After each launch the candidates reported by the device are re-hashed on the
// host and the valid ones are handed to save_coin(). At the end, the summary
// statistics are printed and per-launch data is written to cuda_kernel_stats.csv.
//
static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
{
  // Storage layout used below: word 0 is the index of the first unused word,
  // and each stored coin takes 14 consecutive words starting at word 1
  const u32_t words_per_coin = 14u;
  const u32_t max_stored_coins = (COINS_STORAGE_SIZE - 1u) / words_per_coin;
  cuda_data_t cd;
  u32_t *host_storage;
  u64_t attempts = 0;
  u32_t coins_found = 0;
  u32_t kernel_runs = 0;
  // Histograms
  histogram_t time_histogram;
  histogram_t coins_histogram;
  double *kernel_times = NULL;
  u32_t *kernel_coin_counts = NULL;
  u32_t histogram_capacity = 10000;
  histogram_init(&time_histogram);
  histogram_init(&coins_histogram);
  kernel_times = (double *)malloc(histogram_capacity * sizeof *kernel_times);
  kernel_coin_counts = (u32_t *)malloc(histogram_capacity * sizeof *kernel_coin_counts);
  if(kernel_times == NULL || kernel_coin_counts == NULL)
  {
    // Mining does not need the per-launch log, so keep going without it
    fprintf(stderr, "Warning: out of memory, per-kernel statistics will not be recorded\n");
    histogram_capacity = 0;
  }
  // Initialize CUDA
  cd.device_number = 0;
  cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
  cd.kernel_name = use_scan_kernel ? "mine_deti_coins_scan_kernel" : "mine_deti_coins_kernel";
  cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t);
  cd.data_size[1] = 0;
  initialize_cuda(&cd);
  host_storage = (u32_t *)cd.host_data[0];
  // Kernel configuration
  cd.block_dim_x = RECOMENDED_CUDA_BLOCK_SIZE;
  cd.grid_dim_x = 4096; // Large grid for maximum GPU utilization
  u32_t n_threads = cd.grid_dim_x * cd.block_dim_x;
  // Computed in 64 bits so that a larger grid or block size cannot wrap around
  const u64_t attempts_per_kernel = use_scan_kernel ? (u64_t)n_threads * 256u // Each thread tries 256 values
                                                    : (u64_t)n_threads;
  printf("Mining DETI coins using CUDA...\n");
  printf("Grid: %u blocks × %u threads = %u total threads\n",
         cd.grid_dim_x, cd.block_dim_x, n_threads);
  printf("Kernel: %s\n", cd.kernel_name);
  printf("Press Ctrl+C to stop\n\n");
  u32_t param1 = (u32_t)time(NULL);
  u32_t param2 = 0x12345678u;
  int scan_pos = 12;
  // NOTE(review): wall_time_delta() and cpu_time_delta() are used in this function as
  // if they returned absolute timestamps. If, as their names suggest, they return the
  // interval between the two most recent time_measurement() calls, then start_time,
  // kernel_time and the elapsed times below are not what they appear to be.
  // Confirm against aad_utilities.h.
  time_measurement();
  time_measurement();
  double start_time = wall_time_delta();
  double last_report_time = 0.0;
  while(keep_running && (max_attempts == 0 || attempts < max_attempts))
  {
    // Initialize storage area
    host_storage[0] = 1u; // First unused index
    // Copy to device
    host_to_device_copy(&cd, 0);
    // Set kernel arguments
    cd.n_kernel_arguments = use_scan_kernel ? 4 : 3;
    cd.arg[0] = &cd.device_data[0];
    cd.arg[1] = &param1;
    cd.arg[2] = &param2;
    if(use_scan_kernel)
      cd.arg[3] = &scan_pos;
    // Launch kernel and measure time
    time_measurement();
    double kernel_start = cpu_time_delta();
    lauch_kernel(&cd);
    time_measurement();
    double kernel_end = cpu_time_delta();
    double kernel_time = kernel_end - kernel_start;
    // Copy results back
    device_to_host_copy(&cd, 0);
    // Process found coins
    u32_t n_coins_this_kernel = 0;
    u32_t first_unused = host_storage[0];
    // Guard against a zero index, which would wrap around in the subtraction
    u32_t n_stored = (first_unused > 0u) ? (first_unused - 1u) / words_per_coin : 0u;
    if(n_stored > max_stored_coins)
    {
      // The index ran past the end of the storage area. Only the slots that fit inside
      // it are read; every candidate is re-hashed below, so nothing invalid is saved.
      fprintf(stderr, "Warning: coin storage overflow (host_storage[0] = %u), checking the first %u slots only\n",
              first_unused, max_stored_coins);
      n_stored = max_stored_coins;
    }
    if(n_stored > 0)
    {
      printf("DEBUG: host_storage[0] = %u, n_stored = %u\n", first_unused, n_stored);
      for(u32_t i = 0; i < n_stored; i++)
      {
        u32_t coin[14];
        reconstruct_coin(&host_storage[1 + i * words_per_coin], coin);
        // Verify it's actually a valid coin
        u32_t hash[5];
        sha1(coin, hash);
        printf("DEBUG: Coin %u - hash[0] = 0x%08X (expected 0xAAD20250)\n", i, hash[0]);
        // Print the coin as string
        if(i == 0)
        {
          printf("DEBUG: First coin content: ");
          u08_t *bytes = (u08_t *)coin;
          for(int j = 0; j < 55; j++)
          {
            char c = bytes[j ^ 3]; // j ^ 3 visits the bytes of each 32-bit word in reverse order
            if(c >= 32 && c <= 126)
              printf("%c", c);
            else
              printf("[0x%02X]", (u08_t)c);
          }
          printf("\n");
        }
        if(hash[0] == 0xAAD20250u)
        {
          coins_found++;
          n_coins_this_kernel++;
          printf("COIN FOUND! (kernel %u, coin %u in this kernel)\n",
                 kernel_runs, n_coins_this_kernel);
          save_coin(coin);
        }
      }
    }
    // Update histograms (the per-launch log keeps only the first histogram_capacity launches)
    if(kernel_runs < histogram_capacity)
    {
      kernel_times[kernel_runs] = kernel_time;
      kernel_coin_counts[kernel_runs] = n_coins_this_kernel;
    }
    histogram_add(&time_histogram, kernel_time);
    histogram_add(&coins_histogram, (double)n_coins_this_kernel);
    // Update counters
    kernel_runs++;
    attempts += attempts_per_kernel;
    // Update parameters for next iteration
    param1++;
    param2 = param2 ^ 0x9E3779B9u;
    // Stays within positions 12-53. Starting from 12 this advances by 13 modulo 42,
    // which still visits all 42 positions because 13 and 42 are coprime.
    // NOTE(review): if a step of 1 was intended, use (scan_pos - 12 + 1) % 42 + 12.
    if(use_scan_kernel)
      scan_pos = (scan_pos + 1) % 42 + 12;
    // Print progress every second
    time_measurement();
    double current_time = wall_time_delta() - start_time;
    if(current_time - last_report_time >= 1.0)
    {
      double rate = attempts / current_time;
      printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Kernels: %u, Avg time: %.6f s\n",
             (unsigned long long)attempts, rate / 1e6, coins_found, kernel_runs,
             current_time / kernel_runs);
      last_report_time = current_time;
    }
  }
  time_measurement();
  double total_time = wall_time_delta() - start_time;
  printf("\n=== Mining Statistics ===\n");
  printf("Total attempts: %llu\n", (unsigned long long)attempts);
  printf("Total time: %.2f seconds\n", total_time);
  // Avoid dividing by zero when the loop never ran or ended immediately
  printf("Average rate: %.2f attempts/second\n",
         (total_time > 0.0) ? (double)attempts / total_time : 0.0);
  printf("Coins found: %u\n", coins_found);
  printf("Kernel launches: %u\n", kernel_runs);
  // Print histograms
  histogram_print(&time_histogram, "Kernel Execution Time Histogram", 20);
  histogram_print(&coins_histogram, "Coins Found Per Kernel Histogram", 10);
  // Save detailed histogram data
  FILE *fp = fopen("cuda_kernel_stats.csv", "w");
  if(fp == NULL)
  {
    fprintf(stderr, "Warning: unable to create cuda_kernel_stats.csv\n");
  }
  else
  {
    fprintf(fp, "kernel_id,time_seconds,coins_found\n");
    u32_t n_to_save = (kernel_runs < histogram_capacity) ? kernel_runs : histogram_capacity;
    for(u32_t i = 0; i < n_to_save; i++)
    {
      fprintf(fp, "%u,%.9f,%u\n", i, kernel_times[i], kernel_coin_counts[i]);
    }
    // fclose() flushes the buffered data, so a failure here means the file is incomplete
    if(fclose(fp) != 0)
      fprintf(stderr, "Warning: error while writing cuda_kernel_stats.csv\n");
    else
      printf("\nDetailed statistics saved to cuda_kernel_stats.csv\n");
  }
  // Save any remaining coins
  save_coin(NULL);
  // Cleanup
  free(kernel_times);
  free(kernel_coin_counts);
  terminate_cuda(&cd);
}
//
// Program entry point
//
// usage: <program> [max_attempts [scan]]
//   max_attempts ... decimal attempt limit; 0 (the default) means mine until Ctrl+C
//   scan ........... the literal word "scan" selects the scan kernel
//
// Returns 0 after mining ends, or EXIT_FAILURE when max_attempts is not a valid number.
//
int main(int argc, char *argv[])
{
  u64_t max_attempts = 0;
  int use_scan_kernel = 0;

  // Let Ctrl+C stop the mining loop cleanly instead of killing the process
  signal(SIGINT, signal_handler);
  if(argc > 1)
  {
    char *end = NULL;

    max_attempts = strtoull(argv[1], &end, 10);
    // Reject empty, partially numeric and negative input: strtoull() would otherwise
    // turn it into 0 (no limit) or a huge wrapped-around value without any notice
    if(end == argv[1] || *end != '\0' || strchr(argv[1], '-') != NULL)
    {
      fprintf(stderr, "Invalid attempt limit \"%s\"\nusage: %s [max_attempts [scan]]\n",
              argv[1], argv[0]);
      return EXIT_FAILURE;
    }
  }
  if(argc > 2 && strcmp(argv[2], "scan") == 0)
  {
    use_scan_kernel = 1;
    printf("Using scan kernel (tries 256 values per thread)\n");
  }
  mine_coins_cuda(max_attempts, use_scan_kernel);
  return 0;
}