//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - CUDA implementation with histograms
//

#include <time.h>
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <signal.h>
|
||
#include "aad_data_types.h"
|
||
#include "aad_utilities.h"
|
||
#include "aad_sha1_cpu.h"
|
||
#include "aad_cuda_utilities.h"
|
||
#include "aad_vault.h"
|
||
|
||
#define COINS_STORAGE_SIZE 1024u
|
||
#define MAX_HISTOGRAM_BINS 100
|
||
|
||
// Run flag polled by the mining loop and cleared by signal_handler().
// volatile sig_atomic_t is the object type the C standard guarantees can be
// written from an asynchronous signal handler.
static volatile sig_atomic_t keep_running = 1;

//
// Signal handler: ask the mining loop to stop after the current iteration
//
// signum --- number of the delivered signal (ignored)
//
void signal_handler(int signum)
{
  (void)signum;
  keep_running = 0;
}
|
||
|
||
// Histogram data structures
|
||
typedef struct {
|
||
u32_t bins[MAX_HISTOGRAM_BINS];
|
||
u32_t count;
|
||
double min_value;
|
||
double max_value;
|
||
} histogram_t;
|
||
|
||
static void histogram_init(histogram_t *h)
|
||
{
|
||
memset(h->bins, 0, sizeof(h->bins));
|
||
h->count = 0;
|
||
h->min_value = 1e99;
|
||
h->max_value = 0.0;
|
||
}
|
||
|
||
static void histogram_add(histogram_t *h, double value)
|
||
{
|
||
if(value < h->min_value)
|
||
h->min_value = value;
|
||
if(value > h->max_value)
|
||
h->max_value = value;
|
||
h->count++;
|
||
|
||
// For now, just count - we'll bin them later when printing
|
||
}
|
||
|
||
static void histogram_print(histogram_t *h, const char *title, int n_bins)
|
||
{
|
||
if(h->count == 0)
|
||
{
|
||
printf("%s: No data\n", title);
|
||
return;
|
||
}
|
||
|
||
printf("\n%s:\n", title);
|
||
printf(" Count: %u\n", h->count);
|
||
printf(" Min: %.6f\n", h->min_value);
|
||
printf(" Max: %.6f\n", h->max_value);
|
||
printf(" Avg: %.6f\n", (h->min_value + h->max_value) / 2.0);
|
||
}
|
||
|
||
// Coin reconstruction from stored data
|
||
static void reconstruct_coin(u32_t *stored_data, u32_t coin[14])
|
||
{
|
||
// Simply copy the complete coin data from storage
|
||
for(int i = 0; i < 14; i++)
|
||
coin[i] = stored_data[i];
|
||
}
|
||
|
||
//
|
||
// Mine DETI coins using CUDA
|
||
//
|
||
static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
|
||
{
|
||
cuda_data_t cd;
|
||
u32_t *host_storage;
|
||
u64_t attempts = 0;
|
||
u32_t coins_found = 0;
|
||
u32_t kernel_runs = 0;
|
||
|
||
// Histograms
|
||
histogram_t time_histogram;
|
||
histogram_t coins_histogram;
|
||
double *kernel_times = NULL;
|
||
u32_t *kernel_coin_counts = NULL;
|
||
u32_t histogram_capacity = 10000;
|
||
|
||
histogram_init(&time_histogram);
|
||
histogram_init(&coins_histogram);
|
||
|
||
kernel_times = (double *)malloc(histogram_capacity * sizeof(double));
|
||
kernel_coin_counts = (u32_t *)malloc(histogram_capacity * sizeof(u32_t));
|
||
|
||
// Initialize CUDA
|
||
cd.device_number = 0;
|
||
cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
|
||
cd.kernel_name = use_scan_kernel ? "mine_deti_coins_scan_kernel" : "mine_deti_coins_kernel";
|
||
cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t);
|
||
cd.data_size[1] = 0;
|
||
|
||
initialize_cuda(&cd);
|
||
|
||
host_storage = (u32_t *)cd.host_data[0];
|
||
|
||
// Kernel configuration
|
||
cd.block_dim_x = RECOMENDED_CUDA_BLOCK_SIZE;
|
||
cd.grid_dim_x = 4096; // Large grid for maximum GPU utilization
|
||
|
||
u32_t n_threads = cd.grid_dim_x * cd.block_dim_x;
|
||
|
||
printf("Mining DETI coins using CUDA...\n");
|
||
printf("Grid: %u blocks × %u threads = %u total threads\n",
|
||
cd.grid_dim_x, cd.block_dim_x, n_threads);
|
||
printf("Kernel: %s\n", cd.kernel_name);
|
||
printf("Press Ctrl+C to stop\n\n");
|
||
|
||
u32_t param1 = (u32_t)time(NULL);
|
||
u32_t param2 = 0x12345678u;
|
||
int scan_pos = 12;
|
||
|
||
time_measurement();
|
||
time_measurement();
|
||
double start_time = wall_time_delta();
|
||
double last_report_time = 0.0;
|
||
|
||
while(keep_running && (max_attempts == 0 || attempts < max_attempts))
|
||
{
|
||
// Initialize storage area
|
||
host_storage[0] = 1u; // First unused index
|
||
|
||
// Copy to device
|
||
host_to_device_copy(&cd, 0);
|
||
|
||
// Set kernel arguments
|
||
cd.n_kernel_arguments = use_scan_kernel ? 4 : 3;
|
||
cd.arg[0] = &cd.device_data[0];
|
||
cd.arg[1] = ¶m1;
|
||
cd.arg[2] = ¶m2;
|
||
if(use_scan_kernel)
|
||
cd.arg[3] = &scan_pos;
|
||
|
||
// Launch kernel and measure time
|
||
time_measurement();
|
||
double kernel_start = cpu_time_delta();
|
||
lauch_kernel(&cd);
|
||
time_measurement();
|
||
double kernel_end = cpu_time_delta();
|
||
double kernel_time = kernel_end - kernel_start;
|
||
|
||
// Copy results back
|
||
device_to_host_copy(&cd, 0);
|
||
|
||
// Process found coins
|
||
u32_t n_coins_this_kernel = 0;
|
||
u32_t n_stored = (host_storage[0] - 1) / 14;
|
||
|
||
if(n_stored > 0 && host_storage[0] < COINS_STORAGE_SIZE)
|
||
{
|
||
printf("DEBUG: host_storage[0] = %u, n_stored = %u\n", host_storage[0], n_stored);
|
||
|
||
for(u32_t i = 0; i < n_stored; i++)
|
||
{
|
||
u32_t coin[14];
|
||
reconstruct_coin(&host_storage[1 + i * 14], coin);
|
||
|
||
// Verify it's actually a valid coin
|
||
u32_t hash[5];
|
||
sha1(coin, hash);
|
||
|
||
printf("DEBUG: Coin %u - hash[0] = 0x%08X (expected 0xAAD20250)\n", i, hash[0]);
|
||
|
||
// Print the coin as string
|
||
if(i == 0) {
|
||
printf("DEBUG: First coin content: ");
|
||
u08_t *bytes = (u08_t *)coin;
|
||
for(int j = 0; j < 55; j++) {
|
||
char c = bytes[j ^ 3];
|
||
if(c >= 32 && c <= 126)
|
||
printf("%c", c);
|
||
else
|
||
printf("[0x%02X]", (u08_t)c);
|
||
}
|
||
printf("\n");
|
||
}
|
||
|
||
if(hash[0] == 0xAAD20250u)
|
||
{
|
||
coins_found++;
|
||
n_coins_this_kernel++;
|
||
printf("COIN FOUND! (kernel %u, coin %u in this kernel)\n",
|
||
kernel_runs, n_coins_this_kernel);
|
||
save_coin(coin);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Update histograms
|
||
if(kernel_runs < histogram_capacity)
|
||
{
|
||
kernel_times[kernel_runs] = kernel_time;
|
||
kernel_coin_counts[kernel_runs] = n_coins_this_kernel;
|
||
}
|
||
|
||
histogram_add(&time_histogram, kernel_time);
|
||
histogram_add(&coins_histogram, (double)n_coins_this_kernel);
|
||
|
||
// Update counters
|
||
kernel_runs++;
|
||
if(use_scan_kernel)
|
||
attempts += n_threads * 256; // Each thread tries 256 values
|
||
else
|
||
attempts += n_threads;
|
||
|
||
// Update parameters for next iteration
|
||
param1++;
|
||
param2 = param2 ^ 0x9E3779B9u;
|
||
if(use_scan_kernel)
|
||
scan_pos = (scan_pos + 1) % 42 + 12; // Cycle through positions 12-53
|
||
|
||
// Print progress every second
|
||
time_measurement();
|
||
double current_time = wall_time_delta() - start_time;
|
||
if(current_time - last_report_time >= 1.0)
|
||
{
|
||
double rate = attempts / current_time;
|
||
printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Kernels: %u, Avg time: %.6f s\n",
|
||
(unsigned long long)attempts, rate / 1e6, coins_found, kernel_runs,
|
||
current_time / kernel_runs);
|
||
last_report_time = current_time;
|
||
}
|
||
}
|
||
|
||
time_measurement();
|
||
double total_time = wall_time_delta() - start_time;
|
||
|
||
printf("\n=== Mining Statistics ===\n");
|
||
printf("Total attempts: %llu\n", (unsigned long long)attempts);
|
||
printf("Total time: %.2f seconds\n", total_time);
|
||
printf("Average rate: %.2f attempts/second\n", attempts / total_time);
|
||
printf("Coins found: %u\n", coins_found);
|
||
printf("Kernel launches: %u\n", kernel_runs);
|
||
|
||
// Print histograms
|
||
histogram_print(&time_histogram, "Kernel Execution Time Histogram", 20);
|
||
histogram_print(&coins_histogram, "Coins Found Per Kernel Histogram", 10);
|
||
|
||
// Save detailed histogram data
|
||
FILE *fp = fopen("cuda_kernel_stats.csv", "w");
|
||
if(fp != NULL)
|
||
{
|
||
fprintf(fp, "kernel_id,time_seconds,coins_found\n");
|
||
u32_t n_to_save = (kernel_runs < histogram_capacity) ? kernel_runs : histogram_capacity;
|
||
for(u32_t i = 0; i < n_to_save; i++)
|
||
{
|
||
fprintf(fp, "%u,%.9f,%u\n", i, kernel_times[i], kernel_coin_counts[i]);
|
||
}
|
||
fclose(fp);
|
||
printf("\nDetailed statistics saved to cuda_kernel_stats.csv\n");
|
||
}
|
||
|
||
// Save any remaining coins
|
||
save_coin(NULL);
|
||
|
||
// Cleanup
|
||
free(kernel_times);
|
||
free(kernel_coin_counts);
|
||
terminate_cuda(&cd);
|
||
}
|
||
|
||
int main(int argc, char *argv[])
|
||
{
|
||
u64_t max_attempts = 0;
|
||
int use_scan_kernel = 0;
|
||
|
||
signal(SIGINT, signal_handler);
|
||
|
||
if(argc > 1)
|
||
max_attempts = strtoull(argv[1], NULL, 10);
|
||
|
||
if(argc > 2 && strcmp(argv[2], "scan") == 0)
|
||
{
|
||
use_scan_kernel = 1;
|
||
printf("Using scan kernel (tries 256 values per thread)\n");
|
||
}
|
||
|
||
mine_coins_cuda(max_attempts, use_scan_kernel);
|
||
|
||
return 0;
|
||
}
|
||
|