// // Arquiteturas de Alto Desempenho 2025/2026 // // DETI Coin Miner - CUDA implementation with histograms // #include #include #include #include #include #include "aad_data_types.h" #include "aad_utilities.h" #include "aad_sha1_cpu.h" #include "aad_cuda_utilities.h" #include "aad_vault.h" #define COINS_STORAGE_SIZE 1024u #define MAX_HISTOGRAM_BINS 100 static volatile int keep_running = 1; void signal_handler(int signum) { (void)signum; keep_running = 0; } // Histogram data structures typedef struct { u32_t bins[MAX_HISTOGRAM_BINS]; u32_t count; double min_value; double max_value; } histogram_t; static void histogram_init(histogram_t *h) { memset(h->bins, 0, sizeof(h->bins)); h->count = 0; h->min_value = 1e99; h->max_value = 0.0; } static void histogram_add(histogram_t *h, double value) { if(value < h->min_value) h->min_value = value; if(value > h->max_value) h->max_value = value; h->count++; // For now, just count - we'll bin them later when printing } static void histogram_print(histogram_t *h, const char *title, int n_bins) { if(h->count == 0) { printf("%s: No data\n", title); return; } printf("\n%s:\n", title); printf(" Count: %u\n", h->count); printf(" Min: %.6f\n", h->min_value); printf(" Max: %.6f\n", h->max_value); printf(" Avg: %.6f\n", (h->min_value + h->max_value) / 2.0); } // Coin reconstruction from stored data static void reconstruct_coin(u32_t *stored_data, u32_t coin[14]) { // Fixed parts (must match kernel byte order) coin[0] = 0x44455449u; // "DETI" with byte swap (idx ^ 3) coin[1] = 0x20636F69u; // " coi" with byte swap (idx ^ 3) coin[2] = 0x6E203220u; // "n 2 " with byte swap (idx ^ 3) // Variable parts (restore from storage) for(int i = 0; i < 11; i++) coin[3 + i] = stored_data[i]; } // // Mine DETI coins using CUDA // static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel) { cuda_data_t cd; u32_t *host_storage; u64_t attempts = 0; u32_t coins_found = 0; u32_t kernel_runs = 0; // Histograms histogram_t time_histogram; histogram_t coins_histogram; double *kernel_times = NULL; u32_t *kernel_coin_counts = NULL; u32_t histogram_capacity = 10000; histogram_init(&time_histogram); histogram_init(&coins_histogram); kernel_times = (double *)malloc(histogram_capacity * sizeof(double)); kernel_coin_counts = (u32_t *)malloc(histogram_capacity * sizeof(u32_t)); // Initialize CUDA cd.device_number = 0; cd.cubin_file_name = "coin_miner_cuda_kernel.cubin"; cd.kernel_name = use_scan_kernel ? "mine_deti_coins_scan_kernel" : "mine_deti_coins_kernel"; cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t); cd.data_size[1] = 0; initialize_cuda(&cd); host_storage = (u32_t *)cd.host_data[0]; // Kernel configuration cd.block_dim_x = RECOMENDED_CUDA_BLOCK_SIZE; cd.grid_dim_x = 4096; // Large grid for maximum GPU utilization u32_t n_threads = cd.grid_dim_x * cd.block_dim_x; printf("Mining DETI coins using CUDA...\n"); printf("Grid: %u blocks × %u threads = %u total threads\n", cd.grid_dim_x, cd.block_dim_x, n_threads); printf("Kernel: %s\n", cd.kernel_name); printf("Press Ctrl+C to stop\n\n"); u32_t param1 = (u32_t)time(NULL); u32_t param2 = 0x12345678u; int scan_pos = 12; time_measurement(); time_measurement(); double start_time = wall_time_delta(); double last_report_time = 0.0; while(keep_running && (max_attempts == 0 || attempts < max_attempts)) { // Initialize storage area host_storage[0] = 1u; // First unused index // Copy to device host_to_device_copy(&cd, 0); // Set kernel arguments cd.n_kernel_arguments = use_scan_kernel ? 4 : 3; cd.arg[0] = &cd.device_data[0]; cd.arg[1] = ¶m1; cd.arg[2] = ¶m2; if(use_scan_kernel) cd.arg[3] = &scan_pos; // Launch kernel and measure time time_measurement(); double kernel_start = cpu_time_delta(); lauch_kernel(&cd); time_measurement(); double kernel_end = cpu_time_delta(); double kernel_time = kernel_end - kernel_start; // Copy results back device_to_host_copy(&cd, 0); // Process found coins u32_t n_coins_this_kernel = 0; u32_t n_stored = (host_storage[0] - 1) / 14; if(n_stored > 0 && host_storage[0] < COINS_STORAGE_SIZE) { printf("DEBUG: host_storage[0] = %u, n_stored = %u\n", host_storage[0], n_stored); for(u32_t i = 0; i < n_stored; i++) { u32_t coin[14]; reconstruct_coin(&host_storage[1 + i * 14], coin); // Verify it's actually a valid coin u32_t hash[5]; sha1(coin, hash); printf("DEBUG: Coin %u - hash[0] = 0x%08X (expected 0xAAD20250)\n", i, hash[0]); // Print the coin as string if(i == 0) { printf("DEBUG: First coin content: "); u08_t *bytes = (u08_t *)coin; for(int j = 0; j < 55; j++) { char c = bytes[j ^ 3]; if(c >= 32 && c <= 126) printf("%c", c); else printf("[0x%02X]", (u08_t)c); } printf("\n"); } if(hash[0] == 0xAAD20250u) { coins_found++; n_coins_this_kernel++; printf("COIN FOUND! (kernel %u, coin %u in this kernel)\n", kernel_runs, n_coins_this_kernel); save_coin(coin); } } } // Update histograms if(kernel_runs < histogram_capacity) { kernel_times[kernel_runs] = kernel_time; kernel_coin_counts[kernel_runs] = n_coins_this_kernel; } histogram_add(&time_histogram, kernel_time); histogram_add(&coins_histogram, (double)n_coins_this_kernel); // Update counters kernel_runs++; if(use_scan_kernel) attempts += n_threads * 256; // Each thread tries 256 values else attempts += n_threads; // Update parameters for next iteration param1++; param2 = param2 ^ 0x9E3779B9u; if(use_scan_kernel) scan_pos = (scan_pos + 1) % 42 + 12; // Cycle through positions 12-53 // Print progress every second time_measurement(); double current_time = wall_time_delta() - start_time; if(current_time - last_report_time >= 1.0) { double rate = attempts / current_time; printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Kernels: %u, Avg time: %.6f s\n", (unsigned long long)attempts, rate / 1e6, coins_found, kernel_runs, current_time / kernel_runs); last_report_time = current_time; } } time_measurement(); double total_time = wall_time_delta() - start_time; printf("\n=== Mining Statistics ===\n"); printf("Total attempts: %llu\n", (unsigned long long)attempts); printf("Total time: %.2f seconds\n", total_time); printf("Average rate: %.2f attempts/second\n", attempts / total_time); printf("Coins found: %u\n", coins_found); printf("Kernel launches: %u\n", kernel_runs); // Print histograms histogram_print(&time_histogram, "Kernel Execution Time Histogram", 20); histogram_print(&coins_histogram, "Coins Found Per Kernel Histogram", 10); // Save detailed histogram data FILE *fp = fopen("cuda_kernel_stats.csv", "w"); if(fp != NULL) { fprintf(fp, "kernel_id,time_seconds,coins_found\n"); u32_t n_to_save = (kernel_runs < histogram_capacity) ? kernel_runs : histogram_capacity; for(u32_t i = 0; i < n_to_save; i++) { fprintf(fp, "%u,%.9f,%u\n", i, kernel_times[i], kernel_coin_counts[i]); } fclose(fp); printf("\nDetailed statistics saved to cuda_kernel_stats.csv\n"); } // Save any remaining coins save_coin(NULL); // Cleanup free(kernel_times); free(kernel_coin_counts); terminate_cuda(&cd); } int main(int argc, char *argv[]) { u64_t max_attempts = 0; int use_scan_kernel = 0; signal(SIGINT, signal_handler); if(argc > 1) max_attempts = strtoull(argv[1], NULL, 10); if(argc > 2 && strcmp(argv[2], "scan") == 0) { use_scan_kernel = 1; printf("Using scan kernel (tries 256 values per thread)\n"); } mine_coins_cuda(max_attempts, use_scan_kernel); return 0; }