aad-assignment-1/aad_coin_miner_cuda.c

152 lines
4.0 KiB
C

//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - Host Code
//
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <getopt.h>
#include "aad_data_types.h"
#include "aad_sha1_cpu.h"
#include "aad_cuda_utilities.h"
#include "aad_vault.h"
// Size of the result buffer shared with the kernel, in 32-bit words.
// Word 0 holds the write counter; found coins are stored from word 1 onward.
#define COINS_STORAGE_SIZE 2048u // Increased buffer slightly

// Run flag polled by the mining loop and cleared from the signal handler.
// sig_atomic_t is the only integer type the C standard guarantees can be
// safely written from an asynchronous signal handler.
static volatile sig_atomic_t keep_running = 1;
/*
 * Signal handler: requests a graceful stop by clearing the keep_running flag.
 * It does nothing else, so the mining loop decides when to actually exit.
 */
void signal_handler(int signum)
{
    keep_running = 0;
    (void)signum; // the signal number itself is not needed
}
/*
 * Read the monotonic clock and return its value in seconds as a double
 * (whole seconds plus the nanosecond part scaled by 1e-9).
 * Only differences between two readings are meaningful.
 */
static double get_wall_time(void)
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);

    const double whole_seconds = (double)now.tv_sec;
    const double nanos_as_seconds = (double)now.tv_nsec * 1.0e-9;
    return whole_seconds + nanos_as_seconds;
}
/*
 * Host-side mining loop: repeatedly launches the CUDA kernel and stores the
 * coins it reports until a limit is reached or keep_running is cleared.
 *
 * max_attempts  stop once at least this many attempts were made (0 = no limit)
 * max_time      stop once at least this many seconds have elapsed (0 = no limit)
 *
 * Limits are checked once per kernel launch, so the final attempt count can
 * exceed max_attempts by up to one batch.
 *
 * Result buffer layout (u32 words): [ counter | coin 0 (14 words) | coin 1 | ... ]
 * The counter is reset to 1 before each launch and is read back as the index
 * of the next free word.
 */
static void mine_coins_cuda(u64_t max_attempts, double max_time)
{
    enum { WORDS_PER_COIN = 14 }; // each coin is 14 u32 words

    cuda_data_t cd;
    u32_t *host_storage;
    u64_t attempts = 0;
    u32_t coins_found_total = 0;

    // Initialize CUDA
    memset(&cd, 0, sizeof(cd));
    cd.device_number = 0;
    cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
    cd.kernel_name = "mine_deti_coins_kernel";

    // Allocate memory for results [ Counter (1 u32) | Data ... ]
    cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t);
    cd.data_size[1] = 0;
    initialize_cuda(&cd);
    host_storage = (u32_t *)cd.host_data[0];

    // Configure launch dimensions
    cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE;
    cd.grid_dim_x = 80 * 4; // many blocks, intended to hide latency
    u32_t total_threads = cd.grid_dim_x * cd.block_dim_x;
    u32_t attempts_per_thread = 4096; // work per kernel launch

    printf("Starting CUDA Miner on %s\n", cd.device_name);
    printf("Threads: %u, Attempts/Thread: %u\n", total_threads, attempts_per_thread);

    u64_t base_nonce = 0;
    double start_time = get_wall_time();

    // Kernel arguments are passed by address, so updating base_nonce below
    // changes the value seen by the next launch.
    cd.n_kernel_arguments = 3;
    cd.arg[0] = &cd.device_data[0];
    cd.arg[1] = &base_nonce;
    cd.arg[2] = &attempts_per_thread;

    while(keep_running)
    {
        // 1. Reset storage counter: index 0 is the counter, data starts at index 1.
        host_storage[0] = 1u;
        host_to_device_copy(&cd, 0);

        // 2. Launch kernel
        launch_kernel(&cd);

        // 3. Retrieve results
        device_to_host_copy(&cd, 0);

        // 4. Process found coins.
        // The counter comes from the device, so never trust it as an index:
        // a value of 0 would wrap the subtraction below, and a value beyond
        // the buffer would make the host read past the end of host_storage.
        u32_t next_write_idx = host_storage[0];
        if(next_write_idx < 1u)
        {
            next_write_idx = 1u;
        }
        else if(next_write_idx > COINS_STORAGE_SIZE)
        {
            fprintf(stderr,
                    "Warning: result buffer overflow (counter = %u); some coins may have been lost\n",
                    next_write_idx);
            next_write_idx = COINS_STORAGE_SIZE;
        }

        // Only complete 14-word records are processed.
        u32_t coins_in_batch = (next_write_idx - 1u) / WORDS_PER_COIN;
        for(u32_t c = 0; c < coins_in_batch; c++)
        {
            u32_t found_coin[WORDS_PER_COIN];

            // Work on a private copy of the record from the host buffer.
            memcpy(found_coin, &host_storage[1u + c * WORDS_PER_COIN], sizeof(found_coin));

            // Verify/save using the required function
            save_coin(found_coin);
            coins_found_total++;
            printf("Coin Found! Total: %u\n", coins_found_total);
        }

        // 5. Update progress
        u64_t batch_attempts = (u64_t)total_threads * attempts_per_thread;
        attempts += batch_attempts;
        base_nonce += batch_attempts; // ensure the next kernel uses new nonces

        // 6. Check limits
        if((max_attempts > 0 && attempts >= max_attempts) ||
           (max_time > 0 && (get_wall_time() - start_time) >= max_time))
        {
            break;
        }
    }

    // Report and clean up
    double total_time = get_wall_time() - start_time;
    // Guard against a zero elapsed time (e.g. interrupted before the first launch).
    double hashrate = (total_time > 0.0) ? ((double)attempts / total_time) / 1000000.0 : 0.0;

    printf("\nMining Finished.\n");
    printf("Attempts: %llu\n", (unsigned long long)attempts);
    printf("Time: %.4fs\n", total_time);
    printf("Hashrate: %.2f MH/s\n", hashrate);

    save_coin(NULL); // Flush vault
    terminate_cuda(&cd);
}
/*
 * Program entry point: parses the command line and starts the miner.
 *
 *   -a <attempts>  maximum number of attempts (0 or omitted = no limit)
 *   -t <seconds>   maximum run time in seconds (0 or omitted = no limit)
 *
 * Returns 0 after mining ends, or 1 on an unknown option or a malformed value.
 * SIGINT is routed to signal_handler so that Ctrl-C stops the mining loop.
 */
int main(int argc, char *argv[])
{
    u64_t max_attempts = 0;
    double max_time = 0;
    int opt;

    signal(SIGINT, signal_handler);

    while((opt = getopt(argc, argv, "a:t:")) != -1)
    {
        char *end = NULL;
        int bad_value = 0;

        switch(opt) {
            case 'a':
                max_attempts = strtoull(optarg, &end, 10);
                // strtoull silently negates values with a leading '-', and an
                // unparsable string would become 0, i.e. "no limit".
                bad_value = (end == optarg || *end != '\0' || strchr(optarg, '-') != NULL);
                break;
            case 't':
                max_time = strtod(optarg, &end);
                // !(x >= 0) also rejects NaN
                bad_value = (end == optarg || *end != '\0' || !(max_time >= 0.0));
                break;
            default:
                fprintf(stderr, "Usage: %s -a <attempts> -t <seconds>\n", argv[0]);
                return 1;
        }

        if(bad_value)
        {
            fprintf(stderr, "Invalid value for -%c: '%s'\n", opt, optarg);
            fprintf(stderr, "Usage: %s -a <attempts> -t <seconds>\n", argv[0]);
            return 1;
        }
    }

    mine_coins_cuda(max_attempts, max_time);
    return 0;
}