aad-assignment-1/aad_coin_miner_simd.c

603 lines
17 KiB
C

//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - SIMD implementation (AVX, AVX2 and AVX2 + OpenMP)
//
#include <errno.h>
#include <getopt.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "aad_data_types.h"
#include "aad_utilities.h"
#include "aad_sha1_cpu.h"
#include "aad_vault.h"
// Run flag polled by the mining loops; cleared by signal_handler().
// volatile sig_atomic_t is the only object type the C standard guarantees can be
// safely written from an asynchronous signal handler.
static volatile sig_atomic_t keep_running = 1;

//
// signal handler: request a graceful stop of the mining loop
//
// signum is ignored; every signal routed here simply clears keep_running.
//
void signal_handler(int signum)
{
  (void)signum;
  keep_running = 0;
}
//
// a hash is accepted when its first 32-bit word is exactly 0xAAD20250
// returns 1 for an accepted hash, 0 otherwise
//
static int is_valid_coin(u32_t *hash)
{
  const u32_t leading_word = hash[0];
  return (leading_word == 0xAAD20250u) ? 1 : 0;
}
// Read CLOCK_MONOTONIC and return it as a single value in seconds
// (whole seconds plus the nanosecond field scaled by 1e-9)
static double get_wall_time(void)
{
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  const double whole_seconds = (double)now.tv_sec;
  const double fractional_seconds = (double)now.tv_nsec * 1.0e-9;
  return whole_seconds + fractional_seconds;
}
//
// advance the variable part of a coin (bytes 12..53) to the next candidate
//
// The variable part is treated as a counter whose least significant digit is
// byte 53 and whose digits run over the values 32..126.  Bytes are addressed
// through "index ^ 3", the same addressing used when the coin template is built.
//
// returns 1 when the increment was absorbed, 0 when the carry ran past byte 12
// (every position wrapped around)
//
static int increment_coin(u32_t coin[14])
{
  u08_t *bytes = (u08_t *)coin;
  int pos;

  for(pos = 53; pos >= 12; pos--)
  {
    u08_t *digit = &bytes[pos ^ 3];

    // a stray newline or 0x80 marker restarts from space before counting
    if(*digit == '\n' || *digit == 0x80)
      *digit = 32;
    *digit += 1;
    // never leave a newline inside the variable part
    if(*digit == '\n')
      *digit += 1;
    // still below 127: no carry, this increment is complete
    if(*digit < 127)
      break;
    // wrapped: reset this digit to space and carry into the next position
    *digit = 32;
  }
  return pos >= 12;
}
//
// fill the interleaved SIMD input buffer with simd_width consecutive coins
//
// Word w of the coin assigned to lane l is stored at interleaved_data[w * simd_width + l].
// base_coin is advanced once per lane, so on return it holds the first coin of
// the next batch.
//
static void prepare_coins(u32_t base_coin[14], u32_t *interleaved_data, int simd_width)
{
  int lane = 0;

  while(lane < simd_width)
  {
    // snapshot the current candidate before the base coin moves on
    u32_t snapshot[14];
    memcpy(snapshot, base_coin, sizeof(snapshot));

    u32_t *column = interleaved_data + lane;
    for(int word = 0; word < 14; word++)
      column[word * simd_width] = snapshot[word];

    // step the base coin so the next lane gets a different candidate
    increment_coin(base_coin);
    lane++;
  }
}
//
// de-interleave the SIMD hash output: hashes[lane] receives the 5 words of that lane
// (word w of lane l is read from interleaved_hash[w * simd_width + l])
//
static void extract_hashes(u32_t *interleaved_hash, u32_t hashes[][5], int simd_width)
{
  for(int lane = 0; lane < simd_width; lane++)
  {
    const u32_t *column = interleaved_hash + lane;
    u32_t *out = hashes[lane];

    for(int word = 0; word < 5; word++)
      out[word] = column[word * simd_width];
  }
}
//
// de-interleave the SIMD input buffer: coins[lane] receives the 14 words of that lane
// (word w of lane l is read from interleaved_data[w * simd_width + l])
//
static void extract_coins(u32_t *interleaved_data, u32_t coins[][14], int simd_width)
{
  for(int lane = 0; lane < simd_width; lane++)
  {
    const u32_t *column = interleaved_data + lane;
    u32_t *out = coins[lane];

    for(int word = 0; word < 14; word++)
      out[word] = column[word * simd_width];
  }
}
#if defined(__AVX__)
//
// mine DETI coins using AVX (4-way SIMD)
//
// Builds batches of 4 consecutive candidate coins, hashes each batch with
// sha1_avx() and passes every candidate accepted by is_valid_coin() to save_coin().
//
// max_attempts: stop once at least this many candidates were hashed (0 = no limit)
// max_time:     stop once this many seconds of wall time have elapsed (0 = no limit)
//
// The loop also ends as soon as keep_running is cleared (Ctrl+C).  Before
// returning, the statistics are printed and save_coin(NULL) is called
// (presumably to flush the vault -- confirm in aad_vault.h).
//
__attribute__((unused))
static void mine_coins_avx(u64_t max_attempts, double max_time)
{
  // An enumeration constant is an integer constant expression (a const int is
  // not), so the buffers below are ordinary fixed-size arrays instead of
  // variable-length arrays.
  enum { SIMD_WIDTH = 4 };
  u32_t base_coin[14];
  u32_t interleaved_data[14 * SIMD_WIDTH] __attribute__((aligned(16)));
  u32_t interleaved_hash[5 * SIMD_WIDTH] __attribute__((aligned(16)));
  u64_t attempts = 0;
  u32_t coins_found = 0;

  // Initialize base coin template.  Byte i of the coin text is stored at index
  // i ^ 3, i.e. byte order is reversed inside every 32-bit word (presumably the
  // layout the SHA1 routines expect -- confirm in aad_sha1_cpu.h).
  memset(base_coin, 0, sizeof(base_coin));
  ((u08_t *)base_coin)[0x0 ^ 3] = 'D';
  ((u08_t *)base_coin)[0x1 ^ 3] = 'E';
  ((u08_t *)base_coin)[0x2 ^ 3] = 'T';
  ((u08_t *)base_coin)[0x3 ^ 3] = 'I';
  ((u08_t *)base_coin)[0x4 ^ 3] = ' ';
  ((u08_t *)base_coin)[0x5 ^ 3] = 'c';
  ((u08_t *)base_coin)[0x6 ^ 3] = 'o';
  ((u08_t *)base_coin)[0x7 ^ 3] = 'i';
  ((u08_t *)base_coin)[0x8 ^ 3] = 'n';
  ((u08_t *)base_coin)[0x9 ^ 3] = ' ';
  ((u08_t *)base_coin)[0xa ^ 3] = '2';
  ((u08_t *)base_coin)[0xb ^ 3] = ' ';
  ((u08_t *)base_coin)[0x36 ^ 3] = '\n';
  ((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
  // Initialize variable part with A-Z cycling pattern (same as CPU miner)
  for(int i = 12; i < 54; i++)
    ((u08_t *)base_coin)[i ^ 3] = 'A' + (i - 12) % 26;

  printf("Mining DETI coins using AVX (4-way SIMD)...\n");
  if(max_attempts > 0 && max_time > 0)
    printf("Will stop after %llu attempts OR %.2f seconds (whichever comes first)\n",
           (unsigned long long)max_attempts, max_time);
  else if(max_attempts > 0)
    printf("Will stop after %llu attempts\n", (unsigned long long)max_attempts);
  else if(max_time > 0)
    printf("Will stop after %.2f seconds\n", max_time);
  else
    printf("Running indefinitely until Ctrl+C...\n");
  printf("Press Ctrl+C to stop\n\n");

  double start_time = get_wall_time();
  while(keep_running)
  {
    // Check stopping conditions; the clock is only read when a time limit exists
    if(max_attempts > 0 && attempts >= max_attempts)
      break;
    if(max_time > 0 && get_wall_time() - start_time >= max_time)
      break;

    // Prepare coins for this batch (also advances base_coin by SIMD_WIDTH candidates)
    prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
    // Compute SHA1 hashes
    sha1_avx((v4si *)interleaved_data, (v4si *)interleaved_hash);
    attempts += SIMD_WIDTH;

    // Check each lane for valid coins
    u32_t hashes[SIMD_WIDTH][5];
    extract_hashes(interleaved_hash, hashes, SIMD_WIDTH);
    for(int lane = 0; lane < SIMD_WIDTH; lane++)
    {
      if(is_valid_coin(hashes[lane]))
      {
        coins_found++;
        u32_t coins[SIMD_WIDTH][14];
        extract_coins(interleaved_data, coins, SIMD_WIDTH);
        printf("COIN FOUND! (attempt %llu, lane %d)\n",
               (unsigned long long)(attempts - SIMD_WIDTH + lane), lane);
        save_coin(coins[lane]);
      }
    }

    // Print progress every 1M attempts
    if(attempts % 1000000 < (u64_t)SIMD_WIDTH)
    {
      double elapsed = get_wall_time() - start_time;
      // guard the division in case the clock has not advanced yet
      double rate = (elapsed > 0.0) ? attempts / elapsed : 0.0;
      printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n",
             (unsigned long long)attempts, rate / 1e6, coins_found, elapsed);
    }
  }

  double total_time = get_wall_time() - start_time;
  double average_rate = (total_time > 0.0) ? attempts / total_time : 0.0;
  printf("\n=== Mining Statistics ===\n");
  printf("Total attempts: %llu\n", (unsigned long long)attempts);
  printf("Total time: %.2f seconds\n", total_time);
  printf("Average rate: %.2f attempts/second\n", average_rate);
  printf("Coins found: %u\n", coins_found);
  save_coin(NULL);
}
#endif
#if defined(__AVX2__)
//
// mine DETI coins using AVX2 (8-way SIMD)
//
// Builds batches of 8 consecutive candidate coins, hashes each batch with
// sha1_avx2() and passes every candidate accepted by is_valid_coin() to save_coin().
//
// max_attempts: stop once at least this many candidates were hashed (0 = no limit)
// max_time:     stop once this many seconds of wall time have elapsed (0 = no limit)
//
// The loop also ends as soon as keep_running is cleared (Ctrl+C).  Before
// returning, the statistics are printed and save_coin(NULL) is called
// (presumably to flush the vault -- confirm in aad_vault.h).
//
__attribute__((unused))
static void mine_coins_avx2(u64_t max_attempts, double max_time)
{
  // An enumeration constant is an integer constant expression (a const int is
  // not), so the buffers below are ordinary fixed-size arrays instead of
  // variable-length arrays.
  enum { SIMD_WIDTH = 8 };
  u32_t base_coin[14];
  u32_t interleaved_data[14 * SIMD_WIDTH] __attribute__((aligned(32)));
  u32_t interleaved_hash[5 * SIMD_WIDTH] __attribute__((aligned(32)));
  u64_t attempts = 0;
  u32_t coins_found = 0;

  // Initialize base coin template.  Byte i of the coin text is stored at index
  // i ^ 3, i.e. byte order is reversed inside every 32-bit word (presumably the
  // layout the SHA1 routines expect -- confirm in aad_sha1_cpu.h).
  memset(base_coin, 0, sizeof(base_coin));
  ((u08_t *)base_coin)[0x0 ^ 3] = 'D';
  ((u08_t *)base_coin)[0x1 ^ 3] = 'E';
  ((u08_t *)base_coin)[0x2 ^ 3] = 'T';
  ((u08_t *)base_coin)[0x3 ^ 3] = 'I';
  ((u08_t *)base_coin)[0x4 ^ 3] = ' ';
  ((u08_t *)base_coin)[0x5 ^ 3] = 'c';
  ((u08_t *)base_coin)[0x6 ^ 3] = 'o';
  ((u08_t *)base_coin)[0x7 ^ 3] = 'i';
  ((u08_t *)base_coin)[0x8 ^ 3] = 'n';
  ((u08_t *)base_coin)[0x9 ^ 3] = ' ';
  ((u08_t *)base_coin)[0xa ^ 3] = '2';
  ((u08_t *)base_coin)[0xb ^ 3] = ' ';
  ((u08_t *)base_coin)[0x36 ^ 3] = '\n';
  ((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
  // Initialize variable part with A-Z cycling pattern (same as CPU miner)
  for(int i = 12; i < 54; i++)
    ((u08_t *)base_coin)[i ^ 3] = 'A' + (i - 12) % 26;

  printf("Mining DETI coins using AVX2 (8-way SIMD)...\n");
  if(max_attempts > 0 && max_time > 0)
    printf("Will stop after %llu attempts OR %.2f seconds (whichever comes first)\n",
           (unsigned long long)max_attempts, max_time);
  else if(max_attempts > 0)
    printf("Will stop after %llu attempts\n", (unsigned long long)max_attempts);
  else if(max_time > 0)
    printf("Will stop after %.2f seconds\n", max_time);
  else
    printf("Running indefinitely until Ctrl+C...\n");
  printf("Press Ctrl+C to stop\n\n");

  double start_time = get_wall_time();
  while(keep_running)
  {
    // Check stopping conditions; the clock is only read when a time limit exists
    if(max_attempts > 0 && attempts >= max_attempts)
      break;
    if(max_time > 0 && get_wall_time() - start_time >= max_time)
      break;

    // Prepare and hash one batch (prepare_coins also advances base_coin)
    prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
    sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash);
    attempts += SIMD_WIDTH;

    // Check each lane for valid coins
    u32_t hashes[SIMD_WIDTH][5];
    extract_hashes(interleaved_hash, hashes, SIMD_WIDTH);
    for(int lane = 0; lane < SIMD_WIDTH; lane++)
    {
      if(is_valid_coin(hashes[lane]))
      {
        coins_found++;
        u32_t coins[SIMD_WIDTH][14];
        extract_coins(interleaved_data, coins, SIMD_WIDTH);
        printf("COIN FOUND! (attempt %llu, lane %d)\n",
               (unsigned long long)(attempts - SIMD_WIDTH + lane), lane);
        save_coin(coins[lane]);
      }
    }

    // Print progress every 1M attempts
    if(attempts % 1000000 < (u64_t)SIMD_WIDTH)
    {
      double elapsed = get_wall_time() - start_time;
      // guard the division in case the clock has not advanced yet
      double rate = (elapsed > 0.0) ? attempts / elapsed : 0.0;
      printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n",
             (unsigned long long)attempts, rate / 1e6, coins_found, elapsed);
    }
  }

  double total_time = get_wall_time() - start_time;
  double average_rate = (total_time > 0.0) ? attempts / total_time : 0.0;
  printf("\n=== Mining Statistics ===\n");
  printf("Total attempts: %llu\n", (unsigned long long)attempts);
  printf("Total time: %.2f seconds\n", total_time);
  printf("Average rate: %.2f attempts/second\n", average_rate);
  printf("Coins found: %u\n", coins_found);
  save_coin(NULL);
}
#endif
#if defined(__AVX2__)
#include <omp.h>
//
// mine DETI coins using AVX2 (8-way SIMD) + OpenMP
//
// Every OpenMP thread runs its own 8-way mining loop on a private coin template.
//
// max_attempts: stop once the attempts visible to a thread reach this value (0 = no limit)
// max_time:     stop once this many seconds of wall time have elapsed (0 = no limit)
//
// Shared state and how it is synchronised:
//   attempts     - published attempt counter; threads add their private count to it
//                  every FLUSH_INTERVAL attempts and once more when they finish.
//                  All accesses inside the parallel region are atomic.
//   should_stop  - set by the first thread that hits a limit; atomic read/write.
//   coins_found  - incremented atomically inside the critical section that also
//                  serialises the "COIN FOUND" message and the save_coin() call.
//
// A thread compares (published attempts + its own unpublished attempts) with
// max_attempts, so the limit may be exceeded by at most the unpublished work of
// the other threads.
//
// The loop also ends as soon as keep_running is cleared (Ctrl+C).  Before
// returning, the statistics are printed and save_coin(NULL) is called
// (presumably to flush the vault -- confirm in aad_vault.h).
//
__attribute__((unused))
static void mine_coins_avx2_omp(u64_t max_attempts, double max_time)
{
  // Enumeration constants are integer constant expressions, so the per-thread
  // buffers below are fixed-size arrays instead of variable-length arrays.
  enum
  {
    SIMD_WIDTH = 8,
    FLUSH_INTERVAL = 100000,   // private attempts accumulated before publishing
    REPORT_INTERVAL = 1000000  // published attempts between two progress lines
  };
  int num_threads = omp_get_max_threads();
  u64_t attempts = 0;
  u32_t coins_found = 0;
  u64_t last_reported_attempts = 0; // only ever touched by thread 0

  printf("Mining DETI coins using AVX2 (8-way SIMD) + OpenMP (%d threads)...\n", num_threads);
  if(max_attempts > 0 && max_time > 0)
    printf("Will stop after %llu attempts OR %.2f seconds (whichever comes first)\n",
           (unsigned long long)max_attempts, max_time);
  else if(max_attempts > 0)
    printf("Will stop after %llu attempts\n", (unsigned long long)max_attempts);
  else if(max_time > 0)
    printf("Will stop after %.2f seconds\n", max_time);
  else
    printf("Running indefinitely until Ctrl+C...\n");
  printf("Press Ctrl+C to stop\n\n");

  double start_time = get_wall_time();
  int should_stop = 0;
  #pragma omp parallel
  {
    u32_t base_coin[14];
    u32_t interleaved_data[14 * SIMD_WIDTH] __attribute__((aligned(32)));
    u32_t interleaved_hash[5 * SIMD_WIDTH] __attribute__((aligned(32)));
    u64_t thread_attempts = 0; // attempts made by this thread and not yet published

    // Initialize base coin template (unique per thread).  Byte i of the coin
    // text is stored at index i ^ 3, i.e. byte order is reversed inside every
    // 32-bit word (presumably the layout the SHA1 routines expect -- confirm in
    // aad_sha1_cpu.h).
    memset(base_coin, 0, sizeof(base_coin));
    ((u08_t *)base_coin)[0x0 ^ 3] = 'D';
    ((u08_t *)base_coin)[0x1 ^ 3] = 'E';
    ((u08_t *)base_coin)[0x2 ^ 3] = 'T';
    ((u08_t *)base_coin)[0x3 ^ 3] = 'I';
    ((u08_t *)base_coin)[0x4 ^ 3] = ' ';
    ((u08_t *)base_coin)[0x5 ^ 3] = 'c';
    ((u08_t *)base_coin)[0x6 ^ 3] = 'o';
    ((u08_t *)base_coin)[0x7 ^ 3] = 'i';
    ((u08_t *)base_coin)[0x8 ^ 3] = 'n';
    ((u08_t *)base_coin)[0x9 ^ 3] = ' ';
    ((u08_t *)base_coin)[0xa ^ 3] = '2';
    ((u08_t *)base_coin)[0xb ^ 3] = ' ';
    ((u08_t *)base_coin)[0x36 ^ 3] = '\n';
    ((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
    // Initialize variable part with A-Z cycling pattern (offset per thread)
    int thread_id = omp_get_thread_num();
    for(int i = 12; i < 54; i++)
      ((u08_t *)base_coin)[i ^ 3] = 'A' + ((i - 12 + thread_id * SIMD_WIDTH) % 26);
    // The rotation above repeats every 13 threads (13 * 8 is a multiple of 26),
    // which would make threads t and t + 13 hash exactly the same coins.  Tag the
    // two most significant variable bytes with the thread id (two base-95 digits
    // in the printable range 32..126).  increment_coin() counts upwards from
    // byte 53, so these bytes are not reached in any realistic run and the
    // per-thread search spaces stay disjoint.
    ((u08_t *)base_coin)[12 ^ 3] = (u08_t)(32 + (thread_id / 95) % 95);
    ((u08_t *)base_coin)[13 ^ 3] = (u08_t)(32 + thread_id % 95);

    while(keep_running)
    {
      // Another thread may already have hit a limit
      int stop_requested;
      #pragma omp atomic read
      stop_requested = should_stop;
      if(stop_requested)
        break;

      // Attempt limit: include this thread's unpublished work, otherwise small
      // limits would only be noticed after a full FLUSH_INTERVAL per thread
      if(max_attempts > 0)
      {
        u64_t published;
        #pragma omp atomic read
        published = attempts;
        if(published + thread_attempts >= max_attempts)
        {
          #pragma omp atomic write
          should_stop = 1;
          break;
        }
      }

      // Time limit: the clock is only read when a limit exists
      if(max_time > 0 && get_wall_time() - start_time >= max_time)
      {
        #pragma omp atomic write
        should_stop = 1;
        break;
      }

      // Prepare and hash one batch (prepare_coins also advances base_coin)
      prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
      sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash);
      thread_attempts += SIMD_WIDTH;

      u32_t hashes[SIMD_WIDTH][5];
      extract_hashes(interleaved_hash, hashes, SIMD_WIDTH);
      for(int lane = 0; lane < SIMD_WIDTH; lane++)
      {
        if(!is_valid_coin(hashes[lane]))
          continue;

        // De-interleaving only touches thread-private buffers, so it stays
        // outside the critical section
        u32_t coins[SIMD_WIDTH][14];
        extract_coins(interleaved_data, coins, SIMD_WIDTH);

        // Serialise the report and the save_coin() call across threads
        #pragma omp critical
        {
          u64_t published_now;
          #pragma omp atomic read
          published_now = attempts;
          #pragma omp atomic update
          coins_found++;
          printf("COIN FOUND! (attempt %llu, thread %d, lane %d)\n",
                 (unsigned long long)(published_now + thread_attempts - SIMD_WIDTH + lane),
                 thread_id, lane);
          save_coin(coins[lane]);
        }
      }

      // Periodically publish the private counter and report progress
      if(thread_attempts >= (u64_t)FLUSH_INTERVAL)
      {
        #pragma omp atomic
        attempts += thread_attempts;
        thread_attempts = 0;

        // Only thread 0 reports progress (no barrier to avoid blocking)
        if(thread_id == 0)
        {
          u64_t current_attempts;
          #pragma omp atomic read
          current_attempts = attempts;
          if(current_attempts - last_reported_attempts >= (u64_t)REPORT_INTERVAL)
          {
            u32_t current_coins;
            #pragma omp atomic read
            current_coins = coins_found;
            double elapsed = get_wall_time() - start_time;
            // guard the division in case the clock has not advanced yet
            double rate = (elapsed > 0.0) ? current_attempts / elapsed : 0.0;
            printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n",
                   (unsigned long long)current_attempts, rate / 1e6, current_coins, elapsed);
            last_reported_attempts = current_attempts;
          }
        }
      }
    }

    // Publish whatever this thread did since its last flush
    #pragma omp atomic
    attempts += thread_attempts;
  }

  // The parallel region has ended, so the shared counters can be read directly
  double total_time = get_wall_time() - start_time;
  double average_rate = (total_time > 0.0) ? attempts / total_time : 0.0;
  printf("\n=== Mining Statistics ===\n");
  printf("Total attempts: %llu\n", (unsigned long long)attempts);
  printf("Total time: %.2f seconds\n", total_time);
  printf("Average rate: %.2f attempts/second\n", average_rate);
  printf("Coins found: %u\n", coins_found);
  save_coin(NULL);
}
#endif
//
// print the command line help text to stdout
// prog_name is substituted into the usage line and into each example
//
void print_usage(const char *prog_name)
{
  printf("Usage: %s [OPTIONS]\n", prog_name);

  // static part of the help text, emitted in one call
  fputs("Options:\n"
        " -a <attempts> Maximum number of attempts\n"
        " -t <seconds> Maximum time in seconds\n"
        " --avx Use AVX (4-way SIMD)\n"
        " --avx2 Use AVX2 (8-way SIMD)\n"
        " --omp Use AVX2 + OpenMP (multi-threaded)\n"
        " -h Show this help message\n"
        "\nNote: Only one SIMD mode (--avx, --avx2, --omp) can be used at a time.\n"
        " If no SIMD mode is specified, the program will auto-detect.\n"
        "\nExamples:\n",
        stdout);

  // the examples repeat the program name once per line
  printf(" %s --avx2 -a 1000000 # AVX2 for 1M attempts\n"
         " %s --omp -t 60 # AVX2+OpenMP for 60 seconds\n"
         " %s --avx -a 1000000 -t 60 # AVX, stop at 1M or 60s\n",
         prog_name, prog_name, prog_name);
}
//
// parse the argument of -a: a non-negative decimal integer
// returns 1 and stores the result in *value on success, 0 on malformed input
//
static int parse_attempts_arg(const char *text, u64_t *value)
{
  char *end = NULL;

  // strtoull() accepts "-5" and silently wraps it to a huge value, so refuse
  // any minus sign up front
  if(text == NULL || strchr(text, '-') != NULL)
    return 0;
  errno = 0;
  unsigned long long parsed = strtoull(text, &end, 10);
  // reject empty input, trailing garbage and out-of-range values
  if(end == text || *end != '\0' || errno == ERANGE)
    return 0;
  *value = (u64_t)parsed;
  return 1;
}

//
// parse the argument of -t: a non-negative number of seconds
// returns 1 and stores the result in *value on success, 0 on malformed input
//
static int parse_seconds_arg(const char *text, double *value)
{
  char *end = NULL;

  if(text == NULL)
    return 0;
  errno = 0;
  double parsed = strtod(text, &end);
  // !(parsed >= 0.0) rejects negative values as well as NaN
  if(end == text || *end != '\0' || errno == ERANGE || !(parsed >= 0.0))
    return 0;
  *value = parsed;
  return 1;
}

//
// program entry point
//
// Parses the command line (-a, -t, -h, --avx, --avx2, --omp), installs the
// SIGINT handler and runs the selected miner.  Without a mode option the best
// implementation enabled at compile time is used.
//
// returns 0 on success (or after -h); 1 on invalid options or option
// arguments, on conflicting modes, or when the requested SIMD mode was not
// compiled in
//
int main(int argc, char *argv[])
{
  u64_t max_attempts = 0; // 0 = no attempt limit
  double max_time = 0;    // 0 = no time limit
  int use_avx = 0;
  int use_avx2 = 0;
  int use_omp = 0;
  int opt;

  signal(SIGINT, signal_handler);

  // Define long options (each maps to a private short code handled below)
  static const struct option long_options[] = {
    {"avx", no_argument, 0, 'x'},
    {"avx2", no_argument, 0, 'y'},
    {"omp", no_argument, 0, 'o'},
    {0, 0, 0, 0}
  };

  // Parse command line options
  while((opt = getopt_long(argc, argv, "a:t:h", long_options, NULL)) != -1)
  {
    switch(opt)
    {
      case 'a':
        if(!parse_attempts_arg(optarg, &max_attempts))
        {
          fprintf(stderr, "Error: invalid number of attempts '%s'\n", optarg);
          return 1;
        }
        break;
      case 't':
        if(!parse_seconds_arg(optarg, &max_time))
        {
          fprintf(stderr, "Error: invalid time limit '%s'\n", optarg);
          return 1;
        }
        break;
      case 'x':
        use_avx = 1;
        break;
      case 'y':
        use_avx2 = 1;
        break;
      case 'o':
        use_omp = 1;
        break;
      case 'h':
        print_usage(argv[0]);
        return 0;
      default:
        print_usage(argv[0]);
        return 1;
    }
  }

  // Check for conflicting SIMD modes
  int simd_flags = use_avx + use_avx2 + use_omp;
  if(simd_flags > 1)
  {
    fprintf(stderr, "Error: Only one SIMD mode (--avx, --avx2, --omp) can be specified.\n");
    return 1;
  }

  // Execute based on selected mode
  if(use_omp)
  {
#if defined(__AVX2__)
    mine_coins_avx2_omp(max_attempts, max_time);
#else
    fprintf(stderr, "Error: OpenMP mode requires AVX2 support. Compile with -mavx2 -fopenmp\n");
    return 1;
#endif
  }
  else if(use_avx2)
  {
#if defined(__AVX2__)
    mine_coins_avx2(max_attempts, max_time);
#else
    fprintf(stderr, "Error: AVX2 not available. Compile with -mavx2\n");
    return 1;
#endif
  }
  else if(use_avx)
  {
#if defined(__AVX__)
    mine_coins_avx(max_attempts, max_time);
#else
    fprintf(stderr, "Error: AVX not available. Compile with -mavx\n");
    return 1;
#endif
  }
  else
  {
    // Auto-detect: use best available
#if defined(__AVX2__)
    printf("Auto-detecting: Using AVX2 implementation\n");
    mine_coins_avx2(max_attempts, max_time);
#elif defined(__AVX__)
    printf("Auto-detecting: Using AVX implementation\n");
    mine_coins_avx(max_attempts, max_time);
#else
    fprintf(stderr, "Error: No SIMD instruction set available. Compile with -mavx or -mavx2\n");
    return 1;
#endif
  }
  return 0;
}