From d86863820b03398ce6ce09835cea19a89128fd31 Mon Sep 17 00:00:00 2001
From: Tiago Garcia
Date: Sun, 16 Nov 2025 15:09:13 +0000
Subject: [PATCH] AVX2 with OpenMP

Signed-off-by: Tiago Garcia
---
Review notes (ignored by git am):
  * restored the <omp.h> header name on the new #include
  * max_attempts is now a budget for the whole thread team (it was
    effectively applied once per thread) and is no longer checked through
    an unsynchronized read of the shared counter
  * progress lines report the team-wide attempt count and rate
  * thread seeds stay distinct beyond 13 threads
  * indentation of context lines was reconstructed; re-check it against
    the tree before applying

 aad_coin_miner_simd.c | 145 ++++++++++++++++++++++++++++++++++++++++--
 makefile              |   4 +-
 2 files changed, 142 insertions(+), 7 deletions(-)

diff --git a/aad_coin_miner_simd.c b/aad_coin_miner_simd.c
index 28405bd..24c1195 100644
--- a/aad_coin_miner_simd.c
+++ b/aad_coin_miner_simd.c
@@ -62,7 +62,7 @@ static int increment_coin(u32_t coin[14])
             break; // No carry needed
         }
     }
-    
+
     // Return 0 if we carried all the way through (overflow), 1 otherwise
     return (pos >= 12);
 }
@@ -82,7 +82,7 @@ static void prepare_coins(u32_t base_coin[14], u32_t *interleaved_data, int simd
         {
             interleaved_data[idx * simd_width + lane] = coin[idx];
         }
-        
+
         // Increment the base coin for the next lane
         increment_coin(base_coin);
     }
@@ -294,6 +294,137 @@ static void mine_coins_avx2(u64_t max_attempts)
     save_coin(NULL);
 }
 #endif
+#if defined(__AVX2__)
+#include <omp.h>
+//
+// mine DETI coins using AVX2 (8-way SIMD) + OpenMP
+//
+// max_attempts is the hash budget for the whole thread team (0 = no limit);
+// it is split evenly between the threads, so the hot loop never reads shared state
+//
+__attribute__((unused))
+static void mine_coins_avx2_omp(u64_t max_attempts)
+{
+    const int SIMD_WIDTH = 8;
+    const u64_t PROGRESS_INTERVAL = 1000000; // per-thread attempts between progress reports
+    int num_threads = omp_get_max_threads();
+    u64_t attempts = 0;    // shared: updated only through omp atomic
+    u32_t coins_found = 0; // shared: updated only inside omp critical
+
+    printf("Mining DETI coins using AVX2 (8-way SIMD) + OpenMP (%d threads)...\n", num_threads);
+    printf("Press Ctrl+C to stop\n\n");
+
+    time_measurement();
+    time_measurement();
+    double start_time = wall_time_delta();
+
+    #pragma omp parallel
+    {
+        u32_t base_coin[14];
+        u32_t interleaved_data[14 * SIMD_WIDTH] __attribute__((aligned(32)));
+        u32_t interleaved_hash[5 * SIMD_WIDTH] __attribute__((aligned(32)));
+        u64_t thread_attempts = 0; // hashes computed by this thread
+        u64_t unreported = 0;      // hashes not yet added to the shared counter
+        int thread_id = omp_get_thread_num();
+        u64_t team_size = (u64_t)omp_get_num_threads();
+
+        // Split the budget evenly; the first (max_attempts % team_size) threads take the remainder
+        u64_t thread_budget = max_attempts / team_size;
+        if((u64_t)thread_id < max_attempts % team_size)
+            thread_budget++;
+
+        // Initialize base coin template
+        memset(base_coin, 0, sizeof(base_coin));
+        ((u08_t *)base_coin)[0x0 ^ 3] = 'D';
+        ((u08_t *)base_coin)[0x1 ^ 3] = 'E';
+        ((u08_t *)base_coin)[0x2 ^ 3] = 'T';
+        ((u08_t *)base_coin)[0x3 ^ 3] = 'I';
+        ((u08_t *)base_coin)[0x4 ^ 3] = ' ';
+        ((u08_t *)base_coin)[0x5 ^ 3] = 'c';
+        ((u08_t *)base_coin)[0x6 ^ 3] = 'o';
+        ((u08_t *)base_coin)[0x7 ^ 3] = 'i';
+        ((u08_t *)base_coin)[0x8 ^ 3] = 'n';
+        ((u08_t *)base_coin)[0x9 ^ 3] = ' ';
+        ((u08_t *)base_coin)[0xa ^ 3] = '2';
+        ((u08_t *)base_coin)[0xb ^ 3] = ' ';
+        ((u08_t *)base_coin)[0x36 ^ 3] = '\n';
+        ((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
+
+        // Initialize variable part with A-Z cycling pattern (offset per thread)
+        for(int i = 12; i < 54; i++)
+            ((u08_t *)base_coin)[i ^ 3] = 'A' + ((i - 12 + thread_id * SIMD_WIDTH) % 26);
+
+        // The rotation alone repeats every 13 threads (13 * 8 is a multiple of 26),
+        // so also stamp the thread id, in base 26, on the first two variable bytes.
+        // NOTE(review): assumes increment_coin() does not normally reach these bytes - confirm
+        ((u08_t *)base_coin)[12 ^ 3] = 'A' + (thread_id % 26);
+        ((u08_t *)base_coin)[13 ^ 3] = 'A' + ((thread_id / 26) % 26);
+
+        while(keep_running && (max_attempts == 0 || thread_attempts < thread_budget))
+        {
+            prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
+            sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash);
+            thread_attempts += SIMD_WIDTH;
+            unreported += SIMD_WIDTH;
+
+            u32_t hashes[SIMD_WIDTH][5];
+            extract_hashes(interleaved_hash, hashes, SIMD_WIDTH);
+
+            for(int lane = 0; lane < SIMD_WIDTH; lane++)
+            {
+                if(is_valid_coin(hashes[lane]))
+                {
+                    #pragma omp critical
+                    {
+                        coins_found++;
+                        u32_t coins[SIMD_WIDTH][14];
+                        extract_coins(interleaved_data, coins, SIMD_WIDTH);
+                        // the attempt number is relative to the reporting thread
+                        printf("COIN FOUND! (attempt %llu, thread %d, lane %d)\n",
+                               (unsigned long long)(thread_attempts - SIMD_WIDTH + lane),
+                               thread_id, lane);
+                        save_coin(coins[lane]);
+                    }
+                }
+            }
+
+            // Publish this thread's work and print progress every 1M attempts (per thread)
+            if(unreported >= PROGRESS_INTERVAL)
+            {
+                u64_t total_so_far;
+
+                #pragma omp atomic capture
+                total_so_far = attempts += unreported;
+                unreported = 0;
+
+                #pragma omp critical
+                {
+                    time_measurement();
+                    double current_time = wall_time_delta() - start_time;
+                    double rate = total_so_far / current_time;
+                    printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u\n",
+                           (unsigned long long)total_so_far, rate / 1e6, coins_found);
+                }
+            }
+        }
+
+        // Publish whatever was computed since the last progress report
+        #pragma omp atomic
+        attempts += unreported;
+    }
+
+    time_measurement();
+    double total_time = wall_time_delta() - start_time;
+
+    printf("\n=== Mining Statistics ===\n");
+    printf("Total attempts: %llu\n", (unsigned long long)attempts);
+    printf("Total time: %.2f seconds\n", total_time);
+    printf("Average rate: %.2f attempts/second\n", attempts / total_time);
+    printf("Coins found: %u\n", coins_found);
+
+    save_coin(NULL);
+}
+#endif
 
 int main(int argc, char *argv[])
 {
@@ -305,8 +436,13 @@ int main(int argc, char *argv[])
         max_attempts = strtoull(argv[1], NULL, 10);
 
 #if defined(__AVX2__)
-    printf("Using AVX2 implementation\n");
-    mine_coins_avx2(max_attempts);
+    if(argc > 2 && strcmp(argv[2], "omp") == 0) {
+        printf("Using AVX2 + OpenMP implementation\n");
+        mine_coins_avx2_omp(max_attempts);
+    } else {
+        printf("Using AVX2 implementation\n");
+        mine_coins_avx2(max_attempts);
+    }
 #elif defined(__AVX__)
     printf("Using AVX implementation\n");
     mine_coins_avx(max_attempts);
@@ -317,4 +453,3 @@ int main(int argc, char *argv[])
 
     return 0;
 }
-
diff --git a/makefile b/makefile
index 19bf4d2..f945203 100644
--- a/makefile
+++ b/makefile
@@ -76,7 +76,7 @@ coin_miner_cpu: aad_coin_miner_cpu.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h
 	cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
 
 coin_miner_simd: aad_coin_miner_simd.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h makefile
-	cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
+	cc -march=native -Wall -Wshadow -Werror -fopenmp -mavx2 -O3 $< -o $@
 
 coin_miner_cuda_kernel.cubin: aad_coin_miner_cuda_kernel.cu aad_sha1.h makefile
 	nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@
@@ -91,4 +91,4 @@ miners: coin_miner_cpu coin_miner_simd coin_miner_cuda benchmark
 
 all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin \
 	coin_miner_cpu coin_miner_simd coin_miner_cuda coin_miner_cuda_kernel.cubin \
-	benchmark
\ No newline at end of file
+	benchmark