AVX2 with OpenMP

Signed-off-by: Tiago Garcia <tiago.rgarcia@ua.pt>
This commit is contained in:
Tiago Garcia 2025-11-16 15:09:13 +00:00
parent cbb14ce858
commit d86863820b
Signed by: TiagoRG
GPG Key ID: DFCD48E3F420DB42
2 changed files with 117 additions and 7 deletions

View File

@ -294,6 +294,112 @@ static void mine_coins_avx2(u64_t max_attempts)
save_coin(NULL);
}
#endif
#if defined(__AVX2__)
#include <omp.h>
//
// mine DETI coins using AVX2 (8-way SIMD) + OpenMP
//
// Every OpenMP thread repeatedly prepares a batch of SIMD_WIDTH candidate
// coins, hashes the batch with sha1_avx2() and hands any candidate accepted by
// is_valid_coin() to save_coin().
//
// max_attempts: stop once the attempts summed over ALL threads reach this
//               value; 0 means "no limit" (run until keep_running is cleared).
//               The limit is tested once per batch, so the final total can
//               overshoot by at most one batch per thread.
//
__attribute__((unused))
static void mine_coins_avx2_omp(u64_t max_attempts)
{
  // An enum constant (rather than a const int) keeps the buffers below
  // ordinary fixed-size arrays instead of variable-length arrays.
  enum { SIMD_WIDTH = 8 };
  const u64_t progress_interval = 1000000; // total attempts between progress lines

  int num_threads = omp_get_max_threads();
  u64_t attempts = 0;     // shared; inside the parallel region only accessed via omp atomic
  u32_t coins_found = 0;  // shared; inside the parallel region only accessed in omp critical

  printf("Mining DETI coins using AVX2 (8-way SIMD) + OpenMP (%d threads)...\n", num_threads);
  printf("Press Ctrl+C to stop\n\n");

  // omp_get_wtime() returns elapsed wall-clock seconds and may be called from
  // any thread, so no shared timing state is needed inside the parallel region.
  double start_time = omp_get_wtime();

  #pragma omp parallel
  {
    u32_t base_coin[14];
    u32_t interleaved_data[14 * SIMD_WIDTH] __attribute__((aligned(32)));
    u32_t interleaved_hash[5 * SIMD_WIDTH] __attribute__((aligned(32)));
    int thread_id = omp_get_thread_num();

    // Initialize base coin template (unique per thread).
    // The "^ 3" flips the byte position inside each 32-bit word.
    // NOTE(review): presumably this matches the word layout sha1_avx2() expects - confirm.
    memset(base_coin, 0, sizeof(base_coin));
    ((u08_t *)base_coin)[0x0 ^ 3] = 'D';
    ((u08_t *)base_coin)[0x1 ^ 3] = 'E';
    ((u08_t *)base_coin)[0x2 ^ 3] = 'T';
    ((u08_t *)base_coin)[0x3 ^ 3] = 'I';
    ((u08_t *)base_coin)[0x4 ^ 3] = ' ';
    ((u08_t *)base_coin)[0x5 ^ 3] = 'c';
    ((u08_t *)base_coin)[0x6 ^ 3] = 'o';
    ((u08_t *)base_coin)[0x7 ^ 3] = 'i';
    ((u08_t *)base_coin)[0x8 ^ 3] = 'n';
    ((u08_t *)base_coin)[0x9 ^ 3] = ' ';
    ((u08_t *)base_coin)[0xa ^ 3] = '2';
    ((u08_t *)base_coin)[0xb ^ 3] = ' ';
    ((u08_t *)base_coin)[0x36 ^ 3] = '\n';
    ((u08_t *)base_coin)[0x37 ^ 3] = 0x80;

    // Initialize variable part with A-Z cycling pattern (offset per thread).
    // NOTE(review): whether this offset keeps the threads' search spaces
    // disjoint depends on how prepare_coins() advances base_coin - confirm.
    for(int i = 12; i < 54; i++)
      ((u08_t *)base_coin)[i ^ 3] = 'A' + ((i - 12 + thread_id * SIMD_WIDTH) % 26);

    for(;;)
    {
      // Test the GLOBAL attempt count so max_attempts bounds the whole run,
      // not each thread individually.
      u64_t done;
      #pragma omp atomic read
      done = attempts;
      if(!keep_running || (max_attempts != 0 && done >= max_attempts))
        break;

      prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
      sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash);

      // Publish this batch and capture the running total in one atomic step;
      // every batch therefore observes a distinct batch_end value.
      u64_t batch_end;
      #pragma omp atomic capture
      { attempts += SIMD_WIDTH; batch_end = attempts; }

      u32_t hashes[SIMD_WIDTH][5];
      extract_hashes(interleaved_hash, hashes, SIMD_WIDTH);

      for(int lane = 0; lane < SIMD_WIDTH; lane++)
      {
        if(!is_valid_coin(hashes[lane]))
          continue;

        // De-interleaving only touches thread-private buffers, so it is done
        // before entering the critical section.
        u32_t coins[SIMD_WIDTH][14];
        extract_coins(interleaved_data, coins, SIMD_WIDTH);

        #pragma omp critical
        {
          coins_found++;
          printf("COIN FOUND! (attempt %llu, thread %d, lane %d)\n",
                 (unsigned long long)(batch_end - SIMD_WIDTH + lane),
                 thread_id, lane);
          save_coin(coins[lane]);
        }
      }

      // Print progress each time the global total crosses a multiple of
      // progress_interval; batch_end values are unique, so one thread reports.
      if(batch_end % progress_interval < (u64_t)SIMD_WIDTH)
      {
        double elapsed = omp_get_wtime() - start_time;
        double rate = (double)batch_end / elapsed;

        // Same (unnamed) critical section as the coin report, which is what
        // protects coins_found.
        #pragma omp critical
        {
          printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u\n",
                 (unsigned long long)batch_end, rate / 1e6, coins_found);
        }
      }
    }
  }

  double total_time = omp_get_wtime() - start_time;
  // Avoid dividing by zero when the run is stopped immediately.
  double average_rate = (total_time > 0.0) ? (double)attempts / total_time : 0.0;

  printf("\n=== Mining Statistics ===\n");
  printf("Total attempts: %llu\n", (unsigned long long)attempts);
  printf("Total time: %.2f seconds\n", total_time);
  printf("Average rate: %.2f attempts/second\n", average_rate);
  printf("Coins found: %u\n", coins_found);
  save_coin(NULL);
}
#endif
int main(int argc, char *argv[])
{
@ -305,8 +411,13 @@ int main(int argc, char *argv[])
max_attempts = strtoull(argv[1], NULL, 10);
#if defined(__AVX2__)
printf("Using AVX2 implementation\n");
mine_coins_avx2(max_attempts);
if(argc > 2 && strcmp(argv[2], "omp") == 0) {
printf("Using AVX2 + OpenMP implementation\n");
mine_coins_avx2_omp(max_attempts);
} else {
printf("Using AVX2 implementation\n");
mine_coins_avx2(max_attempts);
}
#elif defined(__AVX__)
printf("Using AVX implementation\n");
mine_coins_avx(max_attempts);
@ -317,4 +428,3 @@ int main(int argc, char *argv[])
return 0;
}

View File

@ -76,7 +76,7 @@ coin_miner_cpu: aad_coin_miner_cpu.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
coin_miner_simd: aad_coin_miner_simd.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h makefile
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
cc -march=native -Wall -Wshadow -Werror -fopenmp -mavx2 -O3 $< -o $@
coin_miner_cuda_kernel.cubin: aad_coin_miner_cuda_kernel.cu aad_sha1.h makefile
nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@