AVX2 with OpenMP
Signed-off-by: Tiago Garcia <tiago.rgarcia@ua.pt>
This commit is contained in:
parent
cbb14ce858
commit
d86863820b
|
|
@ -294,6 +294,112 @@ static void mine_coins_avx2(u64_t max_attempts)
|
|||
save_coin(NULL);
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
#include <omp.h>
|
||||
//
|
||||
// mine DETI coins using AVX2 (8-way SIMD) + OpenMP
|
||||
//
|
||||
__attribute__((unused))
|
||||
static void mine_coins_avx2_omp(u64_t max_attempts)
|
||||
{
|
||||
const int SIMD_WIDTH = 8;
|
||||
int num_threads = omp_get_max_threads();
|
||||
u64_t attempts = 0;
|
||||
u32_t coins_found = 0;
|
||||
|
||||
printf("Mining DETI coins using AVX2 (8-way SIMD) + OpenMP (%d threads)...\n", num_threads);
|
||||
printf("Press Ctrl+C to stop\n\n");
|
||||
|
||||
time_measurement();
|
||||
time_measurement();
|
||||
double start_time = wall_time_delta();
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
u32_t base_coin[14];
|
||||
u32_t interleaved_data[14 * SIMD_WIDTH] __attribute__((aligned(32)));
|
||||
u32_t interleaved_hash[5 * SIMD_WIDTH] __attribute__((aligned(32)));
|
||||
// u32_t thread_coins_found = 0;
|
||||
u64_t thread_attempts = 0;
|
||||
|
||||
// Initialize base coin template (unique per thread)
|
||||
memset(base_coin, 0, sizeof(base_coin));
|
||||
((u08_t *)base_coin)[0x0 ^ 3] = 'D';
|
||||
((u08_t *)base_coin)[0x1 ^ 3] = 'E';
|
||||
((u08_t *)base_coin)[0x2 ^ 3] = 'T';
|
||||
((u08_t *)base_coin)[0x3 ^ 3] = 'I';
|
||||
((u08_t *)base_coin)[0x4 ^ 3] = ' ';
|
||||
((u08_t *)base_coin)[0x5 ^ 3] = 'c';
|
||||
((u08_t *)base_coin)[0x6 ^ 3] = 'o';
|
||||
((u08_t *)base_coin)[0x7 ^ 3] = 'i';
|
||||
((u08_t *)base_coin)[0x8 ^ 3] = 'n';
|
||||
((u08_t *)base_coin)[0x9 ^ 3] = ' ';
|
||||
((u08_t *)base_coin)[0xa ^ 3] = '2';
|
||||
((u08_t *)base_coin)[0xb ^ 3] = ' ';
|
||||
((u08_t *)base_coin)[0x36 ^ 3] = '\n';
|
||||
((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
|
||||
|
||||
// Initialize variable part with A-Z cycling pattern (offset per thread)
|
||||
int thread_id = omp_get_thread_num();
|
||||
for(int i = 12; i < 54; i++)
|
||||
((u08_t *)base_coin)[i ^ 3] = 'A' + ((i - 12 + thread_id * SIMD_WIDTH) % 26);
|
||||
|
||||
while(keep_running && (max_attempts == 0 || (attempts + thread_attempts) < max_attempts))
|
||||
{
|
||||
prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
|
||||
sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash);
|
||||
thread_attempts += SIMD_WIDTH;
|
||||
|
||||
u32_t hashes[SIMD_WIDTH][5];
|
||||
extract_hashes(interleaved_hash, hashes, SIMD_WIDTH);
|
||||
|
||||
for(int lane = 0; lane < SIMD_WIDTH; lane++)
|
||||
{
|
||||
if(is_valid_coin(hashes[lane]))
|
||||
{
|
||||
#pragma omp critical
|
||||
{
|
||||
coins_found++;
|
||||
u32_t coins[SIMD_WIDTH][14];
|
||||
extract_coins(interleaved_data, coins, SIMD_WIDTH);
|
||||
printf("COIN FOUND! (attempt %llu, thread %d, lane %d)\n",
|
||||
(unsigned long long)(attempts + thread_attempts - SIMD_WIDTH + lane),
|
||||
thread_id, lane);
|
||||
save_coin(coins[lane]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print progress every 1M attempts (per thread)
|
||||
if(thread_attempts % 1000000 < SIMD_WIDTH)
|
||||
{
|
||||
#pragma omp critical
|
||||
{
|
||||
time_measurement();
|
||||
double current_time = wall_time_delta() - start_time;
|
||||
double rate = (attempts + thread_attempts) / current_time;
|
||||
printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u\n",
|
||||
(unsigned long long)(attempts + thread_attempts), rate / 1e6, coins_found);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp atomic
|
||||
attempts += thread_attempts;
|
||||
}
|
||||
|
||||
time_measurement();
|
||||
double total_time = wall_time_delta() - start_time;
|
||||
|
||||
printf("\n=== Mining Statistics ===\n");
|
||||
printf("Total attempts: %llu\n", (unsigned long long)attempts);
|
||||
printf("Total time: %.2f seconds\n", total_time);
|
||||
printf("Average rate: %.2f attempts/second\n", attempts / total_time);
|
||||
printf("Coins found: %u\n", coins_found);
|
||||
|
||||
save_coin(NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
|
@ -305,8 +411,13 @@ int main(int argc, char *argv[])
|
|||
max_attempts = strtoull(argv[1], NULL, 10);
|
||||
|
||||
#if defined(__AVX2__)
|
||||
if(argc > 2 && strcmp(argv[2], "omp") == 0) {
|
||||
printf("Using AVX2 + OpenMP implementation\n");
|
||||
mine_coins_avx2_omp(max_attempts);
|
||||
} else {
|
||||
printf("Using AVX2 implementation\n");
|
||||
mine_coins_avx2(max_attempts);
|
||||
}
|
||||
#elif defined(__AVX__)
|
||||
printf("Using AVX implementation\n");
|
||||
mine_coins_avx(max_attempts);
|
||||
|
|
@ -317,4 +428,3 @@ int main(int argc, char *argv[])
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
2
makefile
2
makefile
|
|
@ -76,7 +76,7 @@ coin_miner_cpu: aad_coin_miner_cpu.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h
|
|||
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
|
||||
|
||||
coin_miner_simd: aad_coin_miner_simd.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h makefile
|
||||
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
|
||||
cc -march=native -Wall -Wshadow -Werror -fopenmp -mavx2 -O3 $< -o $@
|
||||
|
||||
coin_miner_cuda_kernel.cubin: aad_coin_miner_cuda_kernel.cu aad_sha1.h makefile
|
||||
nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@
|
||||
|
|
|
|||
Loading…
Reference in New Issue