Compare commits

..

16 Commits

Author SHA1 Message Date
Tiago Garcia a906816cd4
Better usability in browser
Signed-off-by: Tiago Garcia <tiago.rgarcia@ua.pt>
2025-11-16 21:58:14 +00:00
Tiago Garcia fa06494b59
WebAssembly
Signed-off-by: Tiago Garcia <tiago.rgarcia@ua.pt>
2025-11-16 19:41:51 +00:00
Tiago Garcia d86863820b
AVX2 with OpenMP
Signed-off-by: Tiago Garcia <tiago.rgarcia@ua.pt>
2025-11-16 15:09:13 +00:00
RubenCGomes cbb14ce858
2 typos fixed + cuda kernel now uses values across 0-127
Signed-off-by: RubenCGomes <rlcg@ua.pt>
2025-11-11 00:15:42 +00:00
RubenCGomes bd06cb1133
make CUDA version mine like the others so comparisons can be made
Signed-off-by: RubenCGomes <rlcg@ua.pt>
2025-11-05 17:14:08 +00:00
RubenCGomes 4ca44a528f
small change from assignment
Signed-off-by: RubenCGomes <rlcg@ua.pt>
2025-11-03 11:52:45 +00:00
RubenCGomes 34b02aec4b
Merge remote-tracking branch 'origin/master' 2025-11-02 23:48:44 +00:00
copilot-swe-agent[bot] a1d8d8d11a
update makefile to include all builds 2025-11-02 23:47:27 +00:00
RubenCGomes 492e53c3d8
update makefile to include all builds 2025-11-02 23:37:40 +00:00
Rúben Gomes 1cc1f28e72
adapted SIMD miner logic to work the same as CPU
Adapt SIMD miner to use CPU miner's sequential coin iteration logic
2025-11-02 23:34:26 +00:00
Rúben Gomes 90f2226a8a
Delete _codeql_detected_source_root 2025-11-02 23:32:57 +00:00
copilot-swe-agent[bot] 7684c75469 Improve code comments based on review feedback
Co-authored-by: RubenCGomes <116815718+RubenCGomes@users.noreply.github.com>
2025-11-02 23:22:21 +00:00
copilot-swe-agent[bot] e57678f067 Add makefile targets for coin miners and final testing
Co-authored-by: RubenCGomes <116815718+RubenCGomes@users.noreply.github.com>
2025-11-02 23:20:20 +00:00
copilot-swe-agent[bot] d0f5425f78 Adapt SIMD miner to use CPU miner's counter logic
Co-authored-by: RubenCGomes <116815718+RubenCGomes@users.noreply.github.com>
2025-11-02 23:18:04 +00:00
copilot-swe-agent[bot] d980e4775c Initial exploration of CPU and SIMD miner implementations
Co-authored-by: RubenCGomes <116815718+RubenCGomes@users.noreply.github.com>
2025-11-02 23:15:05 +00:00
copilot-swe-agent[bot] b255028025 Initial plan 2025-11-02 23:12:00 +00:00
15 changed files with 923 additions and 249 deletions

5
.gitignore vendored
View File

@ -60,3 +60,8 @@ CMakeUserPresets.json
*.ptx *.ptx
*.cubin *.cubin
*.fatbin *.fatbin
# Coin miner executables
coin_miner_cpu
coin_miner_simd
coin_miner_cuda

View File

@ -26,59 +26,13 @@ void signal_handler(int signum)
keep_running = 0; keep_running = 0;
} }
// Histogram data structures
typedef struct {
u32_t bins[MAX_HISTOGRAM_BINS];
u32_t count;
double min_value;
double max_value;
} histogram_t;
static void histogram_init(histogram_t *h)
{
memset(h->bins, 0, sizeof(h->bins));
h->count = 0;
h->min_value = 1e99;
h->max_value = 0.0;
}
static void histogram_add(histogram_t *h, double value)
{
if(value < h->min_value)
h->min_value = value;
if(value > h->max_value)
h->max_value = value;
h->count++;
// For now, just count - we'll bin them later when printing
}
static void histogram_print(histogram_t *h, const char *title, int n_bins)
{
if(h->count == 0)
{
printf("%s: No data\n", title);
return;
}
printf("\n%s:\n", title);
printf(" Count: %u\n", h->count);
printf(" Min: %.6f\n", h->min_value);
printf(" Max: %.6f\n", h->max_value);
printf(" Avg: %.6f\n", (h->min_value + h->max_value) / 2.0);
}
// Coin reconstruction from stored data // Coin reconstruction from stored data
static void reconstruct_coin(u32_t *stored_data, u32_t coin[14]) static void reconstruct_coin(u32_t *stored_data, u32_t coin[14])
{ {
// Fixed parts (must match kernel byte order) // Simply copy the complete coin data from storage
coin[0] = 0x44455449u; // "DETI" with byte swap (idx ^ 3) for(int i = 0; i < 14; i++)
coin[1] = 0x20636F69u; // " coi" with byte swap (idx ^ 3) coin[i] = stored_data[i];
coin[2] = 0x6E203220u; // "n 2 " with byte swap (idx ^ 3)
// Variable parts (restore from storage)
for(int i = 0; i < 11; i++)
coin[3 + i] = stored_data[i];
} }
// //
@ -92,19 +46,6 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
u32_t coins_found = 0; u32_t coins_found = 0;
u32_t kernel_runs = 0; u32_t kernel_runs = 0;
// Histograms
histogram_t time_histogram;
histogram_t coins_histogram;
double *kernel_times = NULL;
u32_t *kernel_coin_counts = NULL;
u32_t histogram_capacity = 10000;
histogram_init(&time_histogram);
histogram_init(&coins_histogram);
kernel_times = (double *)malloc(histogram_capacity * sizeof(double));
kernel_coin_counts = (u32_t *)malloc(histogram_capacity * sizeof(u32_t));
// Initialize CUDA // Initialize CUDA
cd.device_number = 0; cd.device_number = 0;
cd.cubin_file_name = "coin_miner_cuda_kernel.cubin"; cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
@ -117,7 +58,7 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
host_storage = (u32_t *)cd.host_data[0]; host_storage = (u32_t *)cd.host_data[0];
// Kernel configuration // Kernel configuration
cd.block_dim_x = RECOMENDED_CUDA_BLOCK_SIZE; cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE;
cd.grid_dim_x = 4096; // Large grid for maximum GPU utilization cd.grid_dim_x = 4096; // Large grid for maximum GPU utilization
u32_t n_threads = cd.grid_dim_x * cd.block_dim_x; u32_t n_threads = cd.grid_dim_x * cd.block_dim_x;
@ -133,9 +74,7 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
int scan_pos = 12; int scan_pos = 12;
time_measurement(); time_measurement();
time_measurement(); // double start_time = wall_time_delta();
double start_time = wall_time_delta();
double last_report_time = 0.0;
while(keep_running && (max_attempts == 0 || attempts < max_attempts)) while(keep_running && (max_attempts == 0 || attempts < max_attempts))
{ {
@ -153,13 +92,8 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
if(use_scan_kernel) if(use_scan_kernel)
cd.arg[3] = &scan_pos; cd.arg[3] = &scan_pos;
// Launch kernel and measure time // Launch the CUDA kernel
time_measurement(); launch_kernel(&cd);
double kernel_start = cpu_time_delta();
lauch_kernel(&cd);
time_measurement();
double kernel_end = cpu_time_delta();
double kernel_time = kernel_end - kernel_start;
// Copy results back // Copy results back
device_to_host_copy(&cd, 0); device_to_host_copy(&cd, 0);
@ -208,16 +142,6 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
} }
} }
// Update histograms
if(kernel_runs < histogram_capacity)
{
kernel_times[kernel_runs] = kernel_time;
kernel_coin_counts[kernel_runs] = n_coins_this_kernel;
}
histogram_add(&time_histogram, kernel_time);
histogram_add(&coins_histogram, (double)n_coins_this_kernel);
// Update counters // Update counters
kernel_runs++; kernel_runs++;
if(use_scan_kernel) if(use_scan_kernel)
@ -231,21 +155,11 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
if(use_scan_kernel) if(use_scan_kernel)
scan_pos = (scan_pos + 1) % 42 + 12; // Cycle through positions 12-53 scan_pos = (scan_pos + 1) % 42 + 12; // Cycle through positions 12-53
// Print progress every second
time_measurement();
double current_time = wall_time_delta() - start_time;
if(current_time - last_report_time >= 1.0)
{
double rate = attempts / current_time;
printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Kernels: %u, Avg time: %.6f s\n",
(unsigned long long)attempts, rate / 1e6, coins_found, kernel_runs,
current_time / kernel_runs);
last_report_time = current_time;
}
} }
time_measurement(); time_measurement();
double total_time = wall_time_delta() - start_time; double total_time = cpu_time_delta();
printf("\n=== Mining Statistics ===\n"); printf("\n=== Mining Statistics ===\n");
printf("Total attempts: %llu\n", (unsigned long long)attempts); printf("Total attempts: %llu\n", (unsigned long long)attempts);
@ -254,30 +168,10 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
printf("Coins found: %u\n", coins_found); printf("Coins found: %u\n", coins_found);
printf("Kernel launches: %u\n", kernel_runs); printf("Kernel launches: %u\n", kernel_runs);
// Print histograms
histogram_print(&time_histogram, "Kernel Execution Time Histogram", 20);
histogram_print(&coins_histogram, "Coins Found Per Kernel Histogram", 10);
// Save detailed histogram data
FILE *fp = fopen("cuda_kernel_stats.csv", "w");
if(fp != NULL)
{
fprintf(fp, "kernel_id,time_seconds,coins_found\n");
u32_t n_to_save = (kernel_runs < histogram_capacity) ? kernel_runs : histogram_capacity;
for(u32_t i = 0; i < n_to_save; i++)
{
fprintf(fp, "%u,%.9f,%u\n", i, kernel_times[i], kernel_coin_counts[i]);
}
fclose(fp);
printf("\nDetailed statistics saved to cuda_kernel_stats.csv\n");
}
// Save any remaining coins // Save any remaining coins
save_coin(NULL); save_coin(NULL);
// Cleanup
free(kernel_times);
free(kernel_coin_counts);
terminate_cuda(&cd); terminate_cuda(&cd);
} }

View File

@ -5,26 +5,23 @@
// //
#include "aad_sha1.h" #include "aad_sha1.h"
#include "aad_data_types.h"
typedef unsigned int u32_t;
typedef unsigned char u08_t;
// //
// Optimized CUDA kernel for DETI coin mining // Optimized CUDA kernel for DETI coin mining
// Each thread generates its own message based on thread coordinates and external parameters // Each thread generates coins using the same approach as CPU/SIMD miners
// //
extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1) extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1)
void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2) void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2)
{ {
u32_t coin[14]; u32_t coin[14];
u32_t hash[5]; u32_t hash[5];
u32_t n, warp_id, lane_id; u32_t n;
u08_t *bytes = (u08_t *)coin;
// Get thread coordinates // Get thread index (used as offset from base counter)
n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x; n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x;
warp_id = n >> 5u;
lane_id = n & 31u;
// Initialize coin template: "DETI coin 2 " + variable + "\n\x80" // Initialize coin template: "DETI coin 2 " + variable + "\n\x80"
// Use byte-swapped format to match host expectations (idx ^ 3) // Use byte-swapped format to match host expectations (idx ^ 3)
@ -32,25 +29,37 @@ void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param
coin[1] = 0x20636F69u; // " coi" with byte swap coin[1] = 0x20636F69u; // " coi" with byte swap
coin[2] = 0x6E203220u; // "n 2 " with byte swap coin[2] = 0x6E203220u; // "n 2 " with byte swap
// Variable part: encode thread ID and parameters // Initialize variable part (positions 12-53, 42 bytes)
// This ensures each thread works on a different message // Start with A-Z pattern like CPU/SIMD miners
coin[3] = n; // Global thread ID for(int i = 12; i < 54; i++)
coin[4] = param1; // External parameter 1 bytes[i ^ 3] = 'A' + ((i - 12) % 26);
coin[5] = param2; // External parameter 2
coin[6] = blockIdx.x; // Block index
coin[7] = threadIdx.x; // Thread index
coin[8] = warp_id; // Warp ID
coin[9] = lane_id; // Lane ID
coin[10] = n ^ param1; // XOR combination
coin[11] = n ^ param2; // XOR combination
coin[12] = (n * 0x9E3779B9u); // Hash-like mixing
// Last word: bytes 52-55 // End with newline and padding
// Memory layout: coin[13]=0xAABBCCDD -> mem[52]=DD, [53]=CC, [54]=BB, [55]=AA bytes[0x36 ^ 3] = '\n'; // Position 54
// With idx^3: bytes[52^3]=bytes[55]=AA, bytes[53^3]=bytes[54]=BB, bytes[54^3]=bytes[53]=CC, bytes[55^3]=bytes[52]=DD bytes[0x37 ^ 3] = 0x80; // Position 55
// We want: bytes[54^3]=0x0A (newline), bytes[55^3]=0x80 (padding)
// So: bytes[53]=0x0A, bytes[52]=0x80 -> coin[13]=0x????0A80 // Calculate offset based on thread index and parameters
coin[13] = ((n & 0xFFFFu) << 16) | 0x0A80u; // Top 2 bytes: variable, bottom: 0x80 0x0A // This creates a unique starting point for each thread
u64_t offset = ((u64_t)param1 << 32) | param2;
offset += (u64_t)n;
// Apply offset to variable part (increment the coin counter)
for(int pos = 53; pos >= 12 && offset > 0; pos--)
{
u08_t *byte = &bytes[pos ^ 3];
u64_t add = offset % 127;
offset /= 127;
u32_t val = *byte + add;
u08_t new_val = val % 127;
// Skip newline character (ASCII 10) in the variable part
if(new_val == '\n')
new_val++;
*byte = new_val;
offset += val / 127; // Carry
}
// Compute SHA1 hash // Compute SHA1 hash
# define T u32_t # define T u32_t
@ -74,22 +83,9 @@ void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param
// Make sure we don't write outside buffer // Make sure we don't write outside buffer
if(idx < 1024u - 14u) if(idx < 1024u - 14u)
{ {
// Store the coin data (only variable parts needed) // Store the complete coin data
coins_storage_area[idx + 0] = coin[ 3]; for(int i = 0; i < 14; i++)
coins_storage_area[idx + 1] = coin[ 4]; coins_storage_area[idx + i] = coin[i];
coins_storage_area[idx + 2] = coin[ 5];
coins_storage_area[idx + 3] = coin[ 6];
coins_storage_area[idx + 4] = coin[ 7];
coins_storage_area[idx + 5] = coin[ 8];
coins_storage_area[idx + 6] = coin[ 9];
coins_storage_area[idx + 7] = coin[10];
coins_storage_area[idx + 8] = coin[11];
coins_storage_area[idx + 9] = coin[12];
coins_storage_area[idx + 10] = coin[13];
// Store hash value for verification
coins_storage_area[idx + 11] = hash[1];
coins_storage_area[idx + 12] = hash[2];
coins_storage_area[idx + 13] = hash[3];
} }
} }
} }
@ -98,12 +94,13 @@ void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param
// Kernel that tries all possible values for one character position // Kernel that tries all possible values for one character position
// //
extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1) extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1)
void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2, int scan_position) void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2, int scan_position)
{ {
u32_t coin[14]; u32_t coin[14];
u32_t hash[5]; u32_t hash[5];
u32_t n; u32_t n;
u08_t *bytes = (u08_t *)coin;
n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x; n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x;
@ -112,36 +109,49 @@ void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t
coin[1] = 0x20636F69u; // " coi" with byte swap coin[1] = 0x20636F69u; // " coi" with byte swap
coin[2] = 0x6E203220u; // "n 2 " with byte swap coin[2] = 0x6E203220u; // "n 2 " with byte swap
// Variable part // Initialize variable part with A-Z pattern
coin[3] = param1; for(int i = 12; i < 54; i++)
coin[4] = param2; bytes[i ^ 3] = 'A' + ((i - 12) % 26);
coin[5] = n >> 8; // High bits of n
coin[6] = blockIdx.x;
coin[7] = threadIdx.x;
coin[8] = param1 ^ param2;
coin[9] = n & 0xFFu; // Low 8 bits of n
coin[10] = param1 + n;
coin[11] = param2 - n;
coin[12] = (n * 0x9E3779B9u);
coin[13] = ((n & 0xFFFFu) << 16) | 0x0A80u; // Top 2 bytes: variable, bottom: 0x80 0x0A
// Try all possible values for the scan position (0-255) // End with newline and padding
// This allows exploring a full byte range in a single kernel launch bytes[0x36 ^ 3] = '\n'; // Position 54
for(u32_t val = 0; val < 256u; val++) bytes[0x37 ^ 3] = 0x80; // Position 55
// Apply base offset from parameters (similar to main kernel)
u64_t offset = ((u64_t)param1 << 32) | param2;
offset += (u64_t)n;
// Apply offset to all positions except the scan position
for(int pos = 53; pos >= 12 && offset > 0; pos--)
{ {
// Insert the test value at the scan position if(pos == scan_position)
u32_t word_idx = scan_position / 4; continue; // Skip the scan position
u32_t byte_pos = scan_position % 4;
u32_t shift = byte_pos * 8;
if(word_idx >= 3 && word_idx < 13) u08_t *byte = &bytes[pos ^ 3];
u64_t add = offset % 95;
offset /= 95;
u32_t val = (*byte - 32 + add);
u08_t new_val = 32 + (val % 95);
// Skip newline character (ASCII 10) in the variable part
if(new_val == '\n')
new_val++;
*byte = new_val;
offset += val / 95;
}
// Try all possible printable ASCII values for the scan position (32-126)
for(u32_t val = 32; val < 127; val++)
{
// Set the test value at the scan position
if(scan_position >= 12 && scan_position < 54)
{ {
u32_t mask = ~(0xFFu << shift); bytes[scan_position ^ 3] = (u08_t)val;
coin[word_idx] = (coin[word_idx] & mask) | (val << shift);
// Make sure we don't use newline in the middle // Skip newline in the middle (it's only valid at position 54)
u08_t *bytes = (u08_t *)coin; if(scan_position != 54 && val == '\n')
if(scan_position < 54 && bytes[scan_position ^ 3] == 0x0A)
continue; continue;
} }
@ -164,20 +174,8 @@ void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t
u32_t idx = atomicAdd(coins_storage_area, 14u); u32_t idx = atomicAdd(coins_storage_area, 14u);
if(idx < 1024u - 14u) if(idx < 1024u - 14u)
{ {
coins_storage_area[idx + 0] = coin[ 3]; for(int i = 0; i < 14; i++)
coins_storage_area[idx + 1] = coin[ 4]; coins_storage_area[idx + i] = coin[i];
coins_storage_area[idx + 2] = coin[ 5];
coins_storage_area[idx + 3] = coin[ 6];
coins_storage_area[idx + 4] = coin[ 7];
coins_storage_area[idx + 5] = coin[ 8];
coins_storage_area[idx + 6] = coin[ 9];
coins_storage_area[idx + 7] = coin[10];
coins_storage_area[idx + 8] = coin[11];
coins_storage_area[idx + 9] = coin[12];
coins_storage_area[idx + 10] = coin[13];
coins_storage_area[idx + 11] = hash[1];
coins_storage_area[idx + 12] = hash[2];
coins_storage_area[idx + 13] = hash[3];
} }
} }
} }

View File

@ -30,39 +30,61 @@ static int is_valid_coin(u32_t *hash)
return hash[0] == 0xAAD20250u; return hash[0] == 0xAAD20250u;
} }
//
// advance the variable part of a coin (byte positions 53 down to 12) to the
// next candidate, treating it as a counter whose last byte changes fastest
//
// bytes are addressed as ((u08_t *)coin)[pos ^ 3]
// returns 1 when the increment stopped inside the variable part,
// returns 0 when the carry ran past position 12 (every position wrapped)
//
static int increment_coin(u32_t coin[14])
{
  u08_t *bytes = (u08_t *)coin;
  int pos;

  for(pos = 53; pos >= 12; pos--)
  {
    u08_t *cell = &bytes[pos ^ 3];

    // a newline or padding byte found here restarts the digit from space
    if(*cell == '\n' || *cell == 0x80)
      *cell = 32;

    *cell += 1;

    // never leave a newline inside the variable part
    if(*cell == '\n')
      *cell += 1;

    // still below 127: this digit absorbed the increment, no carry needed
    if(*cell < 127)
      break;

    // digit wrapped: reset it to space and let the loop carry to the left
    *cell = 32;
  }

  return (pos >= 12);
}
// //
// prepare interleaved data for SIMD processing // prepare interleaved data for SIMD processing
// //
static void prepare_coins(u32_t base_coin[14], u32_t *interleaved_data, int simd_width, u64_t base_counter) static void prepare_coins(u32_t base_coin[14], u32_t *interleaved_data, int simd_width)
{ {
for(int lane = 0; lane < simd_width; lane++) for(int lane = 0; lane < simd_width; lane++)
{ {
u32_t coin[14]; u32_t coin[14];
memcpy(coin, base_coin, sizeof(coin)); memcpy(coin, base_coin, sizeof(coin));
// Modify the coin for this lane (encode counter in the variable part)
u64_t counter = base_counter + lane;
for(int i = 12; i < 20 && i < 54; i++)
{
int shift = (19 - i) * 8;
if(shift >= 0 && shift < 64)
{
u08_t byte = (counter >> shift) & 0xFF;
// Map to ASCII printable range (32-126, excluding newline position)
if(byte == '\n' || byte >= 0x80)
byte = 'X';
// Ensure byte is in printable ASCII range (32-126)
// Map all values to ASCII printable characters: space (32) to tilde (126)
// byte = 32 + (byte % 95);
((u08_t *)coin)[i ^ 3] = byte;
}
}
// Interleave the data // Interleave the data
for(int idx = 0; idx < 14; idx++) for(int idx = 0; idx < 14; idx++)
{ {
interleaved_data[idx * simd_width + lane] = coin[idx]; interleaved_data[idx * simd_width + lane] = coin[idx];
} }
// Increment the base coin for the next lane
increment_coin(base_coin);
} }
} }
@ -125,19 +147,21 @@ static void mine_coins_avx(u64_t max_attempts)
((u08_t *)base_coin)[0x36 ^ 3] = '\n'; ((u08_t *)base_coin)[0x36 ^ 3] = '\n';
((u08_t *)base_coin)[0x37 ^ 3] = 0x80; ((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
// Initialize variable part // Initialize variable part with A-Z cycling pattern (same as CPU miner)
for(int i = 12; i < 54; i++) for(int i = 12; i < 54; i++)
((u08_t *)base_coin)[i ^ 3] = 'A'; ((u08_t *)base_coin)[i ^ 3] = 'A' + (i - 12) % 26;
printf("Mining DETI coins using AVX (4-way SIMD)...\n"); printf("Mining DETI coins using AVX (4-way SIMD)...\n");
printf("Press Ctrl+C to stop\n\n"); printf("Press Ctrl+C to stop\n\n");
time_measurement(); time_measurement();
time_measurement();
double start_time = wall_time_delta();
while(keep_running && (max_attempts == 0 || attempts < max_attempts)) while(keep_running && (max_attempts == 0 || attempts < max_attempts))
{ {
// Prepare coins for this batch // Prepare coins for this batch
prepare_coins(base_coin, interleaved_data, SIMD_WIDTH, attempts); prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
// Compute SHA1 hashes // Compute SHA1 hashes
sha1_avx((v4si *)interleaved_data, (v4si *)interleaved_hash); sha1_avx((v4si *)interleaved_data, (v4si *)interleaved_hash);
@ -163,13 +187,16 @@ static void mine_coins_avx(u64_t max_attempts)
// Print progress every 1M attempts // Print progress every 1M attempts
if(attempts % 1000000 < SIMD_WIDTH) if(attempts % 1000000 < SIMD_WIDTH)
{ {
printf("Attempts: %llu, Coins: %u\n", time_measurement();
(unsigned long long)attempts, coins_found); double current_time = wall_time_delta() - start_time;
double rate = attempts / current_time;
printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u\n",
(unsigned long long)attempts, rate / 1e6, coins_found);
} }
} }
time_measurement(); time_measurement();
double total_time = wall_time_delta(); double total_time = wall_time_delta() - start_time;
printf("\n=== Mining Statistics ===\n"); printf("\n=== Mining Statistics ===\n");
printf("Total attempts: %llu\n", (unsigned long long)attempts); printf("Total attempts: %llu\n", (unsigned long long)attempts);
@ -212,17 +239,20 @@ static void mine_coins_avx2(u64_t max_attempts)
((u08_t *)base_coin)[0x36 ^ 3] = '\n'; ((u08_t *)base_coin)[0x36 ^ 3] = '\n';
((u08_t *)base_coin)[0x37 ^ 3] = 0x80; ((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
// Initialize variable part with A-Z cycling pattern (same as CPU miner)
for(int i = 12; i < 54; i++) for(int i = 12; i < 54; i++)
((u08_t *)base_coin)[i ^ 3] = 'A'; ((u08_t *)base_coin)[i ^ 3] = 'A' + (i - 12) % 26;
printf("Mining DETI coins using AVX2 (8-way SIMD)...\n"); printf("Mining DETI coins using AVX2 (8-way SIMD)...\n");
printf("Press Ctrl+C to stop\n\n"); printf("Press Ctrl+C to stop\n\n");
time_measurement(); time_measurement();
time_measurement();
double start_time = wall_time_delta();
while(keep_running && (max_attempts == 0 || attempts < max_attempts)) while(keep_running && (max_attempts == 0 || attempts < max_attempts))
{ {
prepare_coins(base_coin, interleaved_data, SIMD_WIDTH, attempts); prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash); sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash);
attempts += SIMD_WIDTH; attempts += SIMD_WIDTH;
@ -244,13 +274,122 @@ static void mine_coins_avx2(u64_t max_attempts)
if(attempts % 1000000 < SIMD_WIDTH) if(attempts % 1000000 < SIMD_WIDTH)
{ {
printf("Attempts: %llu, Coins: %u\n", time_measurement();
(unsigned long long)attempts, coins_found); double current_time = wall_time_delta() - start_time;
double rate = attempts / current_time;
printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u\n",
(unsigned long long)attempts, rate / 1e6, coins_found);
} }
} }
time_measurement(); time_measurement();
double total_time = wall_time_delta(); double total_time = wall_time_delta() - start_time;
printf("\n=== Mining Statistics ===\n");
printf("Total attempts: %llu\n", (unsigned long long)attempts);
printf("Total time: %.2f seconds\n", total_time);
printf("Average rate: %.2f attempts/second\n", attempts / total_time);
printf("Coins found: %u\n", coins_found);
save_coin(NULL);
}
#endif
#if defined(__AVX2__)
#include <omp.h>
//
// mine DETI coins using AVX2 (8-way SIMD) + OpenMP
//
__attribute__((unused))
static void mine_coins_avx2_omp(u64_t max_attempts)
{
const int SIMD_WIDTH = 8;
int num_threads = omp_get_max_threads();
u64_t attempts = 0;
u32_t coins_found = 0;
printf("Mining DETI coins using AVX2 (8-way SIMD) + OpenMP (%d threads)...\n", num_threads);
printf("Press Ctrl+C to stop\n\n");
time_measurement();
time_measurement();
double start_time = wall_time_delta();
#pragma omp parallel
{
u32_t base_coin[14];
u32_t interleaved_data[14 * SIMD_WIDTH] __attribute__((aligned(32)));
u32_t interleaved_hash[5 * SIMD_WIDTH] __attribute__((aligned(32)));
// u32_t thread_coins_found = 0;
u64_t thread_attempts = 0;
// Initialize base coin template (unique per thread)
memset(base_coin, 0, sizeof(base_coin));
((u08_t *)base_coin)[0x0 ^ 3] = 'D';
((u08_t *)base_coin)[0x1 ^ 3] = 'E';
((u08_t *)base_coin)[0x2 ^ 3] = 'T';
((u08_t *)base_coin)[0x3 ^ 3] = 'I';
((u08_t *)base_coin)[0x4 ^ 3] = ' ';
((u08_t *)base_coin)[0x5 ^ 3] = 'c';
((u08_t *)base_coin)[0x6 ^ 3] = 'o';
((u08_t *)base_coin)[0x7 ^ 3] = 'i';
((u08_t *)base_coin)[0x8 ^ 3] = 'n';
((u08_t *)base_coin)[0x9 ^ 3] = ' ';
((u08_t *)base_coin)[0xa ^ 3] = '2';
((u08_t *)base_coin)[0xb ^ 3] = ' ';
((u08_t *)base_coin)[0x36 ^ 3] = '\n';
((u08_t *)base_coin)[0x37 ^ 3] = 0x80;
// Initialize variable part with A-Z cycling pattern (offset per thread)
int thread_id = omp_get_thread_num();
for(int i = 12; i < 54; i++)
((u08_t *)base_coin)[i ^ 3] = 'A' + ((i - 12 + thread_id * SIMD_WIDTH) % 26);
while(keep_running && (max_attempts == 0 || (attempts + thread_attempts) < max_attempts))
{
prepare_coins(base_coin, interleaved_data, SIMD_WIDTH);
sha1_avx2((v8si *)interleaved_data, (v8si *)interleaved_hash);
thread_attempts += SIMD_WIDTH;
u32_t hashes[SIMD_WIDTH][5];
extract_hashes(interleaved_hash, hashes, SIMD_WIDTH);
for(int lane = 0; lane < SIMD_WIDTH; lane++)
{
if(is_valid_coin(hashes[lane]))
{
#pragma omp critical
{
coins_found++;
u32_t coins[SIMD_WIDTH][14];
extract_coins(interleaved_data, coins, SIMD_WIDTH);
printf("COIN FOUND! (attempt %llu, thread %d, lane %d)\n",
(unsigned long long)(attempts + thread_attempts - SIMD_WIDTH + lane),
thread_id, lane);
save_coin(coins[lane]);
}
}
}
// Print progress every 1M attempts (per thread)
if(thread_attempts % 1000000 < SIMD_WIDTH)
{
#pragma omp critical
{
time_measurement();
double current_time = wall_time_delta() - start_time;
double rate = (attempts + thread_attempts) / current_time;
printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u\n",
(unsigned long long)(attempts + thread_attempts), rate / 1e6, coins_found);
}
}
}
#pragma omp atomic
attempts += thread_attempts;
}
time_measurement();
double total_time = wall_time_delta() - start_time;
printf("\n=== Mining Statistics ===\n"); printf("\n=== Mining Statistics ===\n");
printf("Total attempts: %llu\n", (unsigned long long)attempts); printf("Total attempts: %llu\n", (unsigned long long)attempts);
@ -272,8 +411,13 @@ int main(int argc, char *argv[])
max_attempts = strtoull(argv[1], NULL, 10); max_attempts = strtoull(argv[1], NULL, 10);
#if defined(__AVX2__) #if defined(__AVX2__)
printf("Using AVX2 implementation\n"); if(argc > 2 && strcmp(argv[2], "omp") == 0) {
mine_coins_avx2(max_attempts); printf("Using AVX2 + OpenMP implementation\n");
mine_coins_avx2_omp(max_attempts);
} else {
printf("Using AVX2 implementation\n");
mine_coins_avx2(max_attempts);
}
#elif defined(__AVX__) #elif defined(__AVX__)
printf("Using AVX implementation\n"); printf("Using AVX implementation\n");
mine_coins_avx(max_attempts); mine_coins_avx(max_attempts);
@ -284,4 +428,3 @@ int main(int argc, char *argv[])
return 0; return 0;
} }

303
aad_coin_miner_wasm.c Normal file
View File

@ -0,0 +1,303 @@
//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - WebAssembly implementation
//
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "aad_data_types.h"
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
#include "aad_sha1_wasm.h"
#else
#include "aad_sha1_cpu.h"
#include "aad_utilities.h"
#include "aad_vault.h"
#endif
// Global mining state
static volatile int keep_running = 1;
static u64_t total_attempts = 0;
static u32_t coins_found = 0;
static double mining_start_time = 0;
static double pause_time_offset = 0; // Track paused time
static double last_pause_time = 0; // When mining was paused
static u32_t found_coins[1024][14]; // Store up to 1024 found coins
static u32_t found_coins_count = 0;
//
// Tell whether a SHA-1 digest marks a DETI coin: the first 32-bit word of
// the hash must be exactly 0xAAD20250 (returns 1 if so, 0 otherwise)
//
static int is_valid_coin(u32_t *hash)
{
  const u32_t coin_signature = 0xAAD20250u;

  return (hash[0] == coin_signature) ? 1 : 0;
}
//
// Step the variable part of the coin (byte positions 12-53) to the next
// candidate. Position 53 changes fastest; a digit that reaches 127 is reset
// to space (32) and the carry moves one position to the left.
//
// Bytes are addressed as ((u08_t *)coin)[pos ^ 3].
// Returns 1 if the increment stopped inside the variable part, 0 if the
// carry propagated past position 12 (every position wrapped around).
//
static int increment_coin(u32_t coin[14])
{
  u08_t *bytes = (u08_t *)coin;
  int pos;

  for(pos = 53; pos >= 12; pos--)
  {
    u08_t *cell = &bytes[pos ^ 3];

    // A newline or padding byte found here restarts the digit from space
    if(*cell == '\n' || *cell == 0x80)
      *cell = 32;

    *cell += 1;

    // Never leave a newline inside the variable part
    if(*cell == '\n')
      *cell += 1;

    // Digit still below 127: done, nothing to carry
    if(*cell < 127)
      break;

    // Digit wrapped: reset it and let the loop carry to the next position
    *cell = 32;
  }

  return (pos >= 12);
}
//
// Current time in seconds, as a double
//
// Emscripten builds convert emscripten_get_now() (milliseconds) to seconds;
// other builds read CLOCK_MONOTONIC_RAW through clock_gettime().
//
static double get_time()
{
#ifdef __EMSCRIPTEN__
  const double now_ms = emscripten_get_now();

  return now_ms / 1000.0;
#else
  struct timespec now;

  clock_gettime(CLOCK_MONOTONIC_RAW, &now);

  const double whole_seconds = (double)now.tv_sec;
  const double fraction = (double)now.tv_nsec / 1e9;
  return whole_seconds + fraction;
#endif
}
//
// Run one batch of mining work (exported so JavaScript can call it)
//
// iterations_per_call: upper bound on the number of candidates hashed now
// returns: the value of keep_running when the call ends (0 = stopped)
//
// The candidate coin is kept in function-static storage, so each call
// continues the search from where the previous call left off. The first
// call builds the coin template and records mining_start_time.
//
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_KEEPALIVE
#endif
int mine_coins_wasm(u32_t iterations_per_call)
{
  static u32_t coin[14];
  static int initialized = 0;
  u32_t hash[5];

  // One-time setup of the coin template
  if(!initialized)
  {
    static const char prefix[] = "DETI coin 2 ";
    u08_t *bytes = (u08_t *)coin;

    memset(coin, 0, sizeof(coin));

    // Fixed 12-byte header (positions 0-11)
    for(int k = 0; k < 12; k++)
      bytes[k ^ 3] = (u08_t)prefix[k];

    // Variable part (positions 12-53) starts as a cycling A-Z pattern
    for(int k = 12; k < 54; k++)
      bytes[k ^ 3] = 'A' + (k - 12) % 26;

    // Trailing newline (position 54) and 0x80 padding byte (position 55)
    bytes[54 ^ 3] = '\n';
    bytes[55 ^ 3] = 0x80;

    mining_start_time = get_time();
    initialized = 1;
  }

  if(!keep_running)
    return 0;

  // Hash candidates until the budget is used up or mining is stopped
  u32_t remaining = iterations_per_call;
  while(remaining != 0 && keep_running)
  {
    sha1(coin, hash);
    total_attempts++;

    if(is_valid_coin(hash))
    {
      // Keep a copy while there is room; coins_found counts every hit
      if(found_coins_count < 1024)
      {
        memcpy(found_coins[found_coins_count], coin, sizeof(coin));
        found_coins_count++;
      }
      coins_found++;
#ifndef __EMSCRIPTEN__
      printf("COIN FOUND! (attempt %llu)\n", (unsigned long long)total_attempts);
      save_coin(coin);
#endif
    }

    increment_coin(coin);
    remaining--;
  }

  return keep_running;
}
//
// Get mining statistics
//
// attempts:     receives the total number of hashes computed so far
// coins:        receives the number of valid coins found so far
// hash_rate:    receives attempts per second of active mining time
//               (0 when no active time has elapsed)
// elapsed_time: receives the active mining time in seconds, i.e. the time
//               since mining_start_time minus the accumulated paused time
//
// While mining is paused the clock is frozen at last_pause_time.
// Before mining has ever started (mining_start_time is still 0) the elapsed
// time is reported as 0 instead of the raw clock value.
//
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_KEEPALIVE
#endif
void get_statistics(u64_t *attempts, u32_t *coins, double *hash_rate, double *elapsed_time)
{
  double actual_elapsed = 0.0;

  *attempts = total_attempts;
  *coins = coins_found;

  // mining_start_time stays 0 until the first mining call or a reset;
  // without a start time there is no meaningful elapsed time to report
  if(mining_start_time > 0)
  {
    double end_time;

    if(!keep_running && last_pause_time > 0)
      end_time = last_pause_time; // paused: freeze the clock at the pause
    else
      end_time = get_time();      // running: measure up to now

    actual_elapsed = end_time - mining_start_time - pause_time_offset;

    // Never report a negative duration
    if(actual_elapsed < 0)
      actual_elapsed = 0.0;
  }

  *elapsed_time = actual_elapsed;
  *hash_rate = (actual_elapsed > 0) ? (total_attempts / actual_elapsed) : 0;
}
//
// Pause mining: clear keep_running and remember when the pause began
//
// Calling this while already stopped does nothing, so the timestamp of the
// first stop request is preserved.
//
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_KEEPALIVE
#endif
void stop_mining()
{
  if(!keep_running)
    return;

  keep_running = 0;
  last_pause_time = get_time();
}
//
// Resume mining after a pause
//
// Only acts when mining is stopped and a pause timestamp was recorded; the
// length of the pause is added to pause_time_offset so that it is excluded
// from the reported elapsed time.
//
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_KEEPALIVE
#endif
void resume_mining()
{
  if(keep_running || !(last_pause_time > 0))
    return;

  pause_time_offset += get_time() - last_pause_time;
  keep_running = 1;
  last_pause_time = 0;
}
//
// Look up a stored coin by index
//
// Returns a pointer to the 14-word coin stored at the given index, or NULL
// when index is not below found_coins_count.
//
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_KEEPALIVE
#endif
u32_t* get_found_coin(u32_t index)
{
  return (index < found_coins_count) ? found_coins[index] : NULL;
}
//
// Number of coins currently held in the found_coins table
//
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_KEEPALIVE
#endif
u32_t get_found_coins_count()
{
  const u32_t stored = found_coins_count;

  return stored;
}
//
// Reset the mining session: zero the counters and the stored-coin count,
// forget any pause, restart the clock, and leave the miner in the running state
//
// Only the variables assigned below are touched by this function.
//
#ifdef __EMSCRIPTEN__
EMSCRIPTEN_KEEPALIVE
#endif
void reset_mining()
{
  // counters
  total_attempts = 0;
  coins_found = 0;
  found_coins_count = 0;
  // pause bookkeeping
  last_pause_time = 0;
  pause_time_offset = 0;
  // restart the clock and enable mining
  mining_start_time = get_time();
  keep_running = 1;
}
//
// Main function (for standalone compilation/testing)
//
#ifndef __EMSCRIPTEN__
int main(int argc, char *argv[])
{
u64_t max_attempts = 0;
if(argc > 1)
max_attempts = strtoull(argv[1], NULL, 10);
printf("Mining DETI coins using WebAssembly implementation (standalone mode)...\n");
printf("Press Ctrl+C to stop\n\n");
time_measurement();
time_measurement();
double start_time = wall_time_delta();
double last_report = start_time;
while(keep_running && (max_attempts == 0 || total_attempts < max_attempts))
{
mine_coins_wasm(100000);
time_measurement();
double current_time = wall_time_delta() - start_time;
if(current_time - last_report >= 1.0)
{
u64_t attempts;
u32_t coins;
double hash_rate, elapsed;
get_statistics(&attempts, &coins, &hash_rate, &elapsed);
printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u\n",
(unsigned long long)attempts, hash_rate / 1e6, coins);
last_report = current_time;
}
}
time_measurement();
double total_time = wall_time_delta() - start_time;
printf("\n=== Mining Statistics ===\n");
printf("Total attempts: %llu\n", (unsigned long long)total_attempts);
printf("Total time: %.2f seconds\n", total_time);
printf("Average rate: %.2f attempts/second\n", total_attempts / total_time);
printf("Coins found: %u\n", coins_found);
save_coin(NULL);
return 0;
}
#endif

View File

@ -36,7 +36,7 @@ typedef struct
CUdeviceptr device_data[2]; // the pointers to the device data CUdeviceptr device_data[2]; // the pointers to the device data
// launch kernel data // launch kernel data
unsigned int grid_dim_x; // the number of grid blocks (in the X dimension, the only one we will use here) unsigned int grid_dim_x; // the number of grid blocks (in the X dimension, the only one we will use here)
unsigned int block_dim_x; // the number of threads in a block (in the X dimension, the only one we will use here, should be equal to RECOMENDED_CUDA_BLOCK_SIZE) unsigned int block_dim_x; // the number of threads in a block (in the X dimension, the only one we will use here, should be equal to RECOMMENDED_CUDA_BLOCK_SIZE)
int n_kernel_arguments; // number of kernel arguments int n_kernel_arguments; // number of kernel arguments
void *arg[MAX_N_ARGUMENTS]; // pointers to the kernel argument data void *arg[MAX_N_ARGUMENTS]; // pointers to the kernel argument data
@ -260,10 +260,10 @@ static void device_to_host_copy(cuda_data_t *cd,int idx)
// launch a CUDA kernel (with 0 bytes of shared memory and no extra options) // launch a CUDA kernel (with 0 bytes of shared memory and no extra options)
// //
static void lauch_kernel(cuda_data_t *cd) static void launch_kernel(cuda_data_t *cd)
{ {
if(cd->block_dim_x != (unsigned int)RECOMENDED_CUDA_BLOCK_SIZE) if(cd->block_dim_x != (unsigned int)RECOMMENDED_CUDA_BLOCK_SIZE)
fprintf(stderr,"lauch_kernel(): block_dim_x should be equal to %d\n",RECOMENDED_CUDA_BLOCK_SIZE); fprintf(stderr,"launch_kernel(): block_dim_x should be equal to %d\n",RECOMMENDED_CUDA_BLOCK_SIZE);
CU_CALL( cuLaunchKernel , (cd->cu_kernel,cd->grid_dim_x,1u,1u,cd->block_dim_x,1u,1u,0u,cd->cu_stream,&cd->arg[0],NULL) ); CU_CALL( cuLaunchKernel , (cd->cu_kernel,cd->grid_dim_x,1u,1u,cd->block_dim_x,1u,1u,0u,cd->cu_stream,&cd->arg[0],NULL) );
synchronize_cuda(cd); synchronize_cuda(cd);
} }

View File

@ -28,7 +28,7 @@
// //
// we place this here to simplify things (aad_sha1_cuda_kernel.cu includes this file...) // we place this here to simplify things (aad_sha1_cuda_kernel.cu includes this file...)
// //
#define RECOMENDED_CUDA_BLOCK_SIZE 128 #define RECOMMENDED_CUDA_BLOCK_SIZE 128
// //

View File

@ -165,7 +165,7 @@ static void test_sha1_avx(int n_tests,int n_measurements)
for(n = 0;n < n_measurements;n++) for(n = 0;n < n_measurements;n++)
{ {
interleaved_data[0][0]++; interleaved_data[0][0]++;
sha1(&data[lane].i[0],&hash[lane].i[0]); sha1_avx((v4si *)&interleaved_data[0],(v4si *)&interleaved_hash[0]);
sum += interleaved_hash[4][0]; sum += interleaved_hash[4][0];
} }
time_measurement(); time_measurement();
@ -235,7 +235,7 @@ static void test_sha1_avx2(int n_tests,int n_measurements)
for(n = 0;n < n_measurements;n++) for(n = 0;n < n_measurements;n++)
{ {
interleaved_data[0][0]++; interleaved_data[0][0]++;
sha1(&data[lane].i[0],&hash[lane].i[0]); sha1_avx2((v8si *)&interleaved_data[0],(v8si *)&interleaved_hash[0]);
sum += interleaved_hash[4][0]; sum += interleaved_hash[4][0];
} }
time_measurement(); time_measurement();
@ -305,7 +305,7 @@ static void test_sha1_avx512f(int n_tests,int n_measurements)
for(n = 0;n < n_measurements;n++) for(n = 0;n < n_measurements;n++)
{ {
interleaved_data[0][0]++; interleaved_data[0][0]++;
sha1(&data[lane].i[0],&hash[lane].i[0]); sha1_avx512f((v16si *)&interleaved_data[0],(v16si *)&interleaved_hash[0]);
sum += interleaved_hash[4][0]; sum += interleaved_hash[4][0];
} }
time_measurement(); time_measurement();
@ -375,7 +375,7 @@ static void test_sha1_neon(int n_tests,int n_measurements)
for(n = 0;n < n_measurements;n++) for(n = 0;n < n_measurements;n++)
{ {
interleaved_data[0][0]++; interleaved_data[0][0]++;
sha1(&data[lane].i[0],&hash[lane].i[0]); sha1_neon((uint32x4_t *)&interleaved_data[0],(uint32x4_t *)&interleaved_hash[0]);
sum += interleaved_hash[4][0]; sum += interleaved_hash[4][0];
} }
time_measurement(); time_measurement();

View File

@ -12,8 +12,7 @@
// //
#include "aad_sha1.h" #include "aad_sha1.h"
#include "aad_data_types.h"
typedef unsigned int u32_t;
// //
// the nvcc compiler stores w[] in registers (constant indices!) // the nvcc compiler stores w[] in registers (constant indices!)
@ -23,7 +22,7 @@ typedef unsigned int u32_t;
// warp thread number: n & 31 -- the lane // warp thread number: n & 31 -- the lane
// //
extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1) extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1)
void sha1_cuda_kernel(u32_t *interleaved32_data,u32_t *interleaved32_hash) void sha1_cuda_kernel(u32_t *interleaved32_data,u32_t *interleaved32_hash)
{ {
u32_t n; u32_t n;

View File

@ -19,7 +19,7 @@ static void test_sha1_cuda(int n_tests)
double host_to_device_time,kernel_time,device_to_host_time,hashes_per_second; double host_to_device_time,kernel_time,device_to_host_time,hashes_per_second;
cuda_data_t cd; cuda_data_t cd;
if(n_tests <= 0 || n_tests > (1 << 24) || n_tests % RECOMENDED_CUDA_BLOCK_SIZE != 0) if(n_tests <= 0 || n_tests > (1 << 24) || n_tests % RECOMMENDED_CUDA_BLOCK_SIZE != 0)
{ {
fprintf(stderr,"test_sha1_cuda(): bad number of tests\n"); fprintf(stderr,"test_sha1_cuda(): bad number of tests\n");
exit(1); exit(1);
@ -44,13 +44,13 @@ static void test_sha1_cuda(int n_tests)
host_to_device_copy(&cd,0); // idx=0 means that the interleaved32_data is copied to the CUDA device host_to_device_copy(&cd,0); // idx=0 means that the interleaved32_data is copied to the CUDA device
time_measurement(); time_measurement();
host_to_device_time = wall_time_delta(); host_to_device_time = wall_time_delta();
cd.grid_dim_x = (u32_t)n_tests / (u32_t)RECOMENDED_CUDA_BLOCK_SIZE; cd.grid_dim_x = (u32_t)n_tests / (u32_t)RECOMMENDED_CUDA_BLOCK_SIZE;
cd.block_dim_x = (u32_t)RECOMENDED_CUDA_BLOCK_SIZE; cd.block_dim_x = (u32_t)RECOMMENDED_CUDA_BLOCK_SIZE;
cd.n_kernel_arguments = 2; cd.n_kernel_arguments = 2;
cd.arg[0] = &cd.device_data[0]; // interleaved32_data cd.arg[0] = &cd.device_data[0]; // interleaved32_data
cd.arg[1] = &cd.device_data[1]; // interleaved32_hash cd.arg[1] = &cd.device_data[1]; // interleaved32_hash
time_measurement(); time_measurement();
lauch_kernel(&cd); launch_kernel(&cd);
time_measurement(); time_measurement();
kernel_time = wall_time_delta(); kernel_time = wall_time_delta();
time_measurement(); time_measurement();

35
aad_sha1_wasm.h Normal file
View File

@ -0,0 +1,35 @@
#ifndef AAD_SHA1_WASM
#define AAD_SHA1_WASM
#include "aad_data_types.h"
#include "aad_sha1.h"
//
// SHA1 hash computation for WebAssembly (scalar implementation)
//
//
// rotate the 32-bit word x left by n bit positions
//
// The count is reduced modulo 32 and both shift amounts are kept in 0..31, so
// the function is well defined for every n (a plain x >> (32 - n) is undefined
// behavior for n == 0 because it shifts by the full width of the type).
//
static inline u32_t rotate_left(u32_t x, int n)
{
  const unsigned int count = (unsigned int)n & 31u;
  return (x << count) | (x >> ((32u - count) & 31u));
}
//
// scalar SHA1 wrapper: instantiates the CUSTOM_SHA1_CODE() template with plain
// u32_t arithmetic
//
//   coin ... input words, accessed by the template as coin[idx] through DATA(idx)
//   hash ... output words, accessed by the template as hash[idx] through HASH(idx)
//
// NOTE(review): the number of words read from coin[] and written to hash[] is
// decided inside CUSTOM_SHA1_CODE() (aad_sha1.h) -- confirm the buffer sizes there
//
static void sha1(u32_t *coin, u32_t *hash)
{
// Define the macros needed by CUSTOM_SHA1_CODE
// T is the word type, C(c) wraps a constant, ROTATE is the left rotation,
// DATA/HASH map the template's indices onto the two argument arrays
#define T u32_t
#define C(c) (c)
#define ROTATE(x,n) rotate_left(x,n)
#define DATA(idx) coin[idx]
#define HASH(idx) hash[idx]
// Use the standard SHA1 template from aad_sha1.h
CUSTOM_SHA1_CODE();
// remove the helper macros so they do not leak into code that includes this header
#undef T
#undef C
#undef ROTATE
#undef DATA
#undef HASH
}
#endif

258
index.html Normal file
View File

@ -0,0 +1,258 @@
<!DOCTYPE html>
<html>
<head>
<title>DETI Coin Miner - WebAssembly</title>
<style>
body {
font-family: monospace;
padding: 20px;
max-width: 1200px;
margin: 0 auto;
}
button {
padding: 10px 20px;
margin: 5px;
font-size: 16px;
}
#stats {
margin-top: 20px;
padding: 10px;
background: #f0f0f0;
border-radius: 5px;
box-sizing: border-box;
}
#coins-container {
margin-top: 20px;
}
#coins {
width: 100%;
height: 300px;
padding: 10px;
background: #f9f9f9;
border: 2px solid #ccc;
border-radius: 5px;
font-family: monospace;
font-size: 12px;
overflow-y: auto;
white-space: pre-wrap;
word-wrap: break-word;
box-sizing: border-box;
}
.control-group {
margin: 10px 0;
}
label {
display: inline-block;
width: 200px;
}
h2 {
margin-top: 20px;
margin-bottom: 10px;
}
.coin-entry {
color: #006400;
font-weight: bold;
}
.coin-data {
color: #000080;
}
</style>
</head>
<body>
<h1>DETI Coin Miner (WebAssembly)</h1>
<div class="control-group">
<label>Iterations per batch:</label>
<input type="number" id="batchSize" value="1000000" step="100000">
</div>
<div class="control-group">
<label>Update interval (ms):</label>
<input type="number" id="updateInterval" value="100" step="50">
</div>
<button id="start">Start Mining</button>
<button id="stop">Stop Mining</button>
<button id="reset">Reset</button>
<button id="clearCoins">Clear Coins Display</button>
<div id="stats">
Waiting to start...
</div>
<div id="coins-container">
<h2>Found Coins (<span id="coin-count">0</span>)</h2>
<div id="coins"></div>
</div>
<script src="coin_miner_wasm.js"></script>
<script>
let mining = false;
let Module;
let miningInterval;
let updateInterval;
let lastDisplayedCoinCount = 0;
let pausedStats = false;
CoinMinerModule().then(mod => {
Module = mod;
console.log('WebAssembly module loaded');
// Start: resume the C-side clock and begin scheduling mining batches.
document.getElementById('start').onclick = () => {
    if (mining) {
        return; // already mining, do not start a second pair of timers
    }
    mining = true;
    Module._resume_mining();
    pausedStats = false;
    console.log('Starting mining...');
    startMining();
};
// Stop: pause the miner, cancel the timers and show one last (paused) snapshot.
document.getElementById('stop').onclick = () => {
    mining = false;
    Module._stop_mining();
    clearInterval(miningInterval);
    clearInterval(updateInterval);
    updateStats();
    const statsBox = document.getElementById('stats');
    statsBox.innerHTML = statsBox.innerHTML.replace('Mining Statistics:', 'Mining Statistics (PAUSED):');
    pausedStats = true;
    console.log('Mining stopped');
};
// Reset: clear all counters and the coin list, and leave the miner idle.
document.getElementById('reset').onclick = () => {
    Module._reset_mining();
    // reset_mining() restarts the clock and leaves the miner flagged as running.
    // Park it right away so the idle time until the next Start is treated as a
    // pause (resume_mining() subtracts it) instead of diluting the hash rate.
    Module._stop_mining();
    mining = false;
    pausedStats = false;
    clearInterval(miningInterval);
    clearInterval(updateInterval);
    lastDisplayedCoinCount = 0;
    document.getElementById('stats').innerHTML = 'Reset complete. Click Start to begin.';
    document.getElementById('coins').innerHTML = '';
    document.getElementById('coin-count').textContent = '0';
    console.log('Mining reset');
};
// Clear Coins Display: empties the list on screen only; stored coins are kept.
document.getElementById('clearCoins').onclick = () => {
    document.getElementById('coins').innerHTML = '';
    console.log('Coins display cleared');
};
/**
 * Convert the first 55 bytes of a coin stored in WASM memory to display text.
 *
 * Logical byte i of the coin is stored at byte address i ^ 3 (the same index
 * transformation the miner uses when it fills the coin). Printable ASCII is
 * copied as is, a newline becomes the two characters "\n", and every other
 * byte is shown as [0xhh].
 *
 * @param {number} coinPtr address of the coin in the module's memory
 * @returns {string} printable representation of the coin
 */
function coinToString(coinPtr) {
    const renderByte = (value) => {
        if (value === 10) {
            return '\\n';
        }
        if (value >= 32 && value <= 126) {
            return String.fromCharCode(value);
        }
        return `[0x${value.toString(16).padStart(2, '0')}]`;
    };
    const pieces = [];
    for (let pos = 0; pos < 55; pos++) {
        const address = pos ^ 3;
        // read the aligned 32-bit word holding the byte, then shift the byte down
        const word = Module.getValue(coinPtr + (address & ~3), 'i32');
        pieces.push(renderByte((word >> ((address & 3) * 8)) & 0xFF));
    }
    return pieces.join('');
}
/**
 * Append to the coin list every coin found since the last call and update the
 * coin counter in the heading.
 *
 * Entries are built with textContent instead of innerHTML: a coin may contain
 * any printable character, including '<', '>' and '&', which must be shown
 * literally and not parsed as markup.
 */
function displayNewCoins() {
    const totalCoins = Module._get_found_coins_count();
    if (totalCoins <= lastDisplayedCoinCount) {
        return;
    }
    const coinsDiv = document.getElementById('coins');
    for (let i = lastDisplayedCoinCount; i < totalCoins; i++) {
        const coinPtr = Module._get_found_coin(i);
        if (coinPtr === 0) {
            continue; // index not available on the C side
        }
        const label = document.createElement('span');
        label.className = 'coin-entry';
        label.textContent = `[${new Date().toLocaleTimeString()}] Coin #${i + 1}:`;
        const data = document.createElement('span');
        data.className = 'coin-data';
        data.textContent = coinToString(coinPtr);
        const entry = document.createElement('div');
        entry.append(label, ' ', data);
        coinsDiv.appendChild(entry);
    }
    // keep the newest entry visible
    coinsDiv.scrollTop = coinsDiv.scrollHeight;
    lastDisplayedCoinCount = totalCoins;
    document.getElementById('coin-count').textContent = totalCoins;
}
/**
 * Fetch the statistics from the WASM module and render them in the #stats box,
 * then list any newly found coins.
 *
 * get_statistics() writes through four pointers (u64 attempts, u32 coins,
 * double hash rate, double elapsed time), so temporary buffers are allocated
 * in the module's heap and released after the values have been read.
 */
function updateStats() {
    const attemptsPtr = Module._malloc(8);
    const coinsPtr = Module._malloc(4);
    const hashRatePtr = Module._malloc(8);
    const elapsedPtr = Module._malloc(8);
    // clear both halves of the 64-bit counter before the call
    Module.setValue(attemptsPtr, 0, 'i32');
    Module.setValue(attemptsPtr + 4, 0, 'i32');
    Module._get_statistics(attemptsPtr, coinsPtr, hashRatePtr, elapsedPtr);

    // the 64-bit counter is read as two unsigned 32-bit halves (low half first)
    const readU32 = (ptr) => Module.getValue(ptr, 'i32') >>> 0;
    const attempts = BigInt(readU32(attemptsPtr + 4)) * BigInt(4294967296) + BigInt(readU32(attemptsPtr));
    const coins = Module.getValue(coinsPtr, 'i32');
    const hashRate = Module.getValue(hashRatePtr, 'double');
    const elapsed = Module.getValue(elapsedPtr, 'double');

    [attemptsPtr, coinsPtr, hashRatePtr, elapsedPtr].forEach((ptr) => {
        Module._free(ptr);
    });

    document.getElementById('stats').innerHTML = [
        '',
        '<strong>Mining Statistics:</strong><br>',
        `Attempts: ${attempts.toString()}<br>`,
        `Coins Found: ${coins}<br>`,
        `Hash Rate: ${(hashRate / 1e6).toFixed(2)} MH/s<br>`,
        `Elapsed Time: ${elapsed.toFixed(2)} seconds`,
        ''
    ].join('\n');
    displayNewCoins();
}
/**
 * Start the two timers that drive the page: one runs mining batches
 * back to back, the other refreshes the statistics.
 *
 * The batch size and refresh period come from the two number inputs. Values
 * that are empty, non-numeric or not positive fall back to the inputs' default
 * values (1000000 iterations, 100 ms). The batch size is capped at 2^32-1
 * because the C loop counter is a 32-bit unsigned integer.
 */
function startMining() {
    const readPositiveInt = (elementId, fallback, maximum) => {
        const parsed = Number.parseInt(document.getElementById(elementId).value, 10);
        if (!Number.isFinite(parsed) || parsed <= 0) {
            return fallback;
        }
        return Math.min(parsed, maximum);
    };
    const batchSize = readPositiveInt('batchSize', 1000000, 0xFFFFFFFF);
    const updateMs = readPositiveInt('updateInterval', 100, Number.MAX_SAFE_INTEGER);

    // never leave a previous pair of timers running
    clearInterval(miningInterval);
    clearInterval(updateInterval);

    miningInterval = setInterval(() => {
        if (!mining) {
            clearInterval(miningInterval);
            return;
        }
        Module._mine_coins_wasm(batchSize);
    }, 0);
    updateInterval = setInterval(updateStats, updateMs);
}
}).catch(err => {
console.error('Failed to load WebAssembly module:', err);
document.getElementById('stats').innerHTML = 'Error loading module: ' + err.message;
});
</script>
</body>
</html>

View File

@ -32,7 +32,7 @@ OPENCL_DIR = $(CUDA_DIR)
# RTX 4070 -------------- sm_89 # RTX 4070 -------------- sm_89
# #
CUDA_ARCH = sm_75 CUDA_ARCH = sm_86
# #
@ -42,6 +42,9 @@ CUDA_ARCH = sm_75
clean: clean:
rm -f sha1_tests rm -f sha1_tests
rm -f sha1_cuda_test sha1_cuda_kernel.cubin rm -f sha1_cuda_test sha1_cuda_kernel.cubin
rm -f coin_miner_cpu coin_miner_simd coin_miner_cuda coin_miner_cuda_kernel.cubin
rm -f coin_miner_wasm.js coin_miner_wasm.wasm
rm -f benchmark
rm -f a.out rm -f a.out
@ -64,3 +67,39 @@ sha1_cuda_kernel.cubin: aad_sha1_cuda_kernel.cu aad_sha1.h makefile
nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@ nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@
all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin
#
# DETI coin miners
#
coin_miner_cpu: aad_coin_miner_cpu.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h makefile
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
coin_miner_simd: aad_coin_miner_simd.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h makefile
cc -march=native -Wall -Wshadow -Werror -fopenmp -mavx2 -O3 $< -o $@
coin_miner_cuda_kernel.cubin: aad_coin_miner_cuda_kernel.cu aad_sha1.h makefile
nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@
coin_miner_cuda: aad_coin_miner_cuda.c coin_miner_cuda_kernel.cubin aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h aad_cuda_utilities.h makefile
cc -march=native -Wall -Wshadow -Werror -O3 -I$(CUDA_DIR)/include $< -o $@ -lcuda
coin_miner_wasm: aad_coin_miner_wasm.c aad_sha1.h aad_sha1_cpu.h aad_sha1_wasm.h aad_data_types.h aad_utilities.h aad_vault.h makefile
emcc -O3 -flto -o coin_miner_wasm.js aad_coin_miner_wasm.c \
-s WASM=1 \
-s EXPORTED_FUNCTIONS='["_mine_coins_wasm","_get_statistics","_stop_mining","_reset_mining","_get_found_coin","_get_found_coins_count","_malloc","_free"]' \
-s EXPORTED_RUNTIME_METHODS='["cwrap","ccall","getValue","setValue"]' \
-s ALLOW_MEMORY_GROWTH=1 \
-s MODULARIZE=1 \
-s EXPORT_NAME='CoinMinerModule' \
-s INITIAL_MEMORY=67108864
benchmark: aad_benchmark.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h makefile
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
miners: coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda benchmark
all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin \
coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_cuda_kernel.cubin \
benchmark

Binary file not shown.

Binary file not shown.