Refactor CUDA miner: optimize kernel and improve coin storage handling
Signed-off-by: RubenCGomes <rlcg@ua.pt>
This commit is contained in:
parent
7fd480fe64
commit
7350d6be95
|
|
@ -1,7 +1,7 @@
|
||||||
//
|
//
|
||||||
// Arquiteturas de Alto Desempenho 2025/2026
|
// Arquiteturas de Alto Desempenho 2025/2026
|
||||||
//
|
//
|
||||||
// DETI Coin Miner - CUDA implementation with histograms
|
// DETI Coin Miner - Host Code
|
||||||
//
|
//
|
||||||
|
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
@ -11,173 +11,123 @@
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include "aad_data_types.h"
|
#include "aad_data_types.h"
|
||||||
#include "aad_utilities.h"
|
|
||||||
#include "aad_sha1_cpu.h"
|
#include "aad_sha1_cpu.h"
|
||||||
#include "aad_cuda_utilities.h"
|
#include "aad_cuda_utilities.h"
|
||||||
#include "aad_vault.h"
|
#include "aad_vault.h"
|
||||||
|
|
||||||
#define COINS_STORAGE_SIZE 1024u
|
#define COINS_STORAGE_SIZE 2048u // Increased buffer slightly
|
||||||
#define MAX_HISTOGRAM_BINS 100
|
|
||||||
|
|
||||||
static volatile int keep_running = 1;
|
static volatile int keep_running = 1;
|
||||||
|
|
||||||
void signal_handler(int signum)
|
void signal_handler(int signum) {
|
||||||
{
|
|
||||||
(void)signum;
|
(void)signum;
|
||||||
keep_running = 0;
|
keep_running = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get current wall time in seconds
|
static double get_wall_time(void) {
|
||||||
static double get_wall_time(void)
|
|
||||||
{
|
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.0e-9;
|
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.0e-9;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Coin reconstruction from stored data
|
|
||||||
static void reconstruct_coin(u32_t *stored_data, u32_t coin[14])
|
|
||||||
{
|
|
||||||
// Simply copy the complete coin data from storage
|
|
||||||
for(int i = 0; i < 14; i++)
|
|
||||||
coin[i] = stored_data[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Mine DETI coins using CUDA
|
|
||||||
//
|
|
||||||
static void mine_coins_cuda(u64_t max_attempts, double max_time)
|
static void mine_coins_cuda(u64_t max_attempts, double max_time)
|
||||||
{
|
{
|
||||||
cuda_data_t cd;
|
cuda_data_t cd;
|
||||||
u32_t *host_storage;
|
u32_t *host_storage;
|
||||||
u64_t attempts = 0;
|
u64_t attempts = 0;
|
||||||
u32_t coins_found = 0;
|
u32_t coins_found_total = 0;
|
||||||
u32_t kernel_runs = 0;
|
|
||||||
|
|
||||||
// Initialize CUDA
|
// Initialize CUDA
|
||||||
|
memset(&cd, 0, sizeof(cd));
|
||||||
cd.device_number = 0;
|
cd.device_number = 0;
|
||||||
cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
|
cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
|
||||||
cd.kernel_name = "mine_deti_coins_kernel";
|
cd.kernel_name = "mine_deti_coins_kernel";
|
||||||
|
|
||||||
|
// Allocate memory for results [ Counter (1 u32) | Data ... ]
|
||||||
cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t);
|
cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t);
|
||||||
cd.data_size[1] = 0;
|
cd.data_size[1] = 0;
|
||||||
|
|
||||||
initialize_cuda(&cd);
|
initialize_cuda(&cd);
|
||||||
|
|
||||||
host_storage = (u32_t *)cd.host_data[0];
|
host_storage = (u32_t *)cd.host_data[0];
|
||||||
|
|
||||||
// Kernel configuration
|
// Configure Launch Dimensions
|
||||||
cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE;
|
// Maximizing occupancy:
|
||||||
cd.grid_dim_x = 400; // Large grid for maximum GPU utilization
|
cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE; // Usually 128 or 256
|
||||||
|
cd.grid_dim_x = 80 * 4; // High number of blocks to hide latency
|
||||||
|
|
||||||
u32_t n_threads = cd.grid_dim_x * cd.block_dim_x;
|
u32_t total_threads = cd.grid_dim_x * cd.block_dim_x;
|
||||||
|
u32_t attempts_per_thread = 4096; // Work per kernel launch
|
||||||
|
|
||||||
printf("Mining DETI coins using CUDA...\n");
|
printf("Starting CUDA Miner on %s\n", cd.device_name);
|
||||||
printf("Grid: %u blocks × %u threads = %u total threads\n",
|
printf("Threads: %u, Attempts/Thread: %u\n", total_threads, attempts_per_thread);
|
||||||
cd.grid_dim_x, cd.block_dim_x, n_threads);
|
|
||||||
printf("Kernel: %s\n", cd.kernel_name);
|
|
||||||
|
|
||||||
if(max_attempts > 0 && max_time > 0)
|
|
||||||
printf("Will stop after %llu attempts OR %.2f seconds (whichever comes first)\n",
|
|
||||||
(unsigned long long)max_attempts, max_time);
|
|
||||||
else if(max_attempts > 0)
|
|
||||||
printf("Will stop after %llu attempts\n", (unsigned long long)max_attempts);
|
|
||||||
else if(max_time > 0)
|
|
||||||
printf("Will stop after %.2f seconds\n", max_time);
|
|
||||||
else
|
|
||||||
printf("Running indefinitely until Ctrl+C...\n");
|
|
||||||
|
|
||||||
printf("Press Ctrl+C to stop\n\n");
|
|
||||||
|
|
||||||
u64_t base_nonce = 0;
|
u64_t base_nonce = 0;
|
||||||
u32_t attempts_per_thread = 1024 * 16; // Increased attempts per thread
|
|
||||||
|
|
||||||
double start_time = get_wall_time();
|
double start_time = get_wall_time();
|
||||||
time_measurement();
|
|
||||||
|
|
||||||
while(keep_running)
|
// Arguments pointers
|
||||||
{
|
cd.n_kernel_arguments = 3;
|
||||||
// Check stopping conditions
|
|
||||||
if(max_attempts > 0 && attempts >= max_attempts)
|
|
||||||
break;
|
|
||||||
|
|
||||||
double elapsed = get_wall_time() - start_time;
|
|
||||||
if(max_time > 0 && elapsed >= max_time)
|
|
||||||
break;
|
|
||||||
|
|
||||||
// Initialize storage area
|
|
||||||
host_storage[0] = 1u; // First unused index
|
|
||||||
|
|
||||||
// Copy to device
|
|
||||||
host_to_device_copy(&cd, 0);
|
|
||||||
|
|
||||||
// Set kernel arguments
|
|
||||||
cd.n_kernel_arguments = 2;
|
|
||||||
cd.arg[0] = &cd.device_data[0];
|
cd.arg[0] = &cd.device_data[0];
|
||||||
cd.arg[1] = &base_nonce;
|
cd.arg[1] = &base_nonce;
|
||||||
cd.arg[2] = &attempts_per_thread;
|
cd.arg[2] = &attempts_per_thread;
|
||||||
|
|
||||||
// Launch the CUDA kernel
|
while(keep_running)
|
||||||
|
{
|
||||||
|
// 1. Reset storage counter
|
||||||
|
host_storage[0] = 1u; // Index 0 is the atomic counter. Start data at index 1.
|
||||||
|
host_to_device_copy(&cd, 0);
|
||||||
|
|
||||||
|
// 2. Launch Kernel
|
||||||
launch_kernel(&cd);
|
launch_kernel(&cd);
|
||||||
|
|
||||||
// Copy results back
|
// 3. Retrieve Results
|
||||||
device_to_host_copy(&cd, 0);
|
device_to_host_copy(&cd, 0);
|
||||||
|
|
||||||
// Process found coins
|
// 4. Process Found Coins
|
||||||
u32_t n_coins_this_kernel = 0;
|
u32_t next_write_idx = host_storage[0];
|
||||||
u32_t n_stored = (host_storage[0] - 1) / 14;
|
u32_t num_u32_written = next_write_idx - 1;
|
||||||
|
|
||||||
if(n_stored > 0 && host_storage[0] < COINS_STORAGE_SIZE)
|
// Each coin is 14 u32 words
|
||||||
|
if(num_u32_written >= 14)
|
||||||
{
|
{
|
||||||
for(u32_t i = 0; i < n_stored; i++)
|
int coins_in_batch = num_u32_written / 14;
|
||||||
|
for(int c = 0; c < coins_in_batch; c++)
|
||||||
{
|
{
|
||||||
u32_t coin[14];
|
u32_t found_coin[14];
|
||||||
reconstruct_coin(&host_storage[1 + i * 14], coin);
|
// Copy from host buffer to temp array
|
||||||
|
for(int w=0; w<14; w++) {
|
||||||
|
found_coin[w] = host_storage[1 + (c * 14) + w];
|
||||||
|
}
|
||||||
|
|
||||||
coins_found++;
|
// Verify/Save using required function
|
||||||
n_coins_this_kernel++;
|
save_coin(found_coin);
|
||||||
printf("COIN FOUND! (kernel %u, coin %u in this kernel). Total coins:%u\n",
|
coins_found_total++;
|
||||||
kernel_runs, n_coins_this_kernel, coins_found);
|
printf("Coin Found! Total: %u\n", coins_found_total);
|
||||||
save_coin(coin);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update counters
|
// 5. Update Progress
|
||||||
kernel_runs++;
|
u64_t batch_attempts = (u64_t)total_threads * attempts_per_thread;
|
||||||
u64_t attempts_this_launch = (u64_t)n_threads * attempts_per_thread;
|
attempts += batch_attempts;
|
||||||
attempts += attempts_this_launch;
|
base_nonce += batch_attempts; // Ensure next kernel uses new nonces
|
||||||
base_nonce += attempts_this_launch;
|
|
||||||
|
// 6. Check Limits
|
||||||
|
if((max_attempts > 0 && attempts >= max_attempts) ||
|
||||||
|
(max_time > 0 && (get_wall_time() - start_time) >= max_time)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
time_measurement();
|
// Cleanup
|
||||||
double total_time = cpu_time_delta();
|
double total_time = get_wall_time() - start_time;
|
||||||
|
printf("\nMining Finished.\n");
|
||||||
printf("\n=== Mining Statistics ===\n");
|
printf("Attempts: %llu\n", (unsigned long long)attempts);
|
||||||
printf("Total attempts: %llu\n", (unsigned long long)attempts);
|
printf("Time: %.4fs\n", total_time);
|
||||||
printf("Total time: %.2f seconds\n", total_time);
|
printf("Hashrate: %.2f MH/s\n", (attempts / total_time) / 1000000.0);
|
||||||
printf("Average rate: %.2f attempts/second\n", attempts / total_time);
|
|
||||||
printf("Coins found: %u\n", coins_found);
|
|
||||||
printf("Kernel launches: %u\n", kernel_runs);
|
|
||||||
|
|
||||||
// Save any remaining coins
|
|
||||||
save_coin(NULL);
|
|
||||||
|
|
||||||
|
save_coin(NULL); // Flush vault
|
||||||
terminate_cuda(&cd);
|
terminate_cuda(&cd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_usage(const char *prog_name)
|
|
||||||
{
|
|
||||||
printf("Usage: %s [OPTIONS]\n", prog_name);
|
|
||||||
printf("Options:\n");
|
|
||||||
printf(" -a <attempts> Maximum number of attempts\n");
|
|
||||||
printf(" -t <seconds> Maximum time in seconds\n");
|
|
||||||
printf(" -h Show this help message\n");
|
|
||||||
printf("\nExamples:\n");
|
|
||||||
printf(" %s -a 1000000000 # Run for 1B attempts\n", prog_name);
|
|
||||||
printf(" %s -t 60 # Run for 60 seconds\n", prog_name);
|
|
||||||
printf(" %s -a 1000000000 -t 60 # Stop at 1B attempts OR 60s (whichever first)\n", prog_name);
|
|
||||||
printf(" %s # Run indefinitely until Ctrl+C\n", prog_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
u64_t max_attempts = 0;
|
u64_t max_attempts = 0;
|
||||||
|
|
@ -186,27 +136,17 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
signal(SIGINT, signal_handler);
|
signal(SIGINT, signal_handler);
|
||||||
|
|
||||||
// Parse command line options
|
while((opt = getopt(argc, argv, "a:t:")) != -1)
|
||||||
while((opt = getopt(argc, argv, "a:t:h")) != -1)
|
|
||||||
{
|
{
|
||||||
switch(opt)
|
switch(opt) {
|
||||||
{
|
case 'a': max_attempts = strtoull(optarg, NULL, 10); break;
|
||||||
case 'a':
|
case 't': max_time = atof(optarg); break;
|
||||||
max_attempts = strtoull(optarg, NULL, 10);
|
|
||||||
break;
|
|
||||||
case 't':
|
|
||||||
max_time = atof(optarg);
|
|
||||||
break;
|
|
||||||
case 'h':
|
|
||||||
print_usage(argv[0]);
|
|
||||||
return 0;
|
|
||||||
default:
|
default:
|
||||||
print_usage(argv[0]);
|
fprintf(stderr, "Usage: %s -a <attempts> -t <seconds>\n", argv[0]);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mine_coins_cuda(max_attempts, max_time);
|
mine_coins_cuda(max_attempts, max_time);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -1,96 +1,127 @@
|
||||||
//
|
//
|
||||||
// Arquiteturas de Alto Desempenho 2025/2026
|
// Arquiteturas de Alto Desempenho 2025/2026
|
||||||
//
|
//
|
||||||
// DETI Coin Miner - CUDA kernel (optimized for mining)
|
// DETI Coin Miner - CUDA kernel (Optimized)
|
||||||
//
|
//
|
||||||
|
|
||||||
#include "aad_sha1.h"
|
#include "aad_sha1.h"
|
||||||
#include "aad_data_types.h"
|
#include "aad_data_types.h"
|
||||||
|
|
||||||
//
|
//
|
||||||
// Optimized CUDA kernel for DETI coin mining
|
// Optimized CUDA kernel
|
||||||
// Each thread generates coins using the same approach as CPU/SIMD miners
|
|
||||||
//
|
//
|
||||||
|
|
||||||
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
|
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
|
||||||
void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread)
|
void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread)
|
||||||
{
|
{
|
||||||
u32_t coin[14];
|
u32_t coin[16]; // SHA1 requires 16 words (64 bytes)
|
||||||
u32_t hash[5];
|
u32_t hash[5];
|
||||||
u32_t n;
|
|
||||||
u08_t *bytes = (u08_t *)coin;
|
|
||||||
|
|
||||||
// Get thread index (used as offset from base counter)
|
// 1. Initialize Fixed Prefix: "DETI coin 2 " (12 bytes)
|
||||||
n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x;
|
// We construct this directly into the u32 array.
|
||||||
|
// Note: We assume the system is Little Endian, but SHA1 input via macro usually handles bytes.
|
||||||
|
// Ideally, we pack bytes: 'D','E','T','I' -> 0x44455449
|
||||||
|
|
||||||
// Initialize coin template: "DETI coin 2 " + variable + "\n\x80"
|
// Word 0: "DETI"
|
||||||
// Use byte-swapped format to match host expectations (idx ^ 3)
|
coin[0] = (u32_t)'D' << 24 | (u32_t)'E' << 16 | (u32_t)'T' << 8 | (u32_t)'I';
|
||||||
coin[0] = ('D' << 24) + ('E' << 16) + ('T' << 8) + 'I';
|
// Word 1: " coi"
|
||||||
coin[1] = (' ' << 24) + ('c' << 16) + ('o' << 8) + 'i';
|
coin[1] = (u32_t)' ' << 24 | (u32_t)'c' << 16 | (u32_t)'o' << 8 | (u32_t)'i';
|
||||||
coin[2] = ('n' << 24) + (' ' << 16) + ('2' << 8) + ' ';
|
// Word 2: "n 2 "
|
||||||
|
coin[2] = (u32_t)'n' << 24 | (u32_t)' ' << 16 | (u32_t)'2' << 8 | (u32_t)' ';
|
||||||
|
|
||||||
// Fill the variable part of the coin with a pattern
|
// 2. Initialize Variable Part (Bytes 12 to 53)
|
||||||
for(int i = 3; i < 14; i++)
|
// Fill with a safe printable char ' ' (0x20)
|
||||||
coin[i] = 0x41414141; // 'AAAA'
|
#pragma unroll
|
||||||
|
for(int i = 3; i <= 12; i++) {
|
||||||
|
coin[i] = 0x20202020;
|
||||||
|
}
|
||||||
|
// Word 13 is partial variable + suffix
|
||||||
|
// Bytes 52, 53 are variable. Byte 54 is '\n', Byte 55 is 0x80 (Padding)
|
||||||
|
coin[13] = 0x20200A80;
|
||||||
|
|
||||||
// End with newline and padding
|
// 3. Initialize SHA1 Length Padding
|
||||||
bytes[0x36 ^ 3] = '\n'; // Position 54
|
// Message is 55 bytes. Length in bits = 55 * 8 = 440.
|
||||||
bytes[0x37 ^ 3] = 0x80; // Position 55
|
// SHA1 puts length at the very end (Word 15).
|
||||||
|
coin[14] = 0x00000000;
|
||||||
|
coin[15] = 440;
|
||||||
|
|
||||||
for(u32_t i = 0; i < attempts_per_thread; ++i) {
|
// 4. Thread Unique Initialization
|
||||||
// Initialize variable part (positions 12-53, 42 bytes)
|
// We use the thread ID to set the initial state of the variable bytes
|
||||||
// Start with A-Z pattern like CPU/SIMD miners
|
// to ensure every thread starts at a different point.
|
||||||
for(int j = 12; j < 54; j++)
|
u64_t thread_id = (u64_t)blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
bytes[j ^ 3] = 'A' + ((j - 12) % 26);
|
u64_t nonce_offset = base_nonce + thread_id * attempts_per_thread;
|
||||||
|
|
||||||
// Calculate offset based on thread index and parameters
|
// "Seeding" the message with the nonce (Fast update of specific bytes)
|
||||||
// This creates a unique starting point for each thread
|
// We modify the bytes in words 3 through 12.
|
||||||
u64_t offset = base_nonce + n + (u64_t)i * gridDim.x * blockDim.x;
|
// Accessing as byte pointer for easier manipulation
|
||||||
|
u08_t *byte_ptr = (u08_t*)coin;
|
||||||
|
|
||||||
// Apply offset to variable part (increment the coin counter)
|
// Apply the nonce offset to the message structure (Odometer setup)
|
||||||
for(int pos = 53; pos >= 12 && offset > 0; pos--)
|
// Start modifying from byte 12
|
||||||
{
|
u64_t temp_nonce = nonce_offset;
|
||||||
u08_t *byte = &bytes[pos ^ 3];
|
for (int k = 12; k < 54 && temp_nonce > 0; k++) {
|
||||||
u64_t add = offset % 127;
|
u32_t val = byte_ptr[k ^ 3] + (temp_nonce % 95); // mod 95 to stay in printable ASCII
|
||||||
offset /= 127;
|
temp_nonce /= 95;
|
||||||
|
|
||||||
u32_t val = *byte + add;
|
if (val > 0x7E) { // Wrap around printable range
|
||||||
u08_t new_val = val % 127;
|
val -= 95;
|
||||||
|
temp_nonce++; // Carry
|
||||||
// Skip newline character (ASCII 10) in the variable part
|
}
|
||||||
if(new_val == '\n')
|
byte_ptr[k ^ 3] = (u08_t)val;
|
||||||
new_val++;
|
|
||||||
|
|
||||||
*byte = new_val;
|
|
||||||
offset += val / 127; // Carry
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute SHA1 hash
|
// 5. Mining Loop
|
||||||
|
for(u32_t attempt = 0; attempt < attempts_per_thread; attempt++)
|
||||||
|
{
|
||||||
|
// --- SHA1 HASH CALCULATION ---
|
||||||
#define T u32_t
|
#define T u32_t
|
||||||
#define C(c) (c)
|
#define C(c) (c)
|
||||||
#define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
|
#define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
|
||||||
#define DATA(idx) coin[idx]
|
#define DATA(idx) coin[idx]
|
||||||
#define HASH(idx) hash[idx]
|
#define HASH(idx) hash[idx]
|
||||||
|
|
||||||
CUSTOM_SHA1_CODE();
|
CUSTOM_SHA1_CODE();
|
||||||
|
|
||||||
#undef T
|
#undef T
|
||||||
#undef C
|
#undef C
|
||||||
#undef ROTATE
|
#undef ROTATE
|
||||||
#undef DATA
|
#undef DATA
|
||||||
#undef HASH
|
#undef HASH
|
||||||
|
|
||||||
// Check if this is a valid DETI coin
|
// --- CHECK RESULT ---
|
||||||
|
// Check for "aad20250" prefix (AAD20250 hex)
|
||||||
if(hash[0] == 0xAAD20250u)
|
if(hash[0] == 0xAAD20250u)
|
||||||
{
|
{
|
||||||
// Found a coin! Store it atomically
|
// Found a candidate! Save it.
|
||||||
u32_t idx = atomicAdd(coins_storage_area, 14u);
|
u32_t idx = atomicAdd(&coins_storage_area[0], 14u);
|
||||||
|
|
||||||
// Make sure we don't write outside buffer
|
// Boundary check (first word is count, data starts at index 1)
|
||||||
if(idx < 1024u - 14u)
|
// We normalize the index to be relative to storage start
|
||||||
|
if(idx < 1024u - 15u) // Ensure space
|
||||||
{
|
{
|
||||||
// Store the complete coin data
|
// Store valid coin (14 words = 56 bytes, covers the 55 byte content)
|
||||||
for(int k = 0; k < 14; k++)
|
// Adjust idx because coins_storage_area[0] is the counter
|
||||||
coins_storage_area[idx + k] = coin[k];
|
for(int w=0; w<14; w++) {
|
||||||
|
coins_storage_area[idx + w] = coin[w];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- UPDATE MESSAGE (ODOMETER) ---
|
||||||
|
// Increment the message string for the next attempt
|
||||||
|
// We only touch the variable bytes.
|
||||||
|
// Start at byte 53 (just before the \n) and work backwards if carry needed.
|
||||||
|
// Note: byte_ptr access needs XOR 3 for Endianness correction on arrays treated as words
|
||||||
|
|
||||||
|
int pos = 53;
|
||||||
|
while (pos >= 12) {
|
||||||
|
u08_t *b = &byte_ptr[pos ^ 3];
|
||||||
|
(*b)++;
|
||||||
|
if (*b <= 0x7E) {
|
||||||
|
break; // No carry, done incrementing
|
||||||
|
}
|
||||||
|
// Overflow printable range, reset to start of range (0x20) and carry
|
||||||
|
*b = 0x20;
|
||||||
|
pos--;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
7
makefile
7
makefile
|
|
@ -98,11 +98,8 @@ coin_miner_wasm: aad_coin_miner_wasm.c aad_sha1.h aad_sha1_cpu.h aad_sha1_wasm.h
|
||||||
-s EXPORT_NAME='CoinMinerModule' \
|
-s EXPORT_NAME='CoinMinerModule' \
|
||||||
-s INITIAL_MEMORY=67108864
|
-s INITIAL_MEMORY=67108864
|
||||||
|
|
||||||
benchmark: aad_benchmark.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h makefile
|
|
||||||
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
|
|
||||||
|
|
||||||
miners: coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_ocl benchmark
|
miners: coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_ocl
|
||||||
|
|
||||||
all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin \
|
all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin \
|
||||||
coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl \
|
coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl
|
||||||
benchmark
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue