Compare commits

..

No commits in common. "80b65a314c53a3a6eba5b40a1d7114ac05d0e766" and "7347ddd1db8cd93bae65775a46d677d462108bb5" have entirely different histories.

7 changed files with 225 additions and 443 deletions

2
.gitignore vendored
View File

@ -68,8 +68,6 @@ coin_miner_cuda
coin_miner_ocl coin_miner_ocl
coin_miner_wasm.js coin_miner_wasm.js
coin_miner_wasm.wasm coin_miner_wasm.wasm
coin_miner_cpu_threads
coin_miner_dna_shape_cuda
# Vault # Vault
deti_coins*_vault.txt deti_coins*_vault.txt

View File

@ -1,7 +1,7 @@
// //
// Arquiteturas de Alto Desempenho 2025/2026 // Arquiteturas de Alto Desempenho 2025/2026
// //
// DETI Coin Miner - Host Code // DETI Coin Miner - CUDA implementation with histograms
// //
#include <time.h> #include <time.h>
@ -11,123 +11,173 @@
#include <signal.h> #include <signal.h>
#include <getopt.h> #include <getopt.h>
#include "aad_data_types.h" #include "aad_data_types.h"
#include "aad_utilities.h"
#include "aad_sha1_cpu.h" #include "aad_sha1_cpu.h"
#include "aad_cuda_utilities.h" #include "aad_cuda_utilities.h"
#include "aad_vault.h" #include "aad_vault.h"
#define COINS_STORAGE_SIZE 2048u // Increased buffer slightly #define COINS_STORAGE_SIZE 1024u
#define MAX_HISTOGRAM_BINS 100
static volatile int keep_running = 1; static volatile int keep_running = 1;
void signal_handler(int signum) { void signal_handler(int signum)
{
(void)signum; (void)signum;
keep_running = 0; keep_running = 0;
} }
static double get_wall_time(void) { // Get current wall time in seconds
static double get_wall_time(void)
{
struct timespec ts; struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.0e-9; return (double)ts.tv_sec + (double)ts.tv_nsec * 1.0e-9;
} }
// Coin reconstruction from stored data
static void reconstruct_coin(u32_t *stored_data, u32_t coin[14])
{
// Simply copy the complete coin data from storage
for(int i = 0; i < 14; i++)
coin[i] = stored_data[i];
}
//
// Mine DETI coins using CUDA
//
static void mine_coins_cuda(u64_t max_attempts, double max_time) static void mine_coins_cuda(u64_t max_attempts, double max_time)
{ {
cuda_data_t cd; cuda_data_t cd;
u32_t *host_storage; u32_t *host_storage;
u64_t attempts = 0; u64_t attempts = 0;
u32_t coins_found_total = 0; u32_t coins_found = 0;
u32_t kernel_runs = 0;
// Initialize CUDA // Initialize CUDA
memset(&cd, 0, sizeof(cd));
cd.device_number = 0; cd.device_number = 0;
cd.cubin_file_name = "coin_miner_cuda_kernel.cubin"; cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
cd.kernel_name = "mine_deti_coins_kernel"; cd.kernel_name = "mine_deti_coins_kernel";
// Allocate memory for results [ Counter (1 u32) | Data ... ]
cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t); cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t);
cd.data_size[1] = 0; cd.data_size[1] = 0;
initialize_cuda(&cd); initialize_cuda(&cd);
host_storage = (u32_t *)cd.host_data[0]; host_storage = (u32_t *)cd.host_data[0];
// Configure Launch Dimensions // Kernel configuration
// Maximizing occupancy: cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE;
cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE; // Usually 128 or 256 cd.grid_dim_x = 4096; // Large grid for maximum GPU utilization
cd.grid_dim_x = 80 * 4; // High number of blocks to hide latency
u32_t total_threads = cd.grid_dim_x * cd.block_dim_x; u32_t n_threads = cd.grid_dim_x * cd.block_dim_x;
u32_t attempts_per_thread = 4096; // Work per kernel launch
printf("Starting CUDA Miner on %s\n", cd.device_name); printf("Mining DETI coins using CUDA...\n");
printf("Threads: %u, Attempts/Thread: %u\n", total_threads, attempts_per_thread); printf("Grid: %u blocks × %u threads = %u total threads\n",
cd.grid_dim_x, cd.block_dim_x, n_threads);
printf("Kernel: %s\n", cd.kernel_name);
if(max_attempts > 0 && max_time > 0)
printf("Will stop after %llu attempts OR %.2f seconds (whichever comes first)\n",
(unsigned long long)max_attempts, max_time);
else if(max_attempts > 0)
printf("Will stop after %llu attempts\n", (unsigned long long)max_attempts);
else if(max_time > 0)
printf("Will stop after %.2f seconds\n", max_time);
else
printf("Running indefinitely until Ctrl+C...\n");
printf("Press Ctrl+C to stop\n\n");
u64_t base_nonce = 0; u64_t base_nonce = 0;
double start_time = get_wall_time(); u32_t attempts_per_thread = 1024 * 8; // Increased attempts per thread
// Arguments pointers double start_time = get_wall_time();
cd.n_kernel_arguments = 3; time_measurement();
cd.arg[0] = &cd.device_data[0];
cd.arg[1] = &base_nonce;
cd.arg[2] = &attempts_per_thread;
while(keep_running) while(keep_running)
{ {
// 1. Reset storage counter // Check stopping conditions
host_storage[0] = 1u; // Index 0 is the atomic counter. Start data at index 1. if(max_attempts > 0 && attempts >= max_attempts)
break;
double elapsed = get_wall_time() - start_time;
if(max_time > 0 && elapsed >= max_time)
break;
// Initialize storage area
host_storage[0] = 1u; // First unused index
// Copy to device
host_to_device_copy(&cd, 0); host_to_device_copy(&cd, 0);
// 2. Launch Kernel // Set kernel arguments
cd.n_kernel_arguments = 2;
cd.arg[0] = &cd.device_data[0];
cd.arg[1] = &base_nonce;
cd.arg[2] = &attempts_per_thread;
// Launch the CUDA kernel
launch_kernel(&cd); launch_kernel(&cd);
// 3. Retrieve Results // Copy results back
device_to_host_copy(&cd, 0); device_to_host_copy(&cd, 0);
// 4. Process Found Coins // Process found coins
u32_t next_write_idx = host_storage[0]; u32_t n_coins_this_kernel = 0;
u32_t num_u32_written = next_write_idx - 1; u32_t n_stored = (host_storage[0] - 1) / 14;
// Each coin is 14 u32 words if(n_stored > 0 && host_storage[0] < COINS_STORAGE_SIZE)
if(num_u32_written >= 14)
{ {
int coins_in_batch = num_u32_written / 14; for(u32_t i = 0; i < n_stored; i++)
for(int c = 0; c < coins_in_batch; c++) {
{ u32_t coin[14];
u32_t found_coin[14]; reconstruct_coin(&host_storage[1 + i * 14], coin);
// Copy from host buffer to temp array
for(int w=0; w<14; w++) {
found_coin[w] = host_storage[1 + (c * 14) + w];
}
// Verify/Save using required function coins_found++;
save_coin(found_coin); n_coins_this_kernel++;
coins_found_total++; printf("COIN FOUND! (kernel %u, coin %u in this kernel). Total coins:%u\n",
printf("Coin Found! Total: %u\n", coins_found_total); kernel_runs, n_coins_this_kernel, coins_found);
} save_coin(coin);
}
} }
// 5. Update Progress // Update counters
u64_t batch_attempts = (u64_t)total_threads * attempts_per_thread; kernel_runs++;
attempts += batch_attempts; u64_t attempts_this_launch = (u64_t)n_threads * attempts_per_thread;
base_nonce += batch_attempts; // Ensure next kernel uses new nonces attempts += attempts_this_launch;
base_nonce += attempts_this_launch;
// 6. Check Limits
if((max_attempts > 0 && attempts >= max_attempts) ||
(max_time > 0 && (get_wall_time() - start_time) >= max_time)) {
break;
}
} }
// Cleanup time_measurement();
double total_time = get_wall_time() - start_time; double total_time = cpu_time_delta();
printf("\nMining Finished.\n");
printf("Attempts: %llu\n", (unsigned long long)attempts); printf("\n=== Mining Statistics ===\n");
printf("Time: %.4fs\n", total_time); printf("Total attempts: %llu\n", (unsigned long long)attempts);
printf("Hashrate: %.2f MH/s\n", (attempts / total_time) / 1000000.0); printf("Total time: %.2f seconds\n", total_time);
printf("Average rate: %.2f attempts/second\n", attempts / total_time);
printf("Coins found: %u\n", coins_found);
printf("Kernel launches: %u\n", kernel_runs);
// Save any remaining coins
save_coin(NULL);
save_coin(NULL); // Flush vault
terminate_cuda(&cd); terminate_cuda(&cd);
} }
void print_usage(const char *prog_name)
{
printf("Usage: %s [OPTIONS]\n", prog_name);
printf("Options:\n");
printf(" -a <attempts> Maximum number of attempts\n");
printf(" -t <seconds> Maximum time in seconds\n");
printf(" -h Show this help message\n");
printf("\nExamples:\n");
printf(" %s -a 1000000000 # Run for 1B attempts\n", prog_name);
printf(" %s -t 60 # Run for 60 seconds\n", prog_name);
printf(" %s -a 1000000000 -t 60 # Stop at 1B attempts OR 60s (whichever first)\n", prog_name);
printf(" %s # Run indefinitely until Ctrl+C\n", prog_name);
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
u64_t max_attempts = 0; u64_t max_attempts = 0;
@ -136,17 +186,27 @@ int main(int argc, char *argv[])
signal(SIGINT, signal_handler); signal(SIGINT, signal_handler);
while((opt = getopt(argc, argv, "a:t:")) != -1) // Parse command line options
while((opt = getopt(argc, argv, "a:t:h")) != -1)
{ {
switch(opt) { switch(opt)
case 'a': max_attempts = strtoull(optarg, NULL, 10); break; {
case 't': max_time = atof(optarg); break; case 'a':
max_attempts = strtoull(optarg, NULL, 10);
break;
case 't':
max_time = atof(optarg);
break;
case 'h':
print_usage(argv[0]);
return 0;
default: default:
fprintf(stderr, "Usage: %s -a <attempts> -t <seconds>\n", argv[0]); print_usage(argv[0]);
return 1; return 1;
} }
} }
mine_coins_cuda(max_attempts, max_time); mine_coins_cuda(max_attempts, max_time);
return 0; return 0;
} }

View File

@ -1,192 +1,96 @@
// //
// Arquiteturas de Alto Desempenho 2025/2026 // Arquiteturas de Alto Desempenho 2025/2026
// //
// DETI Coin Miner - CUDA kernel (Optimized) // DETI Coin Miner - CUDA kernel (optimized for mining)
// //
#include "aad_sha1.h" #include "aad_sha1.h"
#include "aad_data_types.h" #include "aad_data_types.h"
// //
// Optimized CUDA kernel // Optimized CUDA kernel for DETI coin mining
// Each thread generates coins using the same approach as CPU/SIMD miners
// //
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1)
void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread) void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread)
{ {
u32_t coin[16]; // SHA1 requires 16 words (64 bytes) u32_t coin[14];
u32_t hash[5]; u32_t hash[5];
u32_t n;
u08_t *bytes = (u08_t *)coin;
// 1. Initialize Fixed Prefix: "DETI coin 2 " (12 bytes) // Get thread index (used as offset from base counter)
coin[0] = (u32_t)'D' << 24 | (u32_t)'E' << 16 | (u32_t)'T' << 8 | (u32_t)'I'; n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x;
coin[1] = (u32_t)' ' << 24 | (u32_t)'c' << 16 | (u32_t)'o' << 8 | (u32_t)'i';
coin[2] = (u32_t)'n' << 24 | (u32_t)' ' << 16 | (u32_t)'2' << 8 | (u32_t)' ';
// 2. Initialize Variable Part (Bytes 12 to 53) // Initialize coin template: "DETI coin 2 " + variable + "\n\x80"
// Fill with a safe printable char ' ' (0x20) // Use byte-swapped format to match host expectations (idx ^ 3)
#pragma unroll coin[0] = ('D' << 24) + ('E' << 16) + ('T' << 8) + 'I';
for(int i = 3; i <= 12; i++) { coin[1] = (' ' << 24) + ('c' << 16) + ('o' << 8) + 'i';
coin[i] = 0x20202020; coin[2] = ('n' << 24) + (' ' << 16) + ('2' << 8) + ' ';
}
// Word 13 is partial variable + suffix
// Bytes 52, 53 are variable. Byte 54 is '\n', Byte 55 is 0x80 (Padding)
coin[13] = 0x20200A80;
// 3. Initialize SHA1 Length Padding // Fill the variable part of the coin with a pattern
// Message is 55 bytes. Length in bits = 55 * 8 = 440. for(int i = 3; i < 14; i++)
// SHA1 puts length at the very end (Word 15). coin[i] = 0x41414141; // 'AAAA'
coin[14] = 0x00000000;
coin[15] = 440;
// 4. Thread Unique Initialization // End with newline and padding
// Uses thread ID to set the initial state of the variable bytes bytes[0x36 ^ 3] = '\n'; // Position 54
// to ensure every thread starts at a different point. bytes[0x37 ^ 3] = 0x80; // Position 55
u64_t thread_id = (u64_t)blockIdx.x * blockDim.x + threadIdx.x;
u64_t nonce_offset = base_nonce + thread_id * attempts_per_thread;
// Seeding the message with the nonce (Fast update of specific bytes) for(u32_t i = 0; i < attempts_per_thread; ++i) {
u08_t *byte_ptr = (u08_t*)coin; // Initialize variable part (positions 12-53, 42 bytes)
// Start with A-Z pattern like CPU/SIMD miners
for(int j = 12; j < 54; j++)
bytes[j ^ 3] = 'A' + ((j - 12) % 26);
// Apply the nonce offset to the message structure // Calculate offset based on thread index and parameters
u64_t temp_nonce = nonce_offset; // This creates a unique starting point for each thread
for (int k = 12; k < 54 && temp_nonce > 0; k++) { u64_t offset = base_nonce + n + (u64_t)i * gridDim.x * blockDim.x;
u32_t val = byte_ptr[k ^ 3] + (temp_nonce % 95); // mod 95 to stay in printable ASCII
temp_nonce /= 95;
if (val > 0x7E) { // Wrap around printable range // Apply offset to variable part (increment the coin counter)
val -= 95; for(int pos = 53; pos >= 12 && offset > 0; pos--)
temp_nonce++; // Carry
}
byte_ptr[k ^ 3] = (u08_t)val;
}
// 5. Mining Loop
for(u32_t attempt = 0; attempt < attempts_per_thread; attempt++)
{
// --- SHA1 HASH CALCULATION ---
#define T u32_t
#define C(c) (c)
#define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
#define DATA(idx) coin[idx]
#define HASH(idx) hash[idx]
CUSTOM_SHA1_CODE();
#undef T
#undef C
#undef ROTATE
#undef DATA
#undef HASH
// --- CHECK RESULT ---
// Check for "aad20250" prefix (AAD20250 hex)
if(hash[0] == 0xAAD20250u)
{ {
// Found a candidate! Save it. u08_t *byte = &bytes[pos ^ 3];
u32_t idx = atomicAdd(&coins_storage_area[0], 14u); u64_t add = offset % 127;
offset /= 127;
// Boundary check (first word is count, data starts at index 1) u32_t val = *byte + add;
// We normalize the index to be relative to storage start u08_t new_val = val % 127;
if(idx < 1024u - 15u) // Ensure space
{ // Skip newline character (ASCII 10) in the variable part
// Store valid coin (14 words = 56 bytes, covers the 55 byte content) if(new_val == '\n')
// Adjust idx because coins_storage_area[0] is the counter new_val++;
for(int w=0; w<14; w++) {
coins_storage_area[idx + w] = coin[w]; *byte = new_val;
} offset += val / 127; // Carry
}
} }
// --- UPDATE MESSAGE (ODOMETER) --- // Compute SHA1 hash
// Increment the message string for the next attempt # define T u32_t
// Start at byte 53 (just before the \n) and work backwards if carry needed. # define C(c) (c)
# define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
int pos = 53; # define DATA(idx) coin[idx]
while (pos >= 12) { # define HASH(idx) hash[idx]
u08_t *b = &byte_ptr[pos ^ 3];
(*b)++;
if (*b <= 0x7E) {
break; // No carry, done incrementing
}
// Overflow printable range, reset to start of range (0x20) and carry
*b = 0x20;
pos--;
}
}
}
//
// Kernel: Mines a coin where the first 48 bytes are FIXED (the visual pattern)
// and only the last ~7 bytes are mutated to find the hash.
//
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
void mine_visual_row_kernel(u32_t *coins_storage_area, u32_t *row_template, u64_t base_nonce)
{
u32_t coin[16]; // SHA1 working buffer
u32_t hash[5];
// 1. Load the template
#pragma unroll
for(int i = 0; i < 12; i++) {
coin[i] = row_template[i];
}
// 2. Setup the "Mining Area" (Bytes 48-53)
// Template provided by host: [ ... visual ... ] [ mining_space ] \n 0x80
coin[12] = 0x41414141; // Initialize mining space with 'AAAA'
coin[13] = row_template[13]; // This contains the \n (byte 54) and 0x80 (byte 55)
// SHA1 Length padding (55 bytes = 440 bits)
coin[14] = 0;
coin[15] = 440;
// 3. Thread unique nonce calculation
u64_t thread_id = (u64_t)blockIdx.x * blockDim.x + threadIdx.x;
u64_t nonce = base_nonce + thread_id; // Simple linear nonce
// 4. Map nonce to the "Mining Area" (Bytes 48-53)
// Change bytes from 48 to 53
u08_t *bytes = (u08_t*)coin;
u64_t temp_nonce = nonce;
for(int k = 48; k <= 53; k++)
{
// Map to printable ASCII (0x21 to 0x7E) to avoid forbidden \n
u32_t val = (bytes[k^3] + (temp_nonce % 90));
temp_nonce /= 90;
if(val > 0x7E) {
val = 0x21 + (val - 0x7E); // Wrap
temp_nonce++; // Carry
}
bytes[k^3] = (u08_t)val;
}
// 5. SHA1 Computation
#define T u32_t
#define C(c) (c)
#define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
#define DATA(idx) coin[idx]
#define HASH(idx) hash[idx]
CUSTOM_SHA1_CODE(); CUSTOM_SHA1_CODE();
# undef T
# undef C
# undef ROTATE
# undef DATA
# undef HASH
#undef T // Check if this is a valid DETI coin
#undef C
#undef ROTATE
#undef DATA
#undef HASH
// 6. Check Result
if(hash[0] == 0xAAD20250u) if(hash[0] == 0xAAD20250u)
{ {
u32_t idx = atomicAdd(&coins_storage_area[0], 14u); // Found a coin! Store it atomically
if(idx < 1024u - 15u) u32_t idx = atomicAdd(coins_storage_area, 14u);
{
// Save the found coin // Make sure we don't write outside buffer
for(int w=0; w<14; w++) { if(idx < 1024u - 14u)
coins_storage_area[idx + w] = coin[w]; {
} // Store the complete coin data
for(int k = 0; k < 14; k++)
coins_storage_area[idx + k] = coin[k];
}
} }
} }
} }

View File

@ -1,180 +0,0 @@
//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - DNA Helix Generator
//
#include <math.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include "aad_data_types.h"
#include "aad_sha1_cpu.h"
#include "aad_cuda_utilities.h"
#include "aad_vault.h"
#define PI 3.14159265
#define COINS_STORAGE_SIZE 1024u
// Run flag polled by the mining loops; cleared asynchronously by signal_handler().
// volatile sig_atomic_t is the only object type the C standard guarantees can be
// safely written from inside a signal handler.
static volatile sig_atomic_t keep_running = 1;

//
// SIGINT handler: requests a clean shutdown by clearing keep_running
//
// signum  the delivered signal number (unused)
//
void signal_handler(int signum) {
    (void)signum;      // silence the unused-parameter warning
    keep_running = 0;  // the loops that test keep_running stop at their next check
}
//
// Visual Generator: builds the coin template for one row of the DNA helix
//
// Row layout (byte offsets in message order):
//    0..11  "DETI coin 2 " header
//   12..47  36-column drawing area: '(' and ')' strands plus an optional rung
//   48..53  '.' placeholders for the mining area
//   54      '\n'
//   55      0x80
//
// row_idx          row number; sets the phase of the two sine waves
// template_buffer  output, 14 words; word i holds bytes 4*i .. 4*i+3 of the row,
//                  with the first byte in bits 31..24
//
void generate_dna_row(int row_idx, u32_t *template_buffer) {
    char line_str[64];
    memset(line_str, 0, sizeof(line_str));

    // 1. Standard header (12 bytes)
    memcpy(line_str, "DETI coin 2 ", 12);

    // 2. Drawing area (bytes 12 to 47 -> 36 columns), centred on column 18
    int width = 36;
    int center = width / 2;
    double amplitude = 14.0;
    double frequency = 0.3;

    // Blank background
    for(int i = 12; i < 48; i++) line_str[i] = ' ';

    // The two strands are sine waves 180 degrees out of phase
    int pos1 = center + (int)(amplitude * sin(row_idx * frequency));
    int pos2 = center + (int)(amplitude * sin(row_idx * frequency + PI));

    // Draw the strands (guarded so a column outside the drawing area is never written)
    if(pos1 >= 0 && pos1 < width) line_str[12 + pos1] = '(';
    if(pos2 >= 0 && pos2 < width) line_str[12 + pos2] = ')';

    // Rung: one character halfway between the strands on even rows.
    // It is drawn only when at least one free column separates the strands;
    // otherwise the midpoint would land on a strand and erase it.
    int left  = (pos1 < pos2) ? pos1 : pos2;
    int right = (pos1 < pos2) ? pos2 : pos1;
    if(row_idx % 2 == 0 && right - left >= 2) {
        int mid = 12 + (left + right) / 2;
        line_str[mid] = (row_idx % 4 == 0) ? '-' : '+';
    }

    // 3. Mining area (bytes 48-53): placeholders only
    for(int i = 48; i < 54; i++) line_str[i] = '.';

    // 4. Mandatory suffix
    line_str[54] = '\n';
    line_str[55] = (char)0x80; // padding byte

    // 5. Pack 14 words (56 bytes). Building each word with shifts gives the same
    //    value on any host byte order; the bytes go through unsigned char so that
    //    0x80 is not sign-extended.
    for(int i = 0; i < 14; i++) {
        const unsigned char *src = (const unsigned char *)&line_str[i * 4];
        template_buffer[i] = ((u32_t)src[0] << 24)
                           | ((u32_t)src[1] << 16)
                           | ((u32_t)src[2] <<  8)
                           |  (u32_t)src[3];
    }
}
int main(int argc, char *argv[])
{
cuda_data_t cd;
u32_t *host_storage;
u32_t *host_template;
u64_t base_nonce = 0;
int current_row = 0;
signal(SIGINT, signal_handler);
// Initialize CUDA
memset(&cd, 0, sizeof(cd));
cd.device_number = 0;
cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
cd.kernel_name = "mine_visual_row_kernel"; // Note the new kernel name
// Allocations
cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t); // Storage for found coins
cd.data_size[1] = 16 * sizeof(u32_t); // Storage for the Row Template
initialize_cuda(&cd);
host_storage = (u32_t *)cd.host_data[0];
host_template = (u32_t *)cd.host_data[1];
// Configure Kernel
cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE;
cd.grid_dim_x = 128; // Smaller grid is fine since we stop as soon as we find ONE coin
printf("Generating DNA Helix Blockchain...\n");
printf("Press Ctrl+C to stop.\n\n");
cd.n_kernel_arguments = 3;
cd.arg[0] = &cd.device_data[0]; // Storage
cd.arg[1] = &cd.device_data[1]; // Template
cd.arg[2] = &base_nonce; // Nonce
while(keep_running)
{
// 1. Generate the visual template for this specific row
generate_dna_row(current_row, host_template);
// 2. Reset storage counter
host_storage[0] = 1u;
// 3. Copy Template and Reset Counter to GPU
// We copy both buffers (idx 0 and 1)
host_to_device_copy(&cd, 0);
host_to_device_copy(&cd, 1);
int coin_found = 0;
// 4. Loop until we find a coin for THIS row
while(!coin_found && keep_running) {
cd.arg[2] = &base_nonce; // Update nonce pointer arg
launch_kernel(&cd);
// Check if we found something
device_to_host_copy(&cd, 0);
u32_t count = host_storage[0];
if(count > 1) {
// Coin found!
u32_t coin[14];
// Extract the first found coin
for(int i=0; i<14; i++) coin[i] = host_storage[1+i];
save_coin(coin); // Save to disk
// Visual Feedback to Console (Reconstruct char string for display)
char debug_str[56];
for(int i=0; i<14; i++) {
u32_t w = coin[i];
debug_str[i*4+0] = (w >> 24) & 0xFF;
debug_str[i*4+1] = (w >> 16) & 0xFF;
debug_str[i*4+2] = (w >> 8) & 0xFF;
debug_str[i*4+3] = w & 0xFF;
}
// Only print the visual part (hide the ugly mining bits at the end)
printf("%.54s\n", debug_str);
coin_found = 1;
current_row++; // Advance to next visual row
}
base_nonce += (cd.grid_dim_x * cd.block_dim_x);
}
}
save_coin(NULL);
terminate_cuda(&cd);
return 0;
}

View File

@ -211,13 +211,13 @@ static void mine_coins_avx(u64_t max_attempts, double max_time)
} }
// Print progress every 1M attempts // Print progress every 1M attempts
// if(attempts % 1000000 < SIMD_WIDTH) if(attempts % 1000000 < SIMD_WIDTH)
// { {
// elapsed = get_wall_time() - start_time; elapsed = get_wall_time() - start_time;
// double rate = attempts / elapsed; double rate = attempts / elapsed;
// printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n", printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n",
// (unsigned long long)attempts, rate / 1e6, coins_found, elapsed); (unsigned long long)attempts, rate / 1e6, coins_found, elapsed);
// } }
} }
double total_time = get_wall_time() - start_time; double total_time = get_wall_time() - start_time;
@ -313,13 +313,13 @@ static void mine_coins_avx2(u64_t max_attempts, double max_time)
} }
} }
// if(attempts % 1000000 < SIMD_WIDTH) if(attempts % 1000000 < SIMD_WIDTH)
// { {
// elapsed = get_wall_time() - start_time; elapsed = get_wall_time() - start_time;
// double rate = attempts / elapsed; double rate = attempts / elapsed;
// printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n", printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n",
// (unsigned long long)attempts, rate / 1e6, coins_found, elapsed); (unsigned long long)attempts, rate / 1e6, coins_found, elapsed);
// } }
} }
double total_time = get_wall_time() - start_time; double total_time = get_wall_time() - start_time;
@ -452,14 +452,14 @@ static void mine_coins_avx2_omp(u64_t max_attempts, double max_time)
#pragma omp atomic read #pragma omp atomic read
current_attempts = attempts; current_attempts = attempts;
// if(current_attempts - last_reported_attempts >= 1000000) if(current_attempts - last_reported_attempts >= 1000000)
// { {
// double elapsed = get_wall_time() - start_time; double elapsed = get_wall_time() - start_time;
// double rate = current_attempts / elapsed; double rate = current_attempts / elapsed;
// printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n", printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Elapsed: %.2fs\n",
// (unsigned long long)current_attempts, rate / 1e6, coins_found, elapsed); (unsigned long long)current_attempts, rate / 1e6, coins_found, elapsed);
// last_reported_attempts = current_attempts; last_reported_attempts = current_attempts;
// } }
} }
} }
} }

View File

@ -28,7 +28,7 @@
// //
// we place this here to simplify things (aad_sha1_cuda_kernel.cu includes this file...) // we place this here to simplify things (aad_sha1_cuda_kernel.cu includes this file...)
// //
#define RECOMMENDED_CUDA_BLOCK_SIZE 256 #define RECOMMENDED_CUDA_BLOCK_SIZE 128
// //

View File

@ -42,7 +42,7 @@ CUDA_ARCH = sm_86
clean: clean:
rm -f sha1_tests rm -f sha1_tests
rm -f sha1_cuda_test sha1_cuda_kernel.cubin rm -f sha1_cuda_test sha1_cuda_kernel.cubin
rm -f coin_miner_cpu coin_miner_simd coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl coin_miner_dna_shape_cuda rm -f coin_miner_cpu coin_miner_simd coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl
rm -f coin_miner_wasm.js coin_miner_wasm.wasm rm -f coin_miner_wasm.js coin_miner_wasm.wasm
rm -f benchmark rm -f benchmark
rm -f a.out rm -f a.out
@ -77,7 +77,7 @@ coin_miner_cpu: aad_coin_miner_cpu.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@ cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
coin_miner_simd: aad_coin_miner_simd.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h makefile coin_miner_simd: aad_coin_miner_simd.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h makefile
cc -march=native -Wall -Wshadow -fopenmp -mavx2 -O3 $< -o $@ cc -march=native -Wall -Wshadow -Werror -fopenmp -mavx2 -O3 $< -o $@
coin_miner_cuda_kernel.cubin: aad_coin_miner_cuda_kernel.cu aad_sha1.h makefile coin_miner_cuda_kernel.cubin: aad_coin_miner_cuda_kernel.cu aad_sha1.h makefile
nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@ nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@
@ -88,9 +88,6 @@ coin_miner_cuda: aad_coin_miner_cuda.c coin_miner_cuda_kernel.cubin aad_sha1.h a
coin_miner_ocl: aad_coin_miner_ocl.c aad_coin_miner_ocl_kernel.cl aad_sha1.h aad_sha1_cpu.h aad_sha1_ocl_kernel.cl aad_data_types.h aad_utilities.h aad_vault.h aad_ocl_utilities.h makefile coin_miner_ocl: aad_coin_miner_ocl.c aad_coin_miner_ocl_kernel.cl aad_sha1.h aad_sha1_cpu.h aad_sha1_ocl_kernel.cl aad_data_types.h aad_utilities.h aad_vault.h aad_ocl_utilities.h makefile
cc -march=native -Wall -Wshadow -O3 $< -o $@ -lOpenCL cc -march=native -Wall -Wshadow -O3 $< -o $@ -lOpenCL
coin_miner_dna_shape_cuda: aad_coin_miner_dna_shape_cuda.c coin_miner_cuda_kernel.cubin aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h aad_cuda_utilities.h makefile
cc -march=native -Wall -Wshadow -Werror -O3 -I$(CUDA_DIR)/include $< -o $@ -lcuda -lm
coin_miner_wasm: aad_coin_miner_wasm.c aad_sha1.h aad_sha1_cpu.h aad_sha1_wasm.h aad_data_types.h aad_utilities.h aad_vault.h makefile coin_miner_wasm: aad_coin_miner_wasm.c aad_sha1.h aad_sha1_cpu.h aad_sha1_wasm.h aad_data_types.h aad_utilities.h aad_vault.h makefile
emcc -O3 -flto -msimd128 -o coin_miner_wasm.js aad_coin_miner_wasm.c \ emcc -O3 -flto -msimd128 -o coin_miner_wasm.js aad_coin_miner_wasm.c \
-s WASM=1 \ -s WASM=1 \
@ -101,8 +98,11 @@ coin_miner_wasm: aad_coin_miner_wasm.c aad_sha1.h aad_sha1_cpu.h aad_sha1_wasm.h
-s EXPORT_NAME='CoinMinerModule' \ -s EXPORT_NAME='CoinMinerModule' \
-s INITIAL_MEMORY=67108864 -s INITIAL_MEMORY=67108864
benchmark: aad_benchmark.c aad_sha1.h aad_sha1_cpu.h aad_data_types.h aad_utilities.h makefile
cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@
miners: coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_ocl miners: coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_ocl benchmark
all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin \ all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin \
coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl coin_miner_cpu coin_miner_simd coin_miner_wasm coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl \
benchmark