//
// Kernel: mines a coin whose first 48 bytes are FIXED (the visual pattern)
// and whose message bytes 48..53 are the only ones varied to find the hash.
//
// Launch layout: 1D grid of 1D blocks, no shared memory. Each thread tests
// exactly one candidate, identified by
//     base_nonce + blockIdx.x * blockDim.x + threadIdx.x
//
// Parameters:
//   coins_storage_area  word 0 is the next free word index; it is advanced
//                       atomically by 14 for every hit and the 14 coin words
//                       are stored starting at the index that was reserved.
//                       The bounds check below hard-codes a 1024-word buffer.
//   row_template        at least 14 words; words 0..11 are copied verbatim and
//                       the low 16 bits of word 13 (message bytes 54 and 55)
//                       are kept. Words 12 and the high half of 13 are ignored.
//   base_nonce          index of the first candidate tested by this launch.
//
// The six mining bytes are the base-94 digits of the candidate index mapped
// onto printable ASCII 0x21..0x7E (so '\n' and space can never appear).
// Candidate indices that differ by a multiple of 94^6 (about 6.9e11) map to
// the same six bytes.
//
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
void mine_visual_row_kernel(u32_t *coins_storage_area, u32_t *row_template, u64_t base_nonce)
{
    u32_t coin[16]; // SHA1 working buffer (message words, most significant byte first)
    u32_t hash[5];

    // 1. Load the fixed visual part: words 0..11 == message bytes 0..47
    #pragma unroll
    for(int i = 0; i < 12; i++)
        coin[i] = row_template[i];

    // 2. Thread-unique candidate index (computed in 64 bits to avoid overflow)
    u64_t nonce = base_nonce + (u64_t)blockIdx.x * blockDim.x + threadIdx.x;

    // 3. Turn the index into six printable characters, least significant digit
    //    first (digit 0 becomes message byte 48, digit 5 becomes byte 53)
    u32_t digit[6];
    #pragma unroll
    for(int k = 0; k < 6; k++)
    {
        digit[k] = 0x21u + (u32_t)(nonce % 94u); // 0x21 .. 0x7E
        nonce /= 94u;
    }

    // 4. Pack the mining bytes with shifts instead of aliasing coin[] through a
    //    byte pointer; message byte k lives in bits 31-8*(k%4) .. of word k/4
    coin[12] = (digit[0] << 24) | (digit[1] << 16) | (digit[2] << 8) | digit[3];
    // Word 13: bytes 52,53 are mined; bytes 54,55 come from the template
    // (the host stores '\n' and the 0x80 padding byte there)
    coin[13] = (digit[4] << 24) | (digit[5] << 16) | (row_template[13] & 0x0000FFFFu);

    // SHA1 length padding (55 bytes = 440 bits)
    coin[14] = 0;
    coin[15] = 440;

    // 5. SHA1 computation
    // NOTE(review): CUSTOM_SHA1_CODE is defined outside this block; it is
    // assumed to read DATA(idx) and leave the digest in HASH(0..4) - confirm.
    #define T u32_t
    #define C(c) (c)
    #define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
    #define DATA(idx) coin[idx]
    #define HASH(idx) hash[idx]

    CUSTOM_SHA1_CODE();

    #undef T
    #undef C
    #undef ROTATE
    #undef DATA
    #undef HASH

    // 6. Check result: a coin is a message whose first hash word is 0xAAD20250
    if(hash[0] == 0xAAD20250u)
    {
        // Reserve 14 words; the counter keeps growing even when the buffer is
        // full, but nothing is written past the end
        u32_t idx = atomicAdd(&coins_storage_area[0], 14u);
        if(idx < 1024u - 15u)
        {
            // Save the found coin (words 0..13 == the 55 message bytes + 0x80)
            for(int w = 0; w < 14; w++)
                coins_storage_area[idx + w] = coin[w];
        }
    }
}
Standard Header (12 bytes) + memcpy(line_str, "DETI coin 2 ", 12); + + // 2. The Visual Area (Bytes 12 to 47 -> 36 chars wide) + // We draw two sine waves. + // Center is roughly at relative index 18. + int width = 36; + int center = width / 2; + double amplitude = 14.0; + double frequency = 0.3; + + // Fill background with space + for(int i=12; i < 48; i++) line_str[i] = ' '; + + // Calculate positions + int pos1 = center + (int)(amplitude * sin(row_idx * frequency)); + int pos2 = center + (int)(amplitude * sin(row_idx * frequency + PI)); // 180 deg out of phase + + // Draw the helix strands + // Valid visual range is index 12 to 47 + if(pos1 >= 0 && pos1 < width) line_str[12 + pos1] = '('; + if(pos2 >= 0 && pos2 < width) line_str[12 + pos2] = ')'; + + // Draw the "rungs" connecting the DNA strands + int left = (pos1 < pos2) ? pos1 : pos2; + int right = (pos1 < pos2) ? pos2 : pos1; + + // Add some "biology" chars in the middle + if (row_idx % 2 == 0) { + int mid = 12 + (left + right) / 2; + line_str[mid] = (row_idx % 4 == 0) ? '-' : '+'; + } + + // 3. The Mining Area (Bytes 48-53) + // Initialize with placeholders (GPU will overwrite these) + for(int i=48; i<54; i++) line_str[i] = '.'; + + // 4. Mandatory Suffix + line_str[54] = '\n'; + line_str[55] = (char)0x80; // Padding + + // 5. 
Convert char buffer to u32 array (Endian safe copy) + // We copy 14 words (56 bytes) + for(int i=0; i<14; i++) { + u08_t *ptr = (u08_t*)&template_buffer[i]; + ptr[3] = line_str[i*4 + 0]; + ptr[2] = line_str[i*4 + 1]; + ptr[1] = line_str[i*4 + 2]; + ptr[0] = line_str[i*4 + 3]; + } +} + +int main(int argc, char *argv[]) +{ + cuda_data_t cd; + u32_t *host_storage; + u32_t *host_template; + u64_t base_nonce = 0; + int current_row = 0; + + signal(SIGINT, signal_handler); + + // Initialize CUDA + memset(&cd, 0, sizeof(cd)); + cd.device_number = 0; + cd.cubin_file_name = "coin_miner_cuda_kernel.cubin"; + cd.kernel_name = "mine_visual_row_kernel"; // Note the new kernel name + + // Allocations + cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t); // Storage for found coins + cd.data_size[1] = 16 * sizeof(u32_t); // Storage for the Row Template + + initialize_cuda(&cd); + + host_storage = (u32_t *)cd.host_data[0]; + host_template = (u32_t *)cd.host_data[1]; + + // Configure Kernel + cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE; + cd.grid_dim_x = 128; // Smaller grid is fine since we stop as soon as we find ONE coin + + printf("Generating DNA Helix Blockchain...\n"); + printf("Press Ctrl+C to stop.\n\n"); + + cd.n_kernel_arguments = 3; + cd.arg[0] = &cd.device_data[0]; // Storage + cd.arg[1] = &cd.device_data[1]; // Template + cd.arg[2] = &base_nonce; // Nonce + + while(keep_running) + { + // 1. Generate the visual template for this specific row + generate_dna_row(current_row, host_template); + + // 2. Reset storage counter + host_storage[0] = 1u; + + // 3. Copy Template and Reset Counter to GPU + // We copy both buffers (idx 0 and 1) + host_to_device_copy(&cd, 0); + host_to_device_copy(&cd, 1); + + int coin_found = 0; + + // 4. 
Loop until we find a coin for THIS row + while(!coin_found && keep_running) { + + cd.arg[2] = &base_nonce; // Update nonce pointer arg + launch_kernel(&cd); + + // Check if we found something + device_to_host_copy(&cd, 0); + + u32_t count = host_storage[0]; + if(count > 1) { + // Coin found! + u32_t coin[14]; + // Extract the first found coin + for(int i=0; i<14; i++) coin[i] = host_storage[1+i]; + + save_coin(coin); // Save to disk + + // Visual Feedback to Console (Reconstruct char string for display) + char debug_str[56]; + for(int i=0; i<14; i++) { + u32_t w = coin[i]; + debug_str[i*4+0] = (w >> 24) & 0xFF; + debug_str[i*4+1] = (w >> 16) & 0xFF; + debug_str[i*4+2] = (w >> 8) & 0xFF; + debug_str[i*4+3] = w & 0xFF; + } + // Only print the visual part (hide the ugly mining bits at the end) + printf("%.54s\n", debug_str); + + coin_found = 1; + current_row++; // Advance to next visual row + } + + base_nonce += (cd.grid_dim_x * cd.block_dim_x); + } + } + + save_coin(NULL); + terminate_cuda(&cd); + return 0; +} \ No newline at end of file diff --git a/makefile b/makefile index 261530a..722359d 100644 --- a/makefile +++ b/makefile @@ -42,7 +42,7 @@ CUDA_ARCH = sm_86 clean: rm -f sha1_tests rm -f sha1_cuda_test sha1_cuda_kernel.cubin - rm -f coin_miner_cpu coin_miner_simd coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl + rm -f coin_miner_cpu coin_miner_simd coin_miner_cuda coin_miner_cuda_kernel.cubin coin_miner_ocl coin_miner_dna_shape_cuda rm -f coin_miner_wasm.js coin_miner_wasm.wasm rm -f benchmark rm -f a.out @@ -88,6 +88,9 @@ coin_miner_cuda: aad_coin_miner_cuda.c coin_miner_cuda_kernel.cubin aad_sha1.h a coin_miner_ocl: aad_coin_miner_ocl.c aad_coin_miner_ocl_kernel.cl aad_sha1.h aad_sha1_cpu.h aad_sha1_ocl_kernel.cl aad_data_types.h aad_utilities.h aad_vault.h aad_ocl_utilities.h makefile cc -march=native -Wall -Wshadow -O3 $< -o $@ -lOpenCL +coin_miner_dna_shape_cuda: aad_coin_miner_dna_shape_cuda.c coin_miner_cuda_kernel.cubin aad_sha1.h 
aad_sha1_cpu.h aad_data_types.h aad_utilities.h aad_vault.h aad_cuda_utilities.h makefile + cc -march=native -Wall -Wshadow -Werror -O3 -I$(CUDA_DIR)/include $< -o $@ -lcuda -lm + coin_miner_wasm: aad_coin_miner_wasm.c aad_sha1.h aad_sha1_cpu.h aad_sha1_wasm.h aad_data_types.h aad_utilities.h aad_vault.h makefile emcc -O3 -flto -msimd128 -o coin_miner_wasm.js aad_coin_miner_wasm.c \ -s WASM=1 \