//
// Arquiteturas de Alto Desempenho (High Performance Architectures) 2025/2026
//
// DETI Coin Miner - CUDA kernels (optimized for mining)
//
// Coin layout shared by both kernels (14 32-bit words, 56 bytes including the SHA1 padding byte):
//   * logical byte p of the message lives at memory byte (p ^ 3) of the coin[] array, i.e. the most
//     significant byte of each word is the first byte of the message
//   * words 0..2   hold the fixed header "DETI coin 2 "
//   * words 3..12  and the upper 16 bits of word 13 hold the variable part (logical bytes 12..53)
//   * the lower 16 bits of word 13 hold the terminating '\n' (0x0A) and the 0x80 padding byte
//

#include "aad_sha1.h"

typedef unsigned int u32_t;
typedef unsigned char u08_t;


//
// Returns true when any byte of the variable part of the coin (logical bytes 12..53) is a newline.
// The only newline in a coin is meant to be the terminating one, so such candidates are discarded.
//

static __device__ __forceinline__ bool deti_coin_has_inner_newline(const u32_t *coin)
{
  // words 3..12 are entirely variable: test all four bytes of each one
# pragma unroll
  for(int w = 3; w < 13; w++)
  {
    const u32_t x = coin[w];
    if(((x >>  0) & 0xFFu) == 0x0Au || ((x >>  8) & 0xFFu) == 0x0Au ||
       ((x >> 16) & 0xFFu) == 0x0Au || ((x >> 24) & 0xFFu) == 0x0Au)
      return true;
  }
  // word 13: only the two upper bytes are variable (the two lower ones are '\n' and 0x80)
  const u32_t last = coin[13];
  return ((last >> 16) & 0xFFu) == 0x0Au || ((last >> 24) & 0xFFu) == 0x0Au;
}


//
// Appends one record (14 words) to the coins storage area:
//   record[0..10]  = coin[3..13]  (the variable words; the fixed header is not stored)
//   record[11..13] = hash[1..3]   (for verification)
//
// coins_storage_area[0] is used as the index of the next free word; it is advanced atomically, even
// when the record does not fit (in that case nothing is written).
// NOTE(review): this assumes the host initializes coins_storage_area[0] to a value >= 1 and that the
//               area has 1024 words; with an initial value of 0 the first record would overwrite
//               the counter itself -- confirm against the host code.
//

static __device__ __forceinline__ void store_deti_coin(u32_t *coins_storage_area, const u32_t *coin, const u32_t *hash)
{
  const u32_t storage_words = 1024u; // size of the storage area, in 32-bit words
  const u32_t record_words = 14u;    // 11 coin words + 3 hash words

  // reserve space for one record
  const u32_t idx = atomicAdd(coins_storage_area, record_words);
  // make sure we don't write outside the buffer
  if(idx >= storage_words - record_words)
    return;
# pragma unroll
  for(u32_t i = 0u; i < 11u; i++)
    coins_storage_area[idx + i] = coin[3u + i];
# pragma unroll
  for(u32_t i = 0u; i < 3u; i++)
    coins_storage_area[idx + 11u + i] = hash[1u + i];
}


//
// Optimized CUDA kernel for DETI coin mining
//
// Each thread builds a single candidate message from its thread coordinates and the two external
// parameters, computes its SHA1 hash, and records the candidate when the first hash word is
// 0xAAD20250 and the variable part contains no newline.
//
// Launch layout: one-dimensional grid of one-dimensional blocks (only the .x coordinates are used).
//
//   coins_storage_area ... device buffer receiving the coins found (see store_deti_coin)
//   param1, param2 ....... caller-chosen values mixed into the message; vary them between launches
//                          so that different launches explore different messages
//

extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1)
void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2)
{
  u32_t coin[14];
  u32_t hash[5];
  u32_t n, warp_id, lane_id;

  // thread coordinates
  n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x;
  warp_id = n >> 5u;
  lane_id = n & 31u;

  // fixed header: "DETI coin 2 " (first character in the most significant byte of each word)
  coin[0] = 0x44455449u; // "DETI"
  coin[1] = 0x20636F69u; // " coi"
  coin[2] = 0x6E203220u; // "n 2 "

  // variable part: encode the thread id and the parameters so that each thread hashes a different message
  coin[ 3] = n;                   // global thread id
  coin[ 4] = param1;              // external parameter 1
  coin[ 5] = param2;              // external parameter 2
  coin[ 6] = blockIdx.x;          // block index
  coin[ 7] = threadIdx.x;         // thread index
  coin[ 8] = warp_id;             // warp id
  coin[ 9] = lane_id;             // lane id
  coin[10] = n ^ param1;          // xor combination
  coin[11] = n ^ param2;          // xor combination
  coin[12] = (n * 0x9E3779B9u);   // hash-like mixing

  // last word: logical bytes 52 and 53 (upper half) are variable, logical byte 54 is the newline
  // (0x0A) and logical byte 55 is the SHA1 padding byte (0x80)
  coin[13] = ((n & 0xFFFFu) << 16) | 0x0A80u;

  // compute the SHA1 hash
# define T            u32_t
# define C(c)         (c)
# define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
# define DATA(idx)    coin[idx]
# define HASH(idx)    hash[idx]
  CUSTOM_SHA1_CODE();
# undef T
# undef C
# undef ROTATE
# undef DATA
# undef HASH

  // a DETI coin has 0xAAD20250 as its first hash word; candidates with a newline in the variable
  // part are discarded before any storage space is reserved
  if(hash[0] == 0xAAD20250u && !deti_coin_has_inner_newline(coin))
    store_deti_coin(coins_storage_area, coin, hash);
}


//
// Kernel that tries all possible values for one character position
//
// Each thread builds a base message from its thread coordinates and the two external parameters and
// then hashes it once for every value (0..255, newline excluded) of the logical byte scan_position.
//
// Launch layout: one-dimensional grid of one-dimensional blocks (only the .x coordinates are used).
//
//   coins_storage_area ... device buffer receiving the coins found (see store_deti_coin)
//   param1, param2 ....... caller-chosen values mixed into the message
//   scan_position ........ logical byte index (12..51) of the byte to scan; for any other value the
//                          base message is hashed exactly once, without modification
//

extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1)
void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2, int scan_position)
{
  u32_t coin[14];
  u32_t hash[5];
  u32_t n;

  n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x;

  // fixed header: "DETI coin 2 " (first character in the most significant byte of each word)
  coin[0] = 0x44455449u; // "DETI"
  coin[1] = 0x20636F69u; // " coi"
  coin[2] = 0x6E203220u; // "n 2 "

  // variable part
  coin[ 3] = param1;
  coin[ 4] = param2;
  coin[ 5] = n >> 8;              // high bits of n
  coin[ 6] = blockIdx.x;
  coin[ 7] = threadIdx.x;
  coin[ 8] = param1 ^ param2;
  coin[ 9] = n & 0xFFu;           // low 8 bits of n
  coin[10] = param1 + n;
  coin[11] = param2 - n;
  coin[12] = (n * 0x9E3779B9u);
  coin[13] = ((n & 0xFFFFu) << 16) | 0x0A80u; // upper half: variable, lower half: '\n' and 0x80

  // the scanned byte must lie in words 3..12 (logical bytes 12..51)
  const bool scan_enabled = (scan_position >= 12) && (scan_position < 52);

  // loop invariants; logical byte p is stored at memory byte (p ^ 3), so, within its (little-endian)
  // word, it sits (p ^ 3) & 3 bytes above the least significant one
  const u32_t word_idx = scan_enabled ? ((u32_t)scan_position >> 2) : 3u;
  const u32_t shift = scan_enabled ? ((((u32_t)scan_position ^ 3u) & 3u) << 3) : 0u;
  const u32_t keep_mask = ~(0xFFu << shift);

  // without a valid scan position all iterations would hash the same message, so do a single pass
  const u32_t n_values = scan_enabled ? 256u : 1u;

  // try all possible values for the scan position (0-255);
  // this allows exploring a full byte range in a single kernel launch
  for(u32_t val = 0u; val < n_values; val++)
  {
    if(scan_enabled)
    {
      // make sure we don't use a newline in the middle
      if(val == 0x0Au)
        continue;
      // insert the test value at the scan position
      coin[word_idx] = (coin[word_idx] & keep_mask) | (val << shift);
    }

    // compute the SHA1 hash
#   define T            u32_t
#   define C(c)         (c)
#   define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
#   define DATA(idx)    coin[idx]
#   define HASH(idx)    hash[idx]
    CUSTOM_SHA1_CODE();
#   undef T
#   undef C
#   undef ROTATE
#   undef DATA
#   undef HASH

    // a DETI coin has 0xAAD20250 as its first hash word; candidates with a newline elsewhere in the
    // variable part are discarded before any storage space is reserved
    if(hash[0] == 0xAAD20250u && !deti_coin_has_inner_newline(coin))
      store_deti_coin(coins_storage_area, coin, hash);
  }
}