//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - CUDA kernel (Optimized)
//

#include "aad_sha1.h"
#include "aad_data_types.h"


//
// Private helper shared by both kernels: reserve a 14-word slot in the result buffer and copy the
// candidate coin (words 0..13 = 56 bytes, i.e. the 55-byte coin plus the 0x80 padding byte) into it.
//
// coins_storage_area[0] is advanced atomically by 14 for every candidate, and the value it held
// before the increment is used directly as the index of the first word written.
//
// NOTE(review): because the returned index is used as-is, a counter that starts at 0 would make the
// first candidate overwrite the counter itself; presumably the host initialises word 0 to 1 before
// each launch - confirm against the host code.
// NOTE(review): the bound below assumes the buffer holds 1024 words - confirm against the allocation.
//
static __device__ __forceinline__ void store_candidate_coin(u32_t *coins_storage_area, const u32_t *coin)
{
  // the counter is bumped even when the buffer is full, so it may end up larger than the buffer
  u32_t first_word = atomicAdd(&coins_storage_area[0], 14u);

  if(first_word < 1024u - 15u) // highest word written is first_word + 13 <= 1021
  {
    #pragma unroll
    for(int w = 0; w < 14; w++)
      coins_storage_area[first_word + w] = coin[w];
  }
}


//
// Optimized CUDA kernel
//
// Each thread builds the 55-byte message "DETI coin 2 " + 42 variable bytes + '\n', hashes it with
// SHA1 attempts_per_thread times (changing the variable bytes between attempts), and stores every
// message whose first hash word equals 0xAAD20250.
//
// Parameters:
//   coins_storage_area   result buffer; word 0 is the atomic write index (see store_candidate_coin)
//   base_nonce           first nonce of this launch
//   attempts_per_thread  number of hashes each thread computes
//
// The thread index is computed from blockIdx.x, blockDim.x and threadIdx.x only (1D launch).
//
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread)
{
  u32_t coin[16]; // one SHA1 block: 16 words (64 bytes)
  u32_t hash[5];

  // 1. Fixed prefix "DETI coin 2 " (12 bytes); every word holds its four message bytes
  //    most-significant first
  coin[0] = (u32_t)'D' << 24 | (u32_t)'E' << 16 | (u32_t)'T' << 8 | (u32_t)'I';
  coin[1] = (u32_t)' ' << 24 | (u32_t)'c' << 16 | (u32_t)'o' << 8 | (u32_t)'i';
  coin[2] = (u32_t)'n' << 24 | (u32_t)' ' << 16 | (u32_t)'2' << 8 | (u32_t)' ';

  // 2. Variable part (bytes 12 to 53), initially all spaces (0x20)
  #pragma unroll
  for(int i = 3; i <= 12; i++)
    coin[i] = 0x20202020;

  // word 13: bytes 52 and 53 are variable (0x20), byte 54 is '\n', byte 55 is the 0x80 padding byte
  coin[13] = 0x20200A80;

  // 3. SHA1 length field: 55 bytes = 440 bits, stored in the last word of the block
  coin[14] = 0x00000000;
  coin[15] = 440;

  // 4. Give every thread its own starting message
  u64_t thread_id = (u64_t)blockIdx.x * blockDim.x + threadIdx.x;
  u64_t nonce_offset = base_nonce + thread_id * attempts_per_thread;

  // message byte k lives at byte_ptr[k ^ 3] because of the most-significant-first word layout
  u08_t *byte_ptr = (u08_t *)coin;

  // write nonce_offset in base 95, least significant digit first, starting at byte 12;
  // every touched byte still holds 0x20 and a digit is at most 94, so the result is always in
  // 0x20..0x7E and no wrap-around handling is needed
  u64_t temp_nonce = nonce_offset;
  for(int k = 12; k < 54 && temp_nonce > 0; k++)
  {
    byte_ptr[k ^ 3] = (u08_t)(0x20u + (u32_t)(temp_nonce % 95u));
    temp_nonce /= 95u;
  }

  // 5. Mining loop
  for(u32_t attempt = 0; attempt < attempts_per_thread; attempt++)
  {
    // --- SHA1 of the current message ---
#define T            u32_t
#define C(c)         (c)
#define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
#define DATA(idx)    coin[idx]
#define HASH(idx)    hash[idx]
    CUSTOM_SHA1_CODE();
#undef T
#undef C
#undef ROTATE
#undef DATA
#undef HASH

    // --- candidate check: first hash word must be 0xAAD20250 ---
    if(hash[0] == 0xAAD20250u)
      store_candidate_coin(coins_storage_area, coin);

    // --- odometer: advance the message for the next attempt ---
    // start at byte 53 (just before the '\n') and move towards byte 12 while a byte overflows
    int pos = 53;
    while(pos >= 12)
    {
      u08_t *b = &byte_ptr[pos ^ 3];
      (*b)++;
      if(*b <= 0x7E)
        break;      // still printable: no carry
      *b = 0x20;    // left the 0x20..0x7E range: reset this byte and carry into the previous one
      pos--;
    }
  }
}


//
// Kernel: mines a coin where the first 48 bytes are FIXED (the visual pattern supplied by the host)
// and only bytes 48..53 are derived from the nonce. Each thread computes exactly one hash.
//
// Parameters:
//   coins_storage_area  result buffer; word 0 is the atomic write index (see store_candidate_coin)
//   row_template        words 0..11 supply bytes 0..47; the low 16 bits of word 13 supply bytes 54
//                       and 55 (expected to be '\n' and 0x80); no other template word is read
//   base_nonce          nonce of thread 0; thread t uses base_nonce + t
//
// Bytes 48..53 are the six least significant base-94 digits of the nonce, least significant digit at
// byte 48, each written as 0x21 + digit. They are therefore always in 0x21..0x7E (never '\n'),
// whatever the template holds there; nonces that differ by a multiple of 94^6 give the same message.
//
// The thread index is computed from blockIdx.x, blockDim.x and threadIdx.x only (1D launch).
//
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
void mine_visual_row_kernel(u32_t *coins_storage_area, u32_t *row_template, u64_t base_nonce)
{
  u32_t coin[16]; // one SHA1 block: 16 words (64 bytes)
  u32_t hash[5];

  // 1. Fixed visual part (bytes 0..47)
  #pragma unroll
  for(int i = 0; i < 12; i++)
    coin[i] = row_template[i];

  // 2. Thread-unique nonce
  u64_t thread_id = (u64_t)blockIdx.x * blockDim.x + threadIdx.x;
  u64_t temp_nonce = base_nonce + thread_id;

  // 3. Mining area, bytes 48..51 (word 12, most significant byte first)
  u32_t word12 = 0u;
  #pragma unroll
  for(int k = 0; k < 4; k++)
  {
    word12 |= (0x21u + (u32_t)(temp_nonce % 94u)) << (24 - 8 * k);
    temp_nonce /= 94u;
  }
  coin[12] = word12;

  // 4. Mining area, bytes 52..53, followed by the template's terminator bytes 54..55
  u32_t word13 = row_template[13] & 0x0000FFFFu;
  #pragma unroll
  for(int k = 0; k < 2; k++)
  {
    word13 |= (0x21u + (u32_t)(temp_nonce % 94u)) << (24 - 8 * k);
    temp_nonce /= 94u;
  }
  coin[13] = word13;

  // 5. SHA1 length field: 55 bytes = 440 bits
  coin[14] = 0;
  coin[15] = 440;

  // 6. SHA1 computation
#define T            u32_t
#define C(c)         (c)
#define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
#define DATA(idx)    coin[idx]
#define HASH(idx)    hash[idx]
  CUSTOM_SHA1_CODE();
#undef T
#undef C
#undef ROTATE
#undef DATA
#undef HASH

  // 7. Candidate check: first hash word must be 0xAAD20250
  if(hash[0] == 0xAAD20250u)
    store_candidate_coin(coins_storage_area, coin);
}