//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - CUDA kernel (Optimized)
//

#include "aad_sha1.h"
#include "aad_data_types.h"

//
// Optimized CUDA kernel
//
// Each thread builds one 55-byte candidate message
//
//   bytes  0..11  fixed prefix "DETI coin 2 "
//   bytes 12..53  variable part, printable ASCII only (0x20..0x7E)
//   byte  54      '\n'
//   byte  55      0x80 (first SHA-1 padding byte)
//
// hashes it, and stores it when the first hash word equals 0xAAD20250. It then
// advances the variable part like an odometer and repeats, attempts_per_thread
// times in total.
//
// Message bytes are packed into 32-bit words most-significant byte first
// ('D' sits in bits 31..24 of coin[0]). Message byte k is therefore reached
// through the byte pointer as byte_ptr[k ^ 3], which is only correct on a
// little-endian device.
//
// Parameters:
//   coins_storage_area   Output buffer of 32-bit words. Word 0 is a shared
//                        cursor: every hit does atomicAdd(word 0, 14) and
//                        writes its 14 coin words starting at the RETURNED
//                        (previous) value. The cursor must therefore already
//                        be >= 1 when the kernel starts, otherwise the first
//                        coin overwrites the cursor itself.
//                        NOTE(review): confirm the host initialises word 0
//                        accordingly. The capacity is taken to be 1024 words,
//                        as in the original bounds check.
//   base_nonce           Starting nonce of this launch.
//   attempts_per_thread  Candidates tried by each thread; also the spacing
//                        between the seeds of consecutive threads.
//
// Launch layout: the global thread id is derived from blockIdx.x, blockDim.x
// and threadIdx.x only, i.e. a 1D grid of 1D blocks is expected. No shared
// memory is used.
//

extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread)
{
  // Layout constants (message byte positions and storage geometry).
  const int first_var_byte = 12;       // first variable message byte
  const int last_var_byte = 53;        // last variable message byte (just before '\n')
  const u32_t coin_words = 14u;        // 56 bytes: the 55-byte message plus the 0x80 pad byte
  const u32_t storage_words = 1024u;   // assumed size of coins_storage_area, in words

  u32_t coin[16]; // one 64-byte SHA-1 block
  u32_t hash[5];

  // 1. Fixed prefix "DETI coin 2 " (12 bytes), packed most-significant byte first.
  coin[0] = (u32_t)'D' << 24 | (u32_t)'E' << 16 | (u32_t)'T' << 8 | (u32_t)'I'; // "DETI"
  coin[1] = (u32_t)' ' << 24 | (u32_t)'c' << 16 | (u32_t)'o' << 8 | (u32_t)'i'; // " coi"
  coin[2] = (u32_t)'n' << 24 | (u32_t)' ' << 16 | (u32_t)'2' << 8 | (u32_t)' '; // "n 2 "

  // 2. Variable part: message bytes 12..51 start out as spaces (0x20).
  #pragma unroll
  for(int i = 3; i <= 12; i++)
  {
    coin[i] = 0x20202020;
  }

  // Word 13: variable bytes 52 and 53 (spaces), then '\n' (byte 54) and the
  // 0x80 padding marker (byte 55).
  coin[13] = 0x20200A80;

  // 3. SHA-1 length field: 55 bytes * 8 = 440 bits, stored in the last word.
  // NOTE(review): whether CUSTOM_SHA1_CODE() actually reads words 14 and 15
  // is not visible from this file.
  coin[14] = 0x00000000;
  coin[15] = 440;

  // 4. Per-thread seed, so that every thread starts from a different message.
  u64_t thread_id = (u64_t)blockIdx.x * blockDim.x + threadIdx.x;
  u64_t nonce_offset = base_nonce + thread_id * attempts_per_thread;

  // Byte view of the message; message byte k lives at byte_ptr[k ^ 3].
  u08_t *byte_ptr = (u08_t *)coin;

  // Add the seed, written in base 95 with the least significant digit first,
  // onto the variable bytes starting at byte 12. A 64-bit value needs at most
  // 10 such digits, so only the front of the variable field is touched.
  u64_t temp_nonce = nonce_offset;
  for(int k = first_var_byte; k <= last_var_byte && temp_nonce > 0; k++)
  {
    u32_t val = (u32_t)(byte_ptr[k ^ 3] + (temp_nonce % 95)); // 95 printable characters
    temp_nonce /= 95;
    if(val > 0x7E)
    {
      // Left the printable range: wrap and carry into the next digit. With the
      // 0x20 fill above this cannot trigger (0x20 + 94 == 0x7E); kept as a guard.
      val -= 95;
      temp_nonce++;
    }
    byte_ptr[k ^ 3] = (u08_t)val;
  }

  // 5. Mining loop.
  for(u32_t attempt = 0; attempt < attempts_per_thread; attempt++)
  {
    // --- SHA-1 of the current candidate ---
    // The project macro is parameterised through the helper macros below;
    // presumably it reads the message via DATA(i) and leaves the digest in
    // HASH(i) (its definition lives in aad_sha1.h, not shown here).
#define T            u32_t
#define C(c)         (c)
#define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
#define DATA(idx)    coin[idx]
#define HASH(idx)    hash[idx]
    CUSTOM_SHA1_CODE();
#undef T
#undef C
#undef ROTATE
#undef DATA
#undef HASH

    // --- Check result: the digest must start with the hex digits aad20250 ---
    if(hash[0] == 0xAAD20250u)
    {
      // Reserve 14 words; idx is the cursor value from BEFORE the addition and
      // is used directly as the write offset (no extra adjustment is applied).
      u32_t idx = atomicAdd(&coins_storage_area[0], coin_words);

      // Store only when all 14 words fit: idx + 14 <= 1024. The previous bound
      // (idx < 1024 - 15) needlessly rejected the slot starting at word 1009.
      // The cursor keeps growing when the buffer is full, so the reader has to
      // clamp it.
      if(idx <= storage_words - coin_words)
      {
        for(u32_t w = 0; w < coin_words; w++)
        {
          coins_storage_area[idx + w] = coin[w];
        }
      }
    }

    // --- Advance the message (odometer) ---
    // Increment byte 53; on overflow past '~' (0x7E) reset it to ' ' (0x20)
    // and carry into the previous byte, going no further back than byte 12.
    int pos = last_var_byte;
    while(pos >= first_var_byte)
    {
      u08_t *b = &byte_ptr[pos ^ 3];
      (*b)++;
      if(*b <= 0x7E)
      {
        break; // no carry, done
      }
      *b = 0x20;
      pos--;
    }
  }
}