//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - CUDA kernel (Optimized)
//

#include "aad_sha1.h"
|
|
#include "aad_data_types.h"
|
|
|
|
//
// Optimized CUDA kernel
//
//
// Adds `nonce`, written as base-95 digits, to the variable bytes of the coin.
//
// msg_bytes  byte view of the 16-word message block
// nonce      value to encode; its least significant digit lands on message byte 12,
//            the following digits on bytes 13, 14, ... (never past byte 53)
//
// Message byte k is stored at msg_bytes[k ^ 3]: the words keep the message bytes most
// significant byte first, so on little-endian storage the byte order inside each word
// is reversed.
//
static __device__ __forceinline__ void deti_seed_variable_bytes(u08_t *msg_bytes, u64_t nonce)
{
  int k = 12;

  while(nonce != 0 && k < 54)
  {
    u08_t *slot = &msg_bytes[k ^ 3];
    u32_t ch = (u32_t)*slot + (u32_t)(nonce % 95); // 95 values: 0x20 .. 0x7E

    nonce /= 95;
    // When every variable byte starts as 0x20 (as the kernel below sets them up) the sum
    // cannot exceed 0x7E, so this wrap-and-carry path is purely defensive.
    if(ch > 0x7Eu)
    {
      ch -= 95u;
      nonce++;
    }
    *slot = (u08_t)ch;
    k++;
  }
}

//
// Advances the variable bytes of the coin to the next candidate (odometer step).
//
// Byte 53 (the one just before the '\n') is bumped first; a byte that would go past 0x7E
// is reset to 0x20 and the carry moves on to the previous byte, down to byte 12.
//
static __device__ __forceinline__ void deti_advance_variable_bytes(u08_t *msg_bytes)
{
  for(int pos = 53; pos >= 12; pos--)
  {
    u08_t *slot = &msg_bytes[pos ^ 3];
    const u08_t bumped = (u08_t)(*slot + 1);

    if(bumped <= 0x7E)
    {
      *slot = bumped; // no carry: the odometer step is complete
      return;
    }
    *slot = 0x20;     // wrapped: restart this byte and carry into the previous one
  }
}

//
// DETI coin search kernel.
//
// coins_storage_area   output buffer; word 0 is advanced atomically by 14 for every hit and
//                      its previous value is used as the index where the 14 coin words go
// base_nonce           starting nonce of this launch
// attempts_per_thread  number of candidates each thread hashes
//
// Threads are indexed along x only (blockIdx.x, blockDim.x, threadIdx.x). Each thread starts
// from nonce base_nonce + thread_index * attempts_per_thread.
//
// NOTE(review): the value returned by atomicAdd is used directly as the write index, so the
// host presumably initializes coins_storage_area[0] to the first free word (>= 1) and sizes
// the buffer to at least 1024 words -- confirm against the launch code.
//
extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE, 1)
void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread)
{
  u32_t hash[5];

  // One 64-byte SHA-1 block, message bytes packed most significant byte first in each word.
  u32_t coin[16] =
  {
    0x44455449u,                                       // bytes  0.. 3: "DETI"
    0x20636F69u,                                       // bytes  4.. 7: " coi"
    0x6E203220u,                                       // bytes  8..11: "n 2 "
    0x20202020u, 0x20202020u, 0x20202020u, 0x20202020u, 0x20202020u,
    0x20202020u, 0x20202020u, 0x20202020u, 0x20202020u, 0x20202020u, // bytes 12..51: spaces
    0x20200A80u,                                       // bytes 52,53: spaces, 54: '\n', 55: 0x80 padding
    0x00000000u,                                       // upper half of the message length
    440u                                               // message length in bits (55 * 8)
  };

  // Give every thread its own starting point inside the variable bytes.
  const u64_t global_thread = (u64_t)blockIdx.x * blockDim.x + threadIdx.x;
  u08_t *const msg_bytes = (u08_t *)coin;

  deti_seed_variable_bytes(msg_bytes, base_nonce + global_thread * attempts_per_thread);

  for(u32_t remaining = attempts_per_thread; remaining != 0u; remaining--)
  {
    // Hash the current candidate. CUSTOM_SHA1_CODE() is not defined in this file (presumably
    // it comes from aad_sha1.h); it is parameterized through the five macros below, reading
    // the message via DATA() and delivering the digest via HASH().
#define T            u32_t
#define C(c)         (c)
#define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
#define DATA(idx)    coin[idx]
#define HASH(idx)    hash[idx]

    CUSTOM_SHA1_CODE();

#undef T
#undef C
#undef ROTATE
#undef DATA
#undef HASH

    // A candidate is a hit when the first digest word is 0xAAD20250.
    if(hash[0] == 0xAAD20250u)
    {
      // Reserve 14 words (56 bytes, enough for the 55-byte coin) in the shared buffer.
      const u32_t first_word = atomicAdd(coins_storage_area, 14u);

      // Drop the hit when the reserved range is too close to the end of the buffer.
      if(first_word < 1024u - 15u)
      {
        u32_t *dst = coins_storage_area + first_word;

        for(int w = 0; w < 14; w++)
          dst[w] = coin[w];
      }
    }

    // Move on to the next candidate.
    deti_advance_variable_bytes(msg_bytes);
  }
}