//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - CUDA kernel (optimized for mining)
//

#include "aad_sha1.h"

// Short aliases for the unsigned integer types used by the kernels in this file
using u32_t = unsigned int;       // one coin/hash word
using u08_t = unsigned char;      // one coin byte
using u64_t = unsigned long long; // per-thread counter

//
// DETI coin mining kernel: one candidate coin per thread
//
// Each thread builds a 56-byte candidate in 14 32-bit words, hashes it with CUSTOM_SHA1_CODE()
// and, when the first hash word equals 0xAAD20250, appends the 14 words to coins_storage_area.
//
// Candidate layout (logical byte i lives at bytes[i ^ 3], i.e. each word holds its four
// characters most-significant byte first):
//   bytes  0..11  the fixed text "DETI coin 2 "
//   bytes 12..53  variable text, seeded with a repeating 'A'..'Z' pattern
//   byte  54      '\n'
//   byte  55      0x80
//
// Arguments:
//   coins_storage_area  output buffer; word 0 is advanced atomically by 14 for each coin found and
//                       its previous value is the index where the coin is written (the bound
//                       check below assumes a 1024-word buffer - TODO confirm against the host code)
//   param1, param2      high and low 32-bit halves of a 64-bit base counter
//
// The global thread number (x dimension only) is added to the base counter, and the sum is then
// added to the variable text as a base-95 number (characters 32..126), least significant digit at
// byte 53.
//

extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1)
void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2)
{
  u32_t coin[14];
  u32_t hash[5];
  u08_t *const bytes = (u08_t *)coin;

  // global thread number; only the x dimension of the launch configuration is used
  const u32_t n = (u32_t)blockIdx.x * (u32_t)blockDim.x + (u32_t)threadIdx.x;

  // fixed prefix, written one whole word at a time
  coin[0] = 0x44455449u; // "DETI"
  coin[1] = 0x20636F69u; // " coi"
  coin[2] = 0x6E203220u; // "n 2 "

  // seed the 42 variable bytes (12..53) with ABC...XYZABC...
  for(int k = 0; k < 42; k++)
    bytes[(k + 12) ^ 3] = (u08_t)('A' + k % 26);

  // line terminator at byte 54, followed by the 0x80 byte at byte 55
  bytes[54 ^ 3] = (u08_t)'\n';
  bytes[55 ^ 3] = (u08_t)0x80;

  // this thread's 64-bit counter: base counter (param1:param2) plus the thread number
  u64_t offset = (u64_t)param2 | ((u64_t)param1 << 32);
  offset += (u64_t)n;

  // add the counter to the variable text, one base-95 digit per byte, walking from byte 53
  // towards byte 12; stop as soon as nothing is left to add
  int pos = 53;
  while(pos >= 12 && offset != 0)
  {
    const int where = pos ^ 3;
    // current digit (0..94) plus the next digit of the counter
    const u32_t sum = (u32_t)(bytes[where] - 32) + (u32_t)(offset % 95u);
    bytes[where] = (u08_t)(32u + sum % 95u);
    // remaining counter digits plus the carry out of this position
    offset = offset / 95u + (u64_t)(sum / 95u);
    pos--;
  }

  // hash the candidate: the macros tell CUSTOM_SHA1_CODE() how to read coin[] and write hash[]
# define T            u32_t
# define C(c)         (c)
# define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
# define DATA(idx)    coin[idx]
# define HASH(idx)    hash[idx]
  CUSTOM_SHA1_CODE();
# undef T
# undef C
# undef ROTATE
# undef DATA
# undef HASH

  // only candidates whose first hash word is 0xAAD20250 are kept
  if(hash[0] != 0xAAD20250u)
    return;

  // reserve 14 words; the previous value of word 0 is where this coin goes
  const u32_t first_word = atomicAdd(coins_storage_area, 14u);
  if(first_word >= 1024u - 14u)
    return; // no room left under the 1024-word limit used here

  u32_t *const dst = coins_storage_area + first_word;
  for(int i = 0; i < 14; i++)
    dst[i] = coin[i];
}

//
// DETI coin mining kernel that tries every printable character at one byte position
//
// Each thread builds the same 56-byte candidate template as mine_deti_coins_kernel
// ("DETI coin 2 " + 42 variable bytes + '\n' + 0x80, logical byte i stored at bytes[i ^ 3]),
// adds its own counter to the variable bytes while leaving scan_position untouched, and then
// hashes the candidate once for each value 32..126 placed at scan_position.
//
// Arguments:
//   coins_storage_area  output buffer; word 0 is advanced atomically by 14 for each coin found and
//                       its previous value is the index where the coin is written (the bound
//                       check below assumes a 1024-word buffer - TODO confirm against the host code)
//   param1, param2      high and low 32-bit halves of a 64-bit base counter; the global thread
//                       number (x dimension only) is added to it
//   scan_position       logical byte index to scan; must be in 12..53 to have any effect
//
// When scan_position is outside 12..53 no byte is scanned and the candidate is hashed exactly once
// (hashing the same unchanged candidate 95 times would only repeat work and could store the same
// coin many times).
//

extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1)
void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2, int scan_position)
{
  u32_t coin[14];
  u32_t hash[5];
  u32_t n;
  u08_t *bytes = (u08_t *)coin;

  // global thread number; only the x dimension of the launch configuration is used
  n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x;

  // fixed prefix, written one whole word at a time (most-significant byte first)
  coin[0] = 0x44455449u; // "DETI"
  coin[1] = 0x20636F69u; // " coi"
  coin[2] = 0x6E203220u; // "n 2 "

  // seed the variable bytes (12..53) with a repeating A-Z pattern
  for(int i = 12; i < 54; i++)
    bytes[i ^ 3] = 'A' + ((i - 12) % 26);

  // line terminator at byte 54, followed by the 0x80 byte at byte 55
  bytes[54 ^ 3] = '\n';
  bytes[55 ^ 3] = 0x80;

  // this thread's 64-bit counter: base counter (param1:param2) plus the thread number
  u64_t offset = ((u64_t)param1 << 32) | param2;
  offset += (u64_t)n;

  // add the counter to the variable text as a base-95 number (characters 32..126), walking from
  // byte 53 towards byte 12; the scan position is skipped without consuming a counter digit
  for(int pos = 53; pos >= 12 && offset > 0; pos--)
  {
    if(pos == scan_position)
      continue;

    u08_t *byte = &bytes[pos ^ 3];
    u64_t add = offset % 95;
    offset /= 95;

    u32_t val = (*byte - 32 + add);
    *byte = 32 + (val % 95);
    offset += val / 95; // carry into the next position
  }

  // the range test does not depend on the scanned value, so it is evaluated once; with an
  // out-of-range position the loop below degenerates to a single pass
  const bool scan_enabled = (scan_position >= 12 && scan_position < 54);
  const u32_t last_val = scan_enabled ? 126u : 32u;

  // the macros tell CUSTOM_SHA1_CODE() how to read coin[] and write hash[]
# define T            u32_t
# define C(c)         (c)
# define ROTATE(x,n)  (((x) << (n)) | ((x) >> (32 - (n))))
# define DATA(idx)    coin[idx]
# define HASH(idx)    hash[idx]

  // try every printable ASCII value (32..126) at the scan position; '\n' (10) is outside this
  // range, so no value ever needs to be skipped
  for(u32_t val = 32u; val <= last_val; val++)
  {
    if(scan_enabled)
      bytes[scan_position ^ 3] = (u08_t)val;

    CUSTOM_SHA1_CODE();

    // only candidates whose first hash word is 0xAAD20250 are kept
    if(hash[0] == 0xAAD20250u)
    {
      // reserve 14 words; the previous value of word 0 is where this coin goes
      u32_t idx = atomicAdd(coins_storage_area, 14u);
      if(idx < 1024u - 14u)
      {
        for(int i = 0; i < 14; i++)
          coins_storage_area[idx + i] = coin[i];
      }
    }
  }

# undef T
# undef C
# undef ROTATE
# undef DATA
# undef HASH
}