//
// Arquiteturas de Alto Desempenho 2025/2026
//
// DETI Coin Miner - OpenCL kernel
//
|
|
|
|
// Rotate left for SHA-1.
// Intended for 32-bit unsigned x and a rotation count 0 < n < 32.
// x and n are each expanded twice, so pass side-effect-free expressions only.
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

// SHA-1 round functions and additive constants.
// Every argument is parenthesized so that compound expressions (for example
// a | b) keep their meaning after expansion.

// Choose: for each bit, x selects between y and z
#define SHA1_F1(x,y,z) (((x) & (y)) | (~(x) & (z)))
#define SHA1_K1 0x5A827999u

// Parity
#define SHA1_F2(x,y,z) ((x) ^ (y) ^ (z))
#define SHA1_K2 0x6ED9EBA1u

// Majority: each result bit is the value held by at least two of x, y, z
#define SHA1_F3(x,y,z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
#define SHA1_K3 0x8F1BBCDCu

// Parity again (same function as SHA1_F2, different constant)
#define SHA1_F4(x,y,z) ((x) ^ (y) ^ (z))
#define SHA1_K4 0xCA62C1D6u
|
|
|
|
//
|
|
// SHA-1 implementation matching the template from aad_sha1.h
|
|
//
|
|
void sha1_compute(__private uint *coin, __private uint *hash)
|
|
{
|
|
uint a, b, c, d, e, w[16];
|
|
|
|
// Initial hash values
|
|
a = 0x67452301u;
|
|
b = 0xEFCDAB89u;
|
|
c = 0x98BADCFEu;
|
|
d = 0x10325476u;
|
|
e = 0xC3D2E1F0u;
|
|
|
|
// Load message schedule (first 14 words from coin, then 0, then length)
|
|
for(int i = 0; i < 14; i++)
|
|
w[i] = coin[i];
|
|
w[14] = 0;
|
|
w[15] = 440; // 55 bytes * 8 bits
|
|
|
|
// SHA-1 compression function - 80 rounds
|
|
uint tmp;
|
|
|
|
// Rounds 0-15
|
|
#define ROUND1(t) \
|
|
tmp = ROTATE_LEFT(a, 5) + SHA1_F1(b,c,d) + e + w[t] + SHA1_K1; \
|
|
e = d; d = c; c = ROTATE_LEFT(b, 30); b = a; a = tmp;
|
|
|
|
ROUND1(0); ROUND1(1); ROUND1(2); ROUND1(3);
|
|
ROUND1(4); ROUND1(5); ROUND1(6); ROUND1(7);
|
|
ROUND1(8); ROUND1(9); ROUND1(10); ROUND1(11);
|
|
ROUND1(12); ROUND1(13); ROUND1(14); ROUND1(15);
|
|
|
|
#undef ROUND1
|
|
|
|
// Rounds 16-79 with message schedule
|
|
#define ROUND(F, K, t) \
|
|
tmp = w[(t-3) & 15] ^ w[(t-8) & 15] ^ w[(t-14) & 15] ^ w[(t-16) & 15]; \
|
|
w[t & 15] = ROTATE_LEFT(tmp, 1); \
|
|
tmp = ROTATE_LEFT(a, 5) + F(b,c,d) + e + w[t & 15] + K; \
|
|
e = d; d = c; c = ROTATE_LEFT(b, 30); b = a; a = tmp;
|
|
|
|
ROUND(SHA1_F1, SHA1_K1, 16); ROUND(SHA1_F1, SHA1_K1, 17);
|
|
ROUND(SHA1_F1, SHA1_K1, 18); ROUND(SHA1_F1, SHA1_K1, 19);
|
|
|
|
ROUND(SHA1_F2, SHA1_K2, 20); ROUND(SHA1_F2, SHA1_K2, 21);
|
|
ROUND(SHA1_F2, SHA1_K2, 22); ROUND(SHA1_F2, SHA1_K2, 23);
|
|
ROUND(SHA1_F2, SHA1_K2, 24); ROUND(SHA1_F2, SHA1_K2, 25);
|
|
ROUND(SHA1_F2, SHA1_K2, 26); ROUND(SHA1_F2, SHA1_K2, 27);
|
|
ROUND(SHA1_F2, SHA1_K2, 28); ROUND(SHA1_F2, SHA1_K2, 29);
|
|
ROUND(SHA1_F2, SHA1_K2, 30); ROUND(SHA1_F2, SHA1_K2, 31);
|
|
ROUND(SHA1_F2, SHA1_K2, 32); ROUND(SHA1_F2, SHA1_K2, 33);
|
|
ROUND(SHA1_F2, SHA1_K2, 34); ROUND(SHA1_F2, SHA1_K2, 35);
|
|
ROUND(SHA1_F2, SHA1_K2, 36); ROUND(SHA1_F2, SHA1_K2, 37);
|
|
ROUND(SHA1_F2, SHA1_K2, 38); ROUND(SHA1_F2, SHA1_K2, 39);
|
|
|
|
ROUND(SHA1_F3, SHA1_K3, 40); ROUND(SHA1_F3, SHA1_K3, 41);
|
|
ROUND(SHA1_F3, SHA1_K3, 42); ROUND(SHA1_F3, SHA1_K3, 43);
|
|
ROUND(SHA1_F3, SHA1_K3, 44); ROUND(SHA1_F3, SHA1_K3, 45);
|
|
ROUND(SHA1_F3, SHA1_K3, 46); ROUND(SHA1_F3, SHA1_K3, 47);
|
|
ROUND(SHA1_F3, SHA1_K3, 48); ROUND(SHA1_F3, SHA1_K3, 49);
|
|
ROUND(SHA1_F3, SHA1_K3, 50); ROUND(SHA1_F3, SHA1_K3, 51);
|
|
ROUND(SHA1_F3, SHA1_K3, 52); ROUND(SHA1_F3, SHA1_K3, 53);
|
|
ROUND(SHA1_F3, SHA1_K3, 54); ROUND(SHA1_F3, SHA1_K3, 55);
|
|
ROUND(SHA1_F3, SHA1_K3, 56); ROUND(SHA1_F3, SHA1_K3, 57);
|
|
ROUND(SHA1_F3, SHA1_K3, 58); ROUND(SHA1_F3, SHA1_K3, 59);
|
|
|
|
ROUND(SHA1_F4, SHA1_K4, 60); ROUND(SHA1_F4, SHA1_K4, 61);
|
|
ROUND(SHA1_F4, SHA1_K4, 62); ROUND(SHA1_F4, SHA1_K4, 63);
|
|
ROUND(SHA1_F4, SHA1_K4, 64); ROUND(SHA1_F4, SHA1_K4, 65);
|
|
ROUND(SHA1_F4, SHA1_K4, 66); ROUND(SHA1_F4, SHA1_K4, 67);
|
|
ROUND(SHA1_F4, SHA1_K4, 68); ROUND(SHA1_F4, SHA1_K4, 69);
|
|
ROUND(SHA1_F4, SHA1_K4, 70); ROUND(SHA1_F4, SHA1_K4, 71);
|
|
ROUND(SHA1_F4, SHA1_K4, 72); ROUND(SHA1_F4, SHA1_K4, 73);
|
|
ROUND(SHA1_F4, SHA1_K4, 74); ROUND(SHA1_F4, SHA1_K4, 75);
|
|
ROUND(SHA1_F4, SHA1_K4, 76); ROUND(SHA1_F4, SHA1_K4, 77);
|
|
ROUND(SHA1_F4, SHA1_K4, 78); ROUND(SHA1_F4, SHA1_K4, 79);
|
|
|
|
#undef ROUND
|
|
|
|
// Add to initial values
|
|
hash[0] = a + 0x67452301u;
|
|
hash[1] = b + 0xEFCDAB89u;
|
|
hash[2] = c + 0x98BADCFEu;
|
|
hash[3] = d + 0x10325476u;
|
|
hash[4] = e + 0xC3D2E1F0u;
|
|
}
|
|
|
|
//
|
|
// Basic mining kernel - each work item tries one coin
|
|
//
|
|
__kernel void mine_deti_coins_kernel(__global uint *storage, uint param1, uint param2)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
uint coin[14];
|
|
uint hash[5];
|
|
|
|
// Zero initialize
|
|
for(int i = 0; i < 14; i++)
|
|
coin[i] = 0;
|
|
|
|
// Access as bytes with XOR 3 for endianness (little-endian word, big-endian bytes)
|
|
__private uchar *bytes = (__private uchar *)coin;
|
|
|
|
// Fixed prefix: "DETI coin 2 "
|
|
bytes[0x0 ^ 3] = 'D';
|
|
bytes[0x1 ^ 3] = 'E';
|
|
bytes[0x2 ^ 3] = 'T';
|
|
bytes[0x3 ^ 3] = 'I';
|
|
bytes[0x4 ^ 3] = ' ';
|
|
bytes[0x5 ^ 3] = 'c';
|
|
bytes[0x6 ^ 3] = 'o';
|
|
bytes[0x7 ^ 3] = 'i';
|
|
bytes[0x8 ^ 3] = 'n';
|
|
bytes[0x9 ^ 3] = ' ';
|
|
bytes[0xa ^ 3] = '2';
|
|
bytes[0xb ^ 3] = ' ';
|
|
|
|
// Fixed suffix: newline + padding
|
|
bytes[0x36 ^ 3] = '\n';
|
|
bytes[0x37 ^ 3] = 0x80;
|
|
|
|
// Variable content (42 bytes from position 12 to 53)
|
|
// Generate unique content for each thread
|
|
uint seed = param1 + gid * 0x9E3779B9u;
|
|
uint seed2 = param2 ^ (gid * 0x61C88647u);
|
|
|
|
for(int i = 12; i < 54; i++)
|
|
{
|
|
// LCG + xorshift mixer
|
|
seed = seed * 1664525u + 1013904223u;
|
|
seed2 ^= seed2 << 13;
|
|
seed2 ^= seed2 >> 17;
|
|
seed2 ^= seed2 << 5;
|
|
|
|
uchar val = 32 + ((seed ^ seed2) % 95);
|
|
|
|
// Skip newline character
|
|
if(val == '\n') val = ' ';
|
|
// Ensure we stay in printable range
|
|
if(val >= 127) val = 126;
|
|
|
|
bytes[i ^ 3] = val;
|
|
}
|
|
|
|
// Compute SHA-1
|
|
sha1_compute(coin, hash);
|
|
|
|
// Check for valid DETI coin v2 (hash starts with 0xAAD20250)
|
|
if(hash[0] == 0xAAD20250u)
|
|
{
|
|
// Atomically reserve space and store the coin
|
|
uint idx = atomic_add(&storage[0], 14u);
|
|
|
|
if(idx + 14 <= 1024)
|
|
{
|
|
// Store all 14 words of the coin
|
|
for(int i = 0; i < 14; i++)
|
|
storage[idx + i] = coin[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Scan kernel - each work item tries 256 variations
|
|
//
|
|
__kernel void mine_deti_coins_scan_kernel(__global uint *storage, uint param1, uint param2, int scan_pos)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
uint coin[14];
|
|
uint hash[5];
|
|
|
|
// Initialize coin
|
|
for(int i = 0; i < 14; i++)
|
|
coin[i] = 0;
|
|
|
|
__private uchar *bytes = (__private uchar *)coin;
|
|
|
|
// Fixed parts
|
|
bytes[0x0 ^ 3] = 'D';
|
|
bytes[0x1 ^ 3] = 'E';
|
|
bytes[0x2 ^ 3] = 'T';
|
|
bytes[0x3 ^ 3] = 'I';
|
|
bytes[0x4 ^ 3] = ' ';
|
|
bytes[0x5 ^ 3] = 'c';
|
|
bytes[0x6 ^ 3] = 'o';
|
|
bytes[0x7 ^ 3] = 'i';
|
|
bytes[0x8 ^ 3] = 'n';
|
|
bytes[0x9 ^ 3] = ' ';
|
|
bytes[0xa ^ 3] = '2';
|
|
bytes[0xb ^ 3] = ' ';
|
|
bytes[0x36 ^ 3] = '\n';
|
|
bytes[0x37 ^ 3] = 0x80;
|
|
|
|
// Generate base content unique to this thread
|
|
uint seed = param1 + gid * 0x9E3779B9u;
|
|
uint seed2 = param2 ^ (gid * 0x61C88647u);
|
|
|
|
for(int i = 12; i < 54; i++)
|
|
{
|
|
seed = seed * 1664525u + 1013904223u;
|
|
seed2 ^= seed2 << 13;
|
|
seed2 ^= seed2 >> 17;
|
|
seed2 ^= seed2 << 5;
|
|
|
|
uchar val = 32 + ((seed ^ seed2) % 95);
|
|
if(val == '\n') val = ' ';
|
|
if(val >= 127) val = 126;
|
|
|
|
bytes[i ^ 3] = val;
|
|
}
|
|
|
|
// Validate scan_pos
|
|
if(scan_pos < 12 || scan_pos >= 54)
|
|
scan_pos = 12;
|
|
|
|
// Scan through all printable ASCII values at scan_pos
|
|
for(uint c = 32; c < 127; c++)
|
|
{
|
|
if(c == '\n') continue; // Skip newline
|
|
|
|
bytes[scan_pos ^ 3] = (uchar)c;
|
|
|
|
sha1_compute(coin, hash);
|
|
|
|
if(hash[0] == 0xAAD20250u)
|
|
{
|
|
uint idx = atomic_add(&storage[0], 14u);
|
|
if(idx + 14 <= 1024)
|
|
{
|
|
for(int i = 0; i < 14; i++)
|
|
storage[idx + i] = coin[i];
|
|
}
|
|
}
|
|
}
|
|
}
|