176 lines
6.1 KiB
Common Lisp
176 lines
6.1 KiB
Common Lisp
//
|
|
// Arquiteturas de Alto Desempenho 2025/2026
|
|
//
|
|
// SHA-1 OpenCL kernel - validation and testing
|
|
//
|
|
|
|
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
|
|
|
|
// SHA-1 functions
|
|
#define SHA1_F1(x,y,z) ((x & y) | (~x & z))
|
|
#define SHA1_K1 0x5A827999u
|
|
#define SHA1_F2(x,y,z) (x ^ y ^ z)
|
|
#define SHA1_K2 0x6ED9EBA1u
|
|
#define SHA1_F3(x,y,z) ((x & y) | (x & z) | (y & z))
|
|
#define SHA1_K3 0x8F1BBCDCu
|
|
#define SHA1_F4(x,y,z) (x ^ y ^ z)
|
|
#define SHA1_K4 0xCA62C1D6u
|
|
|
|
//
|
|
// sha1_ocl_kernel() --- each OpenCL work item computes the SHA-1 secure hash of one message
|
|
//
|
|
// This kernel should only be used to validate the SHA-1 secure hash code in OpenCL
|
|
// It matches the structure of the CUDA sha1_cuda_kernel
|
|
//
|
|
// The data is organized in an interleaved fashion for optimal memory access:
|
|
// For work group size W, the layout is:
|
|
// data[work_group_num][14][W]
|
|
// where data for the same index across all work items in a group are consecutive
|
|
//
|
|
__kernel void sha1_ocl_kernel(__global uint *interleaved_data, __global uint *interleaved_hash)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
uint local_id = get_local_id(0);
|
|
uint local_size = get_local_size(0);
|
|
uint group_id = get_group_id(0);
|
|
|
|
// Adjust pointers for interleaved access pattern
|
|
// Each work group processes local_size messages
|
|
// Within a group, data is interleaved: all work items' data[0], then all data[1], etc.
|
|
__global uint *data = &interleaved_data[group_id * (local_size * 14) + local_id];
|
|
__global uint *hash = &interleaved_hash[group_id * (local_size * 5) + local_id];
|
|
|
|
// Local storage for computation
|
|
uint a, b, c, d, e, w[16];
|
|
uint tmp;
|
|
|
|
// Initial hash values
|
|
a = 0x67452301u;
|
|
b = 0xEFCDAB89u;
|
|
c = 0x98BADCFEu;
|
|
d = 0x10325476u;
|
|
e = 0xC3D2E1F0u;
|
|
|
|
// Load message schedule from interleaved data
|
|
// DATA(idx) accesses data[idx * local_size] to get the correct interleaved value
|
|
for(int i = 0; i < 14; i++)
|
|
w[i] = data[local_size * i];
|
|
w[14] = 0;
|
|
w[15] = 440; // 55 bytes * 8 bits
|
|
|
|
// SHA-1 compression - 80 rounds
|
|
|
|
// Rounds 0-15 (no message schedule expansion needed)
|
|
#define SHA1_STEP(F, K, t) \
|
|
tmp = ROTATE_LEFT(a, 5) + F(b,c,d) + e + w[t] + K; \
|
|
e = d; d = c; c = ROTATE_LEFT(b, 30); b = a; a = tmp;
|
|
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 0);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 1);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 2);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 3);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 4);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 5);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 6);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 7);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 8);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 9);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 10);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 11);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 12);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 13);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 14);
|
|
SHA1_STEP(SHA1_F1, SHA1_K1, 15);
|
|
|
|
#undef SHA1_STEP
|
|
|
|
// Rounds 16-79 with message schedule expansion
|
|
#define SHA1_EXPAND_STEP(F, K, t) \
|
|
tmp = w[(t-3) & 15] ^ w[(t-8) & 15] ^ w[(t-14) & 15] ^ w[(t-16) & 15]; \
|
|
w[t & 15] = ROTATE_LEFT(tmp, 1); \
|
|
tmp = ROTATE_LEFT(a, 5) + F(b,c,d) + e + w[t & 15] + K; \
|
|
e = d; d = c; c = ROTATE_LEFT(b, 30); b = a; a = tmp;
|
|
|
|
// Rounds 16-19 (still using F1)
|
|
SHA1_EXPAND_STEP(SHA1_F1, SHA1_K1, 16);
|
|
SHA1_EXPAND_STEP(SHA1_F1, SHA1_K1, 17);
|
|
SHA1_EXPAND_STEP(SHA1_F1, SHA1_K1, 18);
|
|
SHA1_EXPAND_STEP(SHA1_F1, SHA1_K1, 19);
|
|
|
|
// Rounds 20-39 (F2)
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 20);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 21);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 22);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 23);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 24);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 25);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 26);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 27);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 28);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 29);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 30);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 31);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 32);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 33);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 34);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 35);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 36);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 37);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 38);
|
|
SHA1_EXPAND_STEP(SHA1_F2, SHA1_K2, 39);
|
|
|
|
// Rounds 40-59 (F3)
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 40);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 41);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 42);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 43);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 44);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 45);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 46);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 47);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 48);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 49);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 50);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 51);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 52);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 53);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 54);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 55);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 56);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 57);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 58);
|
|
SHA1_EXPAND_STEP(SHA1_F3, SHA1_K3, 59);
|
|
|
|
// Rounds 60-79 (F4)
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 60);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 61);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 62);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 63);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 64);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 65);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 66);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 67);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 68);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 69);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 70);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 71);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 72);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 73);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 74);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 75);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 76);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 77);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 78);
|
|
SHA1_EXPAND_STEP(SHA1_F4, SHA1_K4, 79);
|
|
|
|
#undef SHA1_EXPAND_STEP
|
|
|
|
// Add to initial hash values and store in interleaved format
|
|
// HASH(idx) writes to hash[idx * local_size]
|
|
hash[local_size * 0] = a + 0x67452301u;
|
|
hash[local_size * 1] = b + 0xEFCDAB89u;
|
|
hash[local_size * 2] = c + 0x98BADCFEu;
|
|
hash[local_size * 3] = d + 0x10325476u;
|
|
hash[local_size * 4] = e + 0xC3D2E1F0u;
|
|
}
|