//
// Tomás Oliveira e Silva,  September 2025
//
// Arquiteturas de Alto Desempenho 2025/2026
//
// template for the computation of the SHA1 secure hash
//


//
// compute the SHA1 secure hash of a custom message with exactly 55 bytes
//
// the general SHA1 secure hash algorithm ingests data in chunks of 64 bytes; at the end there must
// be one byte of padding, with value 0x80, zero or more bytes of zeros, also for padding, appended
// until the last chunk has 56 bytes, and a final 8 byte integer holding the number of bits of the
// entire message
//
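// by restricting the data to have 55 bytes or fewer the algorithm can be simplified, and only one
// chunk is needed; the code below goes one step further and handles only messages with exactly
// 55 bytes (the number of bits of the message, 55*8=440, is hard-coded)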
//

#ifndef AAD_SHA1
#define AAD_SHA1


//
// number of threads in each CUDA block
//
// we place this here to simplify things (aad_sha1_cuda_kernel.cu includes this file...)
//
#define RECOMENDED_CUDA_BLOCK_SIZE  128
// correctly spelled alias for the macro above; the original spelling is kept so that code that
// already uses it keeps compiling
#define RECOMMENDED_CUDA_BLOCK_SIZE  RECOMENDED_CUDA_BLOCK_SIZE


//
// each custom message has exactly 55 bytes, and must be followed by an additional byte with the
// value 0x80
// these 55+1=56 bytes must be stored in a 32-bit integer array with 14 elements as illustrated in
// the test code below; the secure hash has to be interpreted in the same way --- don't blame the
// teacher for this; that is how the SHA1 secure hash is described in the 3174 request for comments
// (https://datatracker.ietf.org/doc/html/rfc3174)
//
// the SHA1 secure hash of the 55 bytes message is computed using a macro called CUSTOM_SHA1_CODE
// it must be customized using the following additional macros:
//   T           --- the data type
//   C(c)        --- how to expand the constant c
//   ROTATE(x,n) --- how to rotate x left by n bits
//   DATA(idx)   --- how to access the data at index idx, 0 <= idx <= 13
//   HASH(idx)   --- how to access the hash at index idx, 0 <= idx <= 4
// see aad_sha1_cpu.h for examples
//
// each custom message is stored in the locations
//   DATA(0), DATA(1), ..., DATA(13)
// each SHA1 secure hash is stored in the locations
//   HASH(0), HASH(1), ..., HASH(4)
//

//
// first group of 20 iterations (0 <= t <= 19)
//
// SHA1_F1 selects, bit by bit, y where x is 1 and z where x is 0
// the arguments are parenthesized so that compound expressions (for example, p | q) can be passed
// safely; x is expanded twice, so it should not have side effects
#define SHA1_F1(x,y,z)  (((x) & (y)) | (~(x) & (z)))
#define SHA1_K1         0x5A827999u

//
// second group of 20 iterations (20 <= t <= 39)
//
// SHA1_F2 is the bitwise exclusive-or of its three arguments
// the arguments are parenthesized so that compound expressions (for example, p | q) can be passed
// safely
#define SHA1_F2(x,y,z)  ((x) ^ (y) ^ (z))
#define SHA1_K2         0x6ED9EBA1u

//
// third group of 20 iterations (40 <= t <= 59)
//
// SHA1_F3 is the bitwise majority function: each result bit is 1 when at least two of the
// corresponding bits of x, y, and z are 1
// the arguments are parenthesized so that compound expressions (for example, p | q) can be passed
// safely; each argument is expanded twice, so none of them should have side effects
#define SHA1_F3(x,y,z)  (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
#define SHA1_K3         0x8F1BBCDCu

//
// fourth group of 20 iterations (60 <= t <= 79)
//
// SHA1_F4 is the bitwise exclusive-or of its three arguments (the same function as the one used
// in the second group of iterations, but paired with a different constant)
// the arguments are parenthesized so that compound expressions (for example, p | q) can be passed
// safely
#define SHA1_F4(x,y,z)  ((x) ^ (y) ^ (z))
#define SHA1_K4         0xCA62C1D6u

//
// data mixing function
//
#define SHA1_D(t)                                                                            \
  do                                                                                         \
  {                                                                                          \
    /* xor the words 16, 14, 8, and 3 positions behind t (w is a 16-word circular buffer) */ \
    T mix = w[((t) - 16) & 15] ^ w[((t) - 14) & 15];                                         \
    mix = mix ^ w[((t) - 8) & 15] ^ w[((t) - 3) & 15];                                       \
    /* slot t mod 16 held word t-16, which was consumed above, so it can be overwritten */   \
    w[(t) & 15] = ROTATE(mix,1);                                                             \
  }                                                                                          \
  while(0)

//
// state mixing function
//
#define SHA1_S(F,t,K)                                                                        \
  do                                                                                         \
  {                                                                                          \
    /* new a = rotate(a,5) + F(b,c,d) + e + w[t mod 16] + K, accumulated left to right */    \
    T next_a = ROTATE(a,5) + F(b,c,d);                                                       \
    next_a = next_a + e;                                                                     \
    next_a = next_a + w[(t) & 15];                                                           \
    next_a = next_a + C(K);                                                                  \
    /* shift the state: e <- d, d <- c, c <- rotate(b,30), b <- a, a <- new value */         \
    e = d;                                                                                   \
    d = c;                                                                                   \
    c = ROTATE(b,30);                                                                        \
    b = a;                                                                                   \
    a = next_a;                                                                              \
  }                                                                                          \
  while(0)

//
// the CUSTOM_SHA1_CODE macro, for a little-endian processor
//
// everything is loop unrolled to make sure all indices are static integers, so the compiler
// has no excuse to produce sub-optimal code (the w[16] array can even become 16 separate
// integer variables, the CUDA compiler actually does this)
//
#define CUSTOM_SHA1_CODE()                                                                  \
  do                                                                                        \
  {                                                                                         \
    /* local variables */                                                                   \
    T a,b,c,d,e,w[16];                                                                      \
    /* initial state */                                                                     \
    a = C(0x67452301u);                                                                     \
    b = C(0xEFCDAB89u);                                                                     \
    c = C(0x98BADCFEu);                                                                     \
    d = C(0x10325476u);                                                                     \
    e = C(0xC3D2E1F0u);                                                                     \
    /* copy data to the internal buffer */                                                  \
    w[ 0] = DATA( 0);                                                                       \
    w[ 1] = DATA( 1);                                                                       \
    w[ 2] = DATA( 2);                                                                       \
    w[ 3] = DATA( 3);                                                                       \
    w[ 4] = DATA( 4);                                                                       \
    w[ 5] = DATA( 5);                                                                       \
    w[ 6] = DATA( 6);                                                                       \
    w[ 7] = DATA( 7);                                                                       \
    w[ 8] = DATA( 8);                                                                       \
    w[ 9] = DATA( 9);                                                                       \
    w[10] = DATA(10);                                                                       \
    w[11] = DATA(11);                                                                       \
    w[12] = DATA(12);                                                                       \
    w[13] = DATA(13); /* WARNING: DATA(13) & 0xFF must be 0x80 (SHA1 padding) */            \
    w[14] = C(0);                                                                           \
    w[15] = C(440); /* the message has 55*8 bits */                                         \
    /* first group of 20 iterations (0 <= t <= 19) */                                       \
                SHA1_S(SHA1_F1, 0,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 1,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 2,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 3,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 4,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 5,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 6,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 7,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 8,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1, 9,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1,10,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1,11,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1,12,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1,13,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1,14,SHA1_K1);                                                 \
                SHA1_S(SHA1_F1,15,SHA1_K1);                                                 \
    SHA1_D(16); SHA1_S(SHA1_F1,16,SHA1_K1);                                                 \
    SHA1_D(17); SHA1_S(SHA1_F1,17,SHA1_K1);                                                 \
    SHA1_D(18); SHA1_S(SHA1_F1,18,SHA1_K1);                                                 \
    SHA1_D(19); SHA1_S(SHA1_F1,19,SHA1_K1);                                                 \
    /* second group of 20 iterations (20 <= t <= 39) */                                     \
    SHA1_D(20); SHA1_S(SHA1_F2,20,SHA1_K2);                                                 \
    SHA1_D(21); SHA1_S(SHA1_F2,21,SHA1_K2);                                                 \
    SHA1_D(22); SHA1_S(SHA1_F2,22,SHA1_K2);                                                 \
    SHA1_D(23); SHA1_S(SHA1_F2,23,SHA1_K2);                                                 \
    SHA1_D(24); SHA1_S(SHA1_F2,24,SHA1_K2);                                                 \
    SHA1_D(25); SHA1_S(SHA1_F2,25,SHA1_K2);                                                 \
    SHA1_D(26); SHA1_S(SHA1_F2,26,SHA1_K2);                                                 \
    SHA1_D(27); SHA1_S(SHA1_F2,27,SHA1_K2);                                                 \
    SHA1_D(28); SHA1_S(SHA1_F2,28,SHA1_K2);                                                 \
    SHA1_D(29); SHA1_S(SHA1_F2,29,SHA1_K2);                                                 \
    SHA1_D(30); SHA1_S(SHA1_F2,30,SHA1_K2);                                                 \
    SHA1_D(31); SHA1_S(SHA1_F2,31,SHA1_K2);                                                 \
    SHA1_D(32); SHA1_S(SHA1_F2,32,SHA1_K2);                                                 \
    SHA1_D(33); SHA1_S(SHA1_F2,33,SHA1_K2);                                                 \
    SHA1_D(34); SHA1_S(SHA1_F2,34,SHA1_K2);                                                 \
    SHA1_D(35); SHA1_S(SHA1_F2,35,SHA1_K2);                                                 \
    SHA1_D(36); SHA1_S(SHA1_F2,36,SHA1_K2);                                                 \
    SHA1_D(37); SHA1_S(SHA1_F2,37,SHA1_K2);                                                 \
    SHA1_D(38); SHA1_S(SHA1_F2,38,SHA1_K2);                                                 \
    SHA1_D(39); SHA1_S(SHA1_F2,39,SHA1_K2);                                                 \
    /* third group of 20 iterations (40 <= t <= 59) */                                      \
    SHA1_D(40); SHA1_S(SHA1_F3,40,SHA1_K3);                                                 \
    SHA1_D(41); SHA1_S(SHA1_F3,41,SHA1_K3);                                                 \
    SHA1_D(42); SHA1_S(SHA1_F3,42,SHA1_K3);                                                 \
    SHA1_D(43); SHA1_S(SHA1_F3,43,SHA1_K3);                                                 \
    SHA1_D(44); SHA1_S(SHA1_F3,44,SHA1_K3);                                                 \
    SHA1_D(45); SHA1_S(SHA1_F3,45,SHA1_K3);                                                 \
    SHA1_D(46); SHA1_S(SHA1_F3,46,SHA1_K3);                                                 \
    SHA1_D(47); SHA1_S(SHA1_F3,47,SHA1_K3);                                                 \
    SHA1_D(48); SHA1_S(SHA1_F3,48,SHA1_K3);                                                 \
    SHA1_D(49); SHA1_S(SHA1_F3,49,SHA1_K3);                                                 \
    SHA1_D(50); SHA1_S(SHA1_F3,50,SHA1_K3);                                                 \
    SHA1_D(51); SHA1_S(SHA1_F3,51,SHA1_K3);                                                 \
    SHA1_D(52); SHA1_S(SHA1_F3,52,SHA1_K3);                                                 \
    SHA1_D(53); SHA1_S(SHA1_F3,53,SHA1_K3);                                                 \
    SHA1_D(54); SHA1_S(SHA1_F3,54,SHA1_K3);                                                 \
    SHA1_D(55); SHA1_S(SHA1_F3,55,SHA1_K3);                                                 \
    SHA1_D(56); SHA1_S(SHA1_F3,56,SHA1_K3);                                                 \
    SHA1_D(57); SHA1_S(SHA1_F3,57,SHA1_K3);                                                 \
    SHA1_D(58); SHA1_S(SHA1_F3,58,SHA1_K3);                                                 \
    SHA1_D(59); SHA1_S(SHA1_F3,59,SHA1_K3);                                                 \
    /* fourth group of 20 iterations (60 <= t <= 79) */                                     \
    SHA1_D(60); SHA1_S(SHA1_F4,60,SHA1_K4);                                                 \
    SHA1_D(61); SHA1_S(SHA1_F4,61,SHA1_K4);                                                 \
    SHA1_D(62); SHA1_S(SHA1_F4,62,SHA1_K4);                                                 \
    SHA1_D(63); SHA1_S(SHA1_F4,63,SHA1_K4);                                                 \
    SHA1_D(64); SHA1_S(SHA1_F4,64,SHA1_K4);                                                 \
    SHA1_D(65); SHA1_S(SHA1_F4,65,SHA1_K4);                                                 \
    SHA1_D(66); SHA1_S(SHA1_F4,66,SHA1_K4);                                                 \
    SHA1_D(67); SHA1_S(SHA1_F4,67,SHA1_K4);                                                 \
    SHA1_D(68); SHA1_S(SHA1_F4,68,SHA1_K4);                                                 \
    SHA1_D(69); SHA1_S(SHA1_F4,69,SHA1_K4);                                                 \
    SHA1_D(70); SHA1_S(SHA1_F4,70,SHA1_K4);                                                 \
    SHA1_D(71); SHA1_S(SHA1_F4,71,SHA1_K4);                                                 \
    SHA1_D(72); SHA1_S(SHA1_F4,72,SHA1_K4);                                                 \
    SHA1_D(73); SHA1_S(SHA1_F4,73,SHA1_K4);                                                 \
    SHA1_D(74); SHA1_S(SHA1_F4,74,SHA1_K4);                                                 \
    SHA1_D(75); SHA1_S(SHA1_F4,75,SHA1_K4);                                                 \
    SHA1_D(76); SHA1_S(SHA1_F4,76,SHA1_K4);                                                 \
    SHA1_D(77); SHA1_S(SHA1_F4,77,SHA1_K4);                                                 \
    SHA1_D(78); SHA1_S(SHA1_F4,78,SHA1_K4);                                                 \
    SHA1_D(79); SHA1_S(SHA1_F4,79,SHA1_K4);                                                 \
    /* update state (in this special case, finish) */                                       \
    HASH(0) = a + C(0x67452301u);                                                           \
    HASH(1) = b + C(0xEFCDAB89u);                                                           \
    HASH(2) = c + C(0x98BADCFEu);                                                           \
    HASH(3) = d + C(0x10325476u);                                                           \
    HASH(4) = e + C(0xC3D2E1F0u);                                                           \
  }                                                                                         \
  while(0)


//
// the end!
//

#endif