commit 6586f9d4c29b61fdd9ebcf15634749532a356418 Author: RubenCGomes Date: Mon Oct 6 11:35:12 2025 +0100 init comiit diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/copilot.data.migration.agent.xml b/.idea/copilot.data.migration.agent.xml new file mode 100644 index 0000000..4ea72a9 --- /dev/null +++ b/.idea/copilot.data.migration.agent.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/copilot.data.migration.ask.xml b/.idea/copilot.data.migration.ask.xml new file mode 100644 index 0000000..7ef04e2 --- /dev/null +++ b/.idea/copilot.data.migration.ask.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/copilot.data.migration.ask2agent.xml b/.idea/copilot.data.migration.ask2agent.xml new file mode 100644 index 0000000..1f2ea11 --- /dev/null +++ b/.idea/copilot.data.migration.ask2agent.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/copilot.data.migration.edit.xml b/.idea/copilot.data.migration.edit.xml new file mode 100644 index 0000000..8648f94 --- /dev/null +++ b/.idea/copilot.data.migration.edit.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/editor.xml b/.idea/editor.xml new file mode 100644 index 0000000..ead1d8a --- /dev/null +++ b/.idea/editor.xml @@ -0,0 +1,248 @@ + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..e558998 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,48 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..dd7d771 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,21 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/AAD_A1.pdf b/AAD_A1.pdf new file mode 100644 index 0000000..44ec3ac Binary files /dev/null and b/AAD_A1.pdf differ diff --git a/aad_cuda_utilities.h b/aad_cuda_utilities.h new file mode 100644 index 0000000..28a79e3 --- /dev/null +++ b/aad_cuda_utilities.h @@ -0,0 +1,270 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// +// CUDA driver API stuff +// + +#ifndef AAD_CUDA_UTILITIES +#define AAD_CUDA_UTILITIES + +#include + + +// +// data type used to store all CUDA related stuff +// + +#define MAX_N_ARGUMENTS 4 + +typedef struct +{ + // input data + int device_number; // number of the device to initialize + char *cubin_file_name; // name of the cubin file to load (NULL if not needed) + char *kernel_name; // name of the CUDA kernel to load (NULL if not needed) + u32_t data_size[2]; // the number of bytes of the two data arrays to allocate on the host and on the device (0 if not needed) + // persistent data + CUdevice cu_device; // the device yhandle + char device_name[256]; // the device name + CUcontext cu_context; // the device context + CUmodule cu_module; // the loaded cubin file contents + CUfunction cu_kernel; // the pointer to the CUDA kernel + CUstream cu_stream; // the command stream + void *host_data[2]; // the pointers to the host data + CUdeviceptr device_data[2]; // the pointers to the device data + // launch kernel data + unsigned int grid_dim_x; // the number of grid blocks (in the X dimension, the only one we will use here) + unsigned int block_dim_x; // the number of threads in a block (in the X dimension, the only one we will use here, should be equal to RECOMENDED_CUDA_BLOCK_SIZE) + int n_kernel_arguments; // number of kernel arguments + void *arg[MAX_N_ARGUMENTS]; // pointers to the kernel argument data + +} +cuda_data_t; + + +// +// CU_CALL --- macro that should be used to call a CUDA driver API function and to test its return value +// +// it should be used to test the return value of calls such as +// cuInit(device_number); +// cuDeviceGet(&cu_device,device_number); +// +// in these cases, f_name is, respectively, cuInit and cuDeviceGet, and args is, respectively, +// (device_number) and (&cu_device,device_number) +// + +#define CU_CALL(f_name,args) \ + do \ + { \ + CUresult e = f_name args; \ + if(e != CUDA_SUCCESS) \ + { /* the call failed, terminate the program */ \ + fprintf(stderr,"" # f_name "() returned %s (file %s, line %d)\n",cu_error_string(e),__FILE__,__LINE__); \ + exit(1); \ + } \ + } \ + while(0) + + +// +// terse description of the CUDA error codes (replacement of the error code number by its name) +// + +static const char *cu_error_string(CUresult e) +{ + static char error_string[64]; +# define CASE(error_code) case error_code: return "" # error_code; + switch((int)e) + { // list of error codes extracted from cuda.h (TODO: /usr/local/cuda-10.2/targets/x86_64-linux/include/CL) + default: sprintf(error_string,"unknown error code (%d)",(int)e); return(error_string); + CASE(CUDA_SUCCESS ); + CASE(CUDA_ERROR_INVALID_VALUE ); + CASE(CUDA_ERROR_OUT_OF_MEMORY ); + CASE(CUDA_ERROR_NOT_INITIALIZED ); + CASE(CUDA_ERROR_DEINITIALIZED ); + CASE(CUDA_ERROR_PROFILER_DISABLED ); + CASE(CUDA_ERROR_PROFILER_NOT_INITIALIZED ); + CASE(CUDA_ERROR_PROFILER_ALREADY_STARTED ); + CASE(CUDA_ERROR_PROFILER_ALREADY_STOPPED ); + CASE(CUDA_ERROR_NO_DEVICE ); + CASE(CUDA_ERROR_INVALID_DEVICE ); + CASE(CUDA_ERROR_INVALID_IMAGE ); + CASE(CUDA_ERROR_INVALID_CONTEXT ); + CASE(CUDA_ERROR_CONTEXT_ALREADY_CURRENT ); + CASE(CUDA_ERROR_MAP_FAILED ); + CASE(CUDA_ERROR_UNMAP_FAILED ); + CASE(CUDA_ERROR_ARRAY_IS_MAPPED ); + CASE(CUDA_ERROR_ALREADY_MAPPED ); + CASE(CUDA_ERROR_NO_BINARY_FOR_GPU ); + CASE(CUDA_ERROR_ALREADY_ACQUIRED ); + CASE(CUDA_ERROR_NOT_MAPPED ); + CASE(CUDA_ERROR_NOT_MAPPED_AS_ARRAY ); + CASE(CUDA_ERROR_NOT_MAPPED_AS_POINTER ); + CASE(CUDA_ERROR_ECC_UNCORRECTABLE ); + CASE(CUDA_ERROR_UNSUPPORTED_LIMIT ); + CASE(CUDA_ERROR_CONTEXT_ALREADY_IN_USE ); + CASE(CUDA_ERROR_PEER_ACCESS_UNSUPPORTED ); + CASE(CUDA_ERROR_INVALID_PTX ); + CASE(CUDA_ERROR_INVALID_GRAPHICS_CONTEXT ); + CASE(CUDA_ERROR_NVLINK_UNCORRECTABLE ); + CASE(CUDA_ERROR_INVALID_SOURCE ); + CASE(CUDA_ERROR_FILE_NOT_FOUND ); + CASE(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND); + CASE(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ); + CASE(CUDA_ERROR_OPERATING_SYSTEM ); + CASE(CUDA_ERROR_INVALID_HANDLE ); + CASE(CUDA_ERROR_NOT_FOUND ); + CASE(CUDA_ERROR_NOT_READY ); + CASE(CUDA_ERROR_ILLEGAL_ADDRESS ); + CASE(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ); + CASE(CUDA_ERROR_LAUNCH_TIMEOUT ); + CASE(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ); + CASE(CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ); + CASE(CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ); + CASE(CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ); + CASE(CUDA_ERROR_CONTEXT_IS_DESTROYED ); + CASE(CUDA_ERROR_ASSERT ); + CASE(CUDA_ERROR_TOO_MANY_PEERS ); + CASE(CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED); + CASE(CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ); + CASE(CUDA_ERROR_HARDWARE_STACK_ERROR ); + CASE(CUDA_ERROR_ILLEGAL_INSTRUCTION ); + CASE(CUDA_ERROR_MISALIGNED_ADDRESS ); + CASE(CUDA_ERROR_INVALID_ADDRESS_SPACE ); + CASE(CUDA_ERROR_INVALID_PC ); + CASE(CUDA_ERROR_LAUNCH_FAILED ); + CASE(CUDA_ERROR_NOT_PERMITTED ); + CASE(CUDA_ERROR_NOT_SUPPORTED ); + CASE(CUDA_ERROR_UNKNOWN ); + }; +# undef CASE +} + + +// +// synchonize the stream command buffer +// + +static void synchronize_cuda(cuda_data_t *cd) +{ + CU_CALL( cuStreamSynchronize , (cd->cu_stream) ); +} + +// +// initialize the CUDA driver API interface +// +// load a single cubin file, with a single CUDA kernel +// allocate up to two storage areas both on the host and on the device +// + +static void initialize_cuda(cuda_data_t *cd) +{ + // + // initialize the driver API interface + // + CU_CALL( cuInit , (0) ); + // + // open the CUDA device + // + CU_CALL( cuDeviceGet , (&cd->cu_device,cd->device_number) ); + // + // get information about the CUDA device + // + CU_CALL( cuDeviceGetName , (cd->device_name,(int)sizeof(cd->device_name) - 1,cd->cu_device) ); + printf("initialize_cuda(): CUDA code running on a %s (device %d, CUDA %u.%u.%u)\n",cd->device_name,cd->device_number,CUDA_VERSION / 1000,(CUDA_VERSION / 10) % 100,CUDA_VERSION % 10); + // + // create a context + // + CU_CALL( cuCtxCreate , (&cd->cu_context,CU_CTX_SCHED_YIELD,cd->cu_device) ); + CU_CALL( cuCtxSetCacheConfig , (CU_FUNC_CACHE_PREFER_L1) ); + // + // load precompiled modules + // + CU_CALL( cuModuleLoad , (&cd->cu_module,cd->cubin_file_name) ); + // + // get the kernel function pointers + // + CU_CALL( cuModuleGetFunction, (&cd->cu_kernel,cd->cu_module,cd->kernel_name) ); + // + // create a command stream (we could have used the default stream) + // + CU_CALL( cuStreamCreate, (&cd->cu_stream,CU_STREAM_NON_BLOCKING) ); + // + // allocate host and device memory + // + for(int i = 0;i < 2;i++) + if(cd->data_size[i] > 0u) + { + CU_CALL( cuMemAllocHost , ((void **)&cd->host_data[i] ,(size_t)cd->data_size[i]) ); + CU_CALL( cuMemAlloc , (&cd->device_data[i],(size_t)cd->data_size[i]) ); + } + else + cd->host_data[i] = NULL; + // + // catch any lingering errors + // + synchronize_cuda(cd); +} + + +// +// terminate the CUDA driver API interface +// + +static void terminate_cuda(cuda_data_t *cd) +{ + CU_CALL( cuStreamDestroy, (cd->cu_stream) ); + for(int i = 0;i < 2;i++) + if(cd->data_size[i] > 0u) + { + CU_CALL( cuMemFreeHost , (cd->host_data[i]) ); + CU_CALL( cuMemFree , (cd->device_data[i]) ); + } + CU_CALL( cuModuleUnload , (cd->cu_module) ); + CU_CALL( cuCtxDestroy , (cd->cu_context) ); +} + + +// +// copy data from the host to the device and from the device to the host +// + +static void host_to_device_copy(cuda_data_t *cd,int idx) +{ + if(idx < 0 || idx > 1 || cd->data_size[idx] == 0u) + { + fprintf(stderr,"host_to_device_copy(): bad idx\n"); + exit(1); + } + CU_CALL( cuMemcpyHtoD , (cd->device_data[idx],(void *)cd->host_data[idx],(size_t)cd->data_size[idx]) ); + synchronize_cuda(cd); +} + +static void device_to_host_copy(cuda_data_t *cd,int idx) +{ + if(idx < 0 || idx > 1 || cd->data_size[idx] == 0u) + { + fprintf(stderr,"device_to_host_copy(): bad idx\n"); + exit(1); + } + CU_CALL( cuMemcpyDtoH , ((void *)cd->host_data[idx],cd->device_data[idx],(size_t)cd->data_size[idx]) ); + synchronize_cuda(cd); +} + + + +// +// launch a CUDA kernel (with 0 bytes of shared memory and no extra options) +// + +static void lauch_kernel(cuda_data_t *cd) +{ + if(cd->block_dim_x != (unsigned int)RECOMENDED_CUDA_BLOCK_SIZE) + fprintf(stderr,"lauch_kernel(): block_dim_x should be equal to %d\n",RECOMENDED_CUDA_BLOCK_SIZE); + CU_CALL( cuLaunchKernel , (cd->cu_kernel,cd->grid_dim_x,1u,1u,cd->block_dim_x,1u,1u,0u,cd->cu_stream,&cd->arg[0],NULL) ); + synchronize_cuda(cd); +} + +#endif diff --git a/aad_data_types.h b/aad_data_types.h new file mode 100644 index 0000000..051be1b --- /dev/null +++ b/aad_data_types.h @@ -0,0 +1,48 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// +// integer data types +// + +#ifndef AAD_DATA_TYPES +#define AAD_DATA_TYPES + +// +// scalar data types (for a typical 64-bit processor) +// + +typedef signed char s08_t; // 8-bit signed integer +typedef unsigned char u08_t; // 8-bit unsigned integer +typedef signed short s16_t; // 16-bit signed integer +typedef unsigned short u16_t; // 16-bit unsigned integer +typedef signed int s32_t; // 32-bit signed integer +typedef unsigned int u32_t; // 32-bit unsigned integer +typedef signed long s64_t; // 64-bit signed integer +typedef unsigned long u64_t; // 64-bit unsigned integer + + +// +// vector data types (this probably will only work on the gcc compiler) +// + +#if defined(__AVX__) +typedef int v4si __attribute__((vector_size(16))) __attribute__((aligned(16))); +#endif +#if defined(__AVX2__) +typedef int v8si __attribute__((vector_size(32))) __attribute__((aligned(32))); +#endif +#if defined(__AVX512F__) +typedef int v16si __attribute__((vector_size(64))) __attribute__((aligned(64))); +#endif +#if defined(__ARM_NEON) +# include +#endif + + +// +// the end! +// + +#endif diff --git a/aad_sha1.h b/aad_sha1.h new file mode 100644 index 0000000..54c4fe3 --- /dev/null +++ b/aad_sha1.h @@ -0,0 +1,240 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// +// template for the computation of the SHA1 secure hash +// + + +// +// compute the SHA1 secure hash of a custom message with exactly 55 bytes +// +// the general SHA1 secure hash algorithm ingests data in chunks of 64 bytes; at the end there must +// be one byte of padding, with value 0x80, zero or more bytes of zeros, also for padding, appended +// until the last chunk has 56 bytes, and a final 8 byte integer holding the number of bits of the +// entire message +// +// by restricting the data to have 55 bytes or less the algorithm can be simplified, and only one +// chunk is needed; that is what is done below +// + +#ifndef AAD_SHA1 +#define AAD_SHA1 + + +// +// number of threads in each CUDA block +// +// we place this here to simplify things (aad_sha1_cuda_kernel.cu includes this file...) +// +#define RECOMENDED_CUDA_BLOCK_SIZE 128 + + +// +// each custom message has exactly 55 bytes, and must be followed by an additional byte with the +// value 0x80 +// these 55+1=56 bytes must be stored in a 32-bit integer array with 14 elements as illustrated in +// the test code below; the secure hash has to be interpreted in the same way --- don't blame the +// teacher for this; that is how the SHA1 secure hash is described in the 3174 request for comments +// (https://datatracker.ietf.org/doc/html/rfc3174) +// +// the SHA1 secure hash of the 55 bytes message is computed using a macro called CUSTOM_SHA1_CODE +// it must be customized using the following additional macros: +// T --- the data type +// C(c) --- how to expand the constant c +// ROTATE(x,n) --- how to rotate x left by n bits +// DATA(idx) --- how to access the data at index idx, 0 <= idx <= 13 +// HASH(idx) --- how to access the hash at index idx, 0 <= idx <= 4 +// see aad_sha1_cpu.h for examples +// +// each custom message is stored in the locations +// DATA(0), DATA(1), ..., DATA(13) +// each SHA1 secure hash is stored in the locations +// HASH(0), HASH(1), ..., HASH(4) +// + +// +// first group of 20 iterations (0 <= t <= 19) +// +#define SHA1_F1(x,y,z) ((x & y) | (~x & z)) +#define SHA1_K1 0x5A827999u + +// +// second group of 20 iterations (20 <= t <= 39) +// +#define SHA1_F2(x,y,z) (x ^ y ^ z) +#define SHA1_K2 0x6ED9EBA1u + +// +// third group of 20 iterations (40 <= t <= 59) +// +#define SHA1_F3(x,y,z) ((x & y) | (x & z) | (y & z)) +#define SHA1_K3 0x8F1BBCDCu + +// +// fourth group of 20 iterations (60 <= t <= 79) +// +#define SHA1_F4(x,y,z) (x ^ y ^ z) +#define SHA1_K4 0xCA62C1D6u + +// +// data mixing function +// +#define SHA1_D(t) \ + do \ + { \ + T tmp = w[((t) - 3) & 15] ^ w[((t) - 8) & 15] ^ w[((t) - 14) & 15] ^ w[((t) - 16) & 15]; \ + w[(t) & 15] = ROTATE(tmp,1); \ + } \ + while(0) + +// +// state mixing function +// +#define SHA1_S(F,t,K) \ + do \ + { \ + T tmp = ROTATE(a,5) + F(b,c,d) + e + w[(t) & 15] + C(K); \ + e = d; \ + d = c; \ + c = ROTATE(b,30); \ + b = a; \ + a = tmp; \ + } \ + while(0) + +// +// the CUSTOM_SHA1_CODE macro, for a little-endian processor +// +// everything is loop unrolled to make sure all indices are static integers, so the compiler +// has no excuse to produce sub-optimal code (the w[16] array can even become 16 separate +// integer variables, the CUDA compiler actually does this) +// +#define CUSTOM_SHA1_CODE() \ + do \ + { \ + /* local variables */ \ + T a,b,c,d,e,w[16]; \ + /* initial state */ \ + a = C(0x67452301u); \ + b = C(0xEFCDAB89u); \ + c = C(0x98BADCFEu); \ + d = C(0x10325476u); \ + e = C(0xC3D2E1F0u); \ + /* copy data to the internal buffer */ \ + w[ 0] = DATA( 0); \ + w[ 1] = DATA( 1); \ + w[ 2] = DATA( 2); \ + w[ 3] = DATA( 3); \ + w[ 4] = DATA( 4); \ + w[ 5] = DATA( 5); \ + w[ 6] = DATA( 6); \ + w[ 7] = DATA( 7); \ + w[ 8] = DATA( 8); \ + w[ 9] = DATA( 9); \ + w[10] = DATA(10); \ + w[11] = DATA(11); \ + w[12] = DATA(12); \ + w[13] = DATA(13); /* WARNING: DATA(13) & 0xFF must be 0x80 (SHA1 padding) */ \ + w[14] = C(0); \ + w[15] = C(440); /* the message has 55*8 bits */ \ + /* first group of 20 iterations (0 <= t <= 19) */ \ + SHA1_S(SHA1_F1, 0,SHA1_K1); \ + SHA1_S(SHA1_F1, 1,SHA1_K1); \ + SHA1_S(SHA1_F1, 2,SHA1_K1); \ + SHA1_S(SHA1_F1, 3,SHA1_K1); \ + SHA1_S(SHA1_F1, 4,SHA1_K1); \ + SHA1_S(SHA1_F1, 5,SHA1_K1); \ + SHA1_S(SHA1_F1, 6,SHA1_K1); \ + SHA1_S(SHA1_F1, 7,SHA1_K1); \ + SHA1_S(SHA1_F1, 8,SHA1_K1); \ + SHA1_S(SHA1_F1, 9,SHA1_K1); \ + SHA1_S(SHA1_F1,10,SHA1_K1); \ + SHA1_S(SHA1_F1,11,SHA1_K1); \ + SHA1_S(SHA1_F1,12,SHA1_K1); \ + SHA1_S(SHA1_F1,13,SHA1_K1); \ + SHA1_S(SHA1_F1,14,SHA1_K1); \ + SHA1_S(SHA1_F1,15,SHA1_K1); \ + SHA1_D(16); SHA1_S(SHA1_F1,16,SHA1_K1); \ + SHA1_D(17); SHA1_S(SHA1_F1,17,SHA1_K1); \ + SHA1_D(18); SHA1_S(SHA1_F1,18,SHA1_K1); \ + SHA1_D(19); SHA1_S(SHA1_F1,19,SHA1_K1); \ + /* second group of 20 iterations (20 <= t <= 39) */ \ + SHA1_D(20); SHA1_S(SHA1_F2,20,SHA1_K2); \ + SHA1_D(21); SHA1_S(SHA1_F2,21,SHA1_K2); \ + SHA1_D(22); SHA1_S(SHA1_F2,22,SHA1_K2); \ + SHA1_D(23); SHA1_S(SHA1_F2,23,SHA1_K2); \ + SHA1_D(24); SHA1_S(SHA1_F2,24,SHA1_K2); \ + SHA1_D(25); SHA1_S(SHA1_F2,25,SHA1_K2); \ + SHA1_D(26); SHA1_S(SHA1_F2,26,SHA1_K2); \ + SHA1_D(27); SHA1_S(SHA1_F2,27,SHA1_K2); \ + SHA1_D(28); SHA1_S(SHA1_F2,28,SHA1_K2); \ + SHA1_D(29); SHA1_S(SHA1_F2,29,SHA1_K2); \ + SHA1_D(30); SHA1_S(SHA1_F2,30,SHA1_K2); \ + SHA1_D(31); SHA1_S(SHA1_F2,31,SHA1_K2); \ + SHA1_D(32); SHA1_S(SHA1_F2,32,SHA1_K2); \ + SHA1_D(33); SHA1_S(SHA1_F2,33,SHA1_K2); \ + SHA1_D(34); SHA1_S(SHA1_F2,34,SHA1_K2); \ + SHA1_D(35); SHA1_S(SHA1_F2,35,SHA1_K2); \ + SHA1_D(36); SHA1_S(SHA1_F2,36,SHA1_K2); \ + SHA1_D(37); SHA1_S(SHA1_F2,37,SHA1_K2); \ + SHA1_D(38); SHA1_S(SHA1_F2,38,SHA1_K2); \ + SHA1_D(39); SHA1_S(SHA1_F2,39,SHA1_K2); \ + /* third group of 20 iterations (40 <= t <= 59) */ \ + SHA1_D(40); SHA1_S(SHA1_F3,40,SHA1_K3); \ + SHA1_D(41); SHA1_S(SHA1_F3,41,SHA1_K3); \ + SHA1_D(42); SHA1_S(SHA1_F3,42,SHA1_K3); \ + SHA1_D(43); SHA1_S(SHA1_F3,43,SHA1_K3); \ + SHA1_D(44); SHA1_S(SHA1_F3,44,SHA1_K3); \ + SHA1_D(45); SHA1_S(SHA1_F3,45,SHA1_K3); \ + SHA1_D(46); SHA1_S(SHA1_F3,46,SHA1_K3); \ + SHA1_D(47); SHA1_S(SHA1_F3,47,SHA1_K3); \ + SHA1_D(48); SHA1_S(SHA1_F3,48,SHA1_K3); \ + SHA1_D(49); SHA1_S(SHA1_F3,49,SHA1_K3); \ + SHA1_D(50); SHA1_S(SHA1_F3,50,SHA1_K3); \ + SHA1_D(51); SHA1_S(SHA1_F3,51,SHA1_K3); \ + SHA1_D(52); SHA1_S(SHA1_F3,52,SHA1_K3); \ + SHA1_D(53); SHA1_S(SHA1_F3,53,SHA1_K3); \ + SHA1_D(54); SHA1_S(SHA1_F3,54,SHA1_K3); \ + SHA1_D(55); SHA1_S(SHA1_F3,55,SHA1_K3); \ + SHA1_D(56); SHA1_S(SHA1_F3,56,SHA1_K3); \ + SHA1_D(57); SHA1_S(SHA1_F3,57,SHA1_K3); \ + SHA1_D(58); SHA1_S(SHA1_F3,58,SHA1_K3); \ + SHA1_D(59); SHA1_S(SHA1_F3,59,SHA1_K3); \ + /* fourth group of 20 iterations (60 <= t <= 79) */ \ + SHA1_D(60); SHA1_S(SHA1_F4,60,SHA1_K4); \ + SHA1_D(61); SHA1_S(SHA1_F4,61,SHA1_K4); \ + SHA1_D(62); SHA1_S(SHA1_F4,62,SHA1_K4); \ + SHA1_D(63); SHA1_S(SHA1_F4,63,SHA1_K4); \ + SHA1_D(64); SHA1_S(SHA1_F4,64,SHA1_K4); \ + SHA1_D(65); SHA1_S(SHA1_F4,65,SHA1_K4); \ + SHA1_D(66); SHA1_S(SHA1_F4,66,SHA1_K4); \ + SHA1_D(67); SHA1_S(SHA1_F4,67,SHA1_K4); \ + SHA1_D(68); SHA1_S(SHA1_F4,68,SHA1_K4); \ + SHA1_D(69); SHA1_S(SHA1_F4,69,SHA1_K4); \ + SHA1_D(70); SHA1_S(SHA1_F4,70,SHA1_K4); \ + SHA1_D(71); SHA1_S(SHA1_F4,71,SHA1_K4); \ + SHA1_D(72); SHA1_S(SHA1_F4,72,SHA1_K4); \ + SHA1_D(73); SHA1_S(SHA1_F4,73,SHA1_K4); \ + SHA1_D(74); SHA1_S(SHA1_F4,74,SHA1_K4); \ + SHA1_D(75); SHA1_S(SHA1_F4,75,SHA1_K4); \ + SHA1_D(76); SHA1_S(SHA1_F4,76,SHA1_K4); \ + SHA1_D(77); SHA1_S(SHA1_F4,77,SHA1_K4); \ + SHA1_D(78); SHA1_S(SHA1_F4,78,SHA1_K4); \ + SHA1_D(79); SHA1_S(SHA1_F4,79,SHA1_K4); \ + /* update state (in this special case, finish) */ \ + HASH(0) = a + C(0x67452301u); \ + HASH(1) = b + C(0xEFCDAB89u); \ + HASH(2) = c + C(0x98BADCFEu); \ + HASH(3) = d + C(0x10325476u); \ + HASH(4) = e + C(0xC3D2E1F0u); \ + } \ + while(0) + + +// +// the end! +// + +#endif diff --git a/aad_sha1_cpu.h b/aad_sha1_cpu.h new file mode 100644 index 0000000..ce5fc3f --- /dev/null +++ b/aad_sha1_cpu.h @@ -0,0 +1,141 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// +// SHA1 secure hash implementations on the CPU +// + +#ifndef AAD_SHA1_CPU +#define AAD_SHA1_CPU + +#include "aad_sha1.h" +#define FOUR(c) (int)(c),(int)(c),(int)(c),(int)(c) + + +// +// reference implementation (no SIMD instructions) +// + +__attribute__((unused)) +static void sha1(u32_t *data,u32_t *hash) +{ // one message -> one SHA1 hash +# define T u32_t +# define C(c) (c) +# define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) +# define DATA(idx) data[idx] +# define HASH(idx) hash[idx] + CUSTOM_SHA1_CODE(); +# undef T +# undef C +# undef ROTATE +# undef DATA +# undef HASH +} + + +// +// implementation using avx instructions (Intel/AMD) +// + +#if defined(__AVX__) + +__attribute__((unused)) +static void sha1_avx(v4si *interleaved4_data,v4si *interleaved4_hash) +{ // four interleaved messages -> four interleaved SHA1 secure hashes +# define T v4si +# define C(c) (v4si){ FOUR(c) } +# define ROTATE(x,n) (__builtin_ia32_pslldi128(x,n) | __builtin_ia32_psrldi128(x,32 - (n))) +# define DATA(idx) interleaved4_data[idx] +# define HASH(idx) interleaved4_hash[idx] + CUSTOM_SHA1_CODE(); +# undef T +# undef C +# undef ROTATE +# undef DATA +# undef HASH +} + +#endif + + +// +// implementation using avx2 instructions (Intel/AMD) +// + +#if defined(__AVX2__) + +__attribute__((unused)) +static void sha1_avx2(v8si *interleaved8_data,v8si *interleaved8_hash) +{ // eight interleaved messages -> eight interleaved SHA1 secure hashes +# define T v8si +# define C(c) (v8si){ FOUR(c),FOUR(c) } +# define ROTATE(x,n) (__builtin_ia32_pslldi256(x,n) | __builtin_ia32_psrldi256(x,32 - (n))) +# define DATA(idx) interleaved8_data[idx] +# define HASH(idx) interleaved8_hash[idx] + CUSTOM_SHA1_CODE(); +# undef T +# undef C +# undef ROTATE +# undef DATA +# undef HASH +} + +#endif + + +// +// implementation using avx512f instructions (Intel/AMD) +// + +#if defined(__AVX512F__) + +__attribute__((unused)) +static void sha1_avx512f(v16si *interleaved16_data,v16si *interleaved16_hash) +{ // sixteen interleaved messages -> sixteen interleaved SHA1 secure hashes +# define T v16si +# define C(c) (v16si){ FOUR(c),FOUR(c),FOUR(c),FOUR(c) } +# define ROTATE(x,n) __builtin_ia32_prold512_mask(x,n,x,0xFFFF) +# define DATA(idx) interleaved16_data[idx] +# define HASH(idx) interleaved16_hash[idx] + CUSTOM_SHA1_CODE(); +# undef T +# undef C +# undef ROTATE +# undef DATA +# undef HASH +} + +#endif + + +// +// implementation using neon instructions (ARM) +// + +#if defined(__ARM_NEON) + +__attribute__((unused)) +static void sha1_neon(uint32x4_t *interleaved4_data,uint32x4_t *interleaved4_hash) +{ // four interleaved messages -> four interleaved SHA1 secure hashes +# define T uint32x4_t +# define C(c) (uint32x4_t){ FOUR(c) } +# define ROTATE(x,n) (vshlq_n_u32(x,n) | vshrq_n_u32(x,32 - (n))) +# define DATA(idx) interleaved4_data[idx] +# define HASH(idx) interleaved4_hash[idx] + CUSTOM_SHA1_CODE(); +# undef T +# undef C +# undef ROTATE +# undef DATA +# undef HASH +} + +#endif + + +// +// the end! +// + +#endif diff --git a/aad_sha1_cpu_tests.c b/aad_sha1_cpu_tests.c new file mode 100644 index 0000000..10c989a --- /dev/null +++ b/aad_sha1_cpu_tests.c @@ -0,0 +1,416 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// + +#include +#include +#include +#include +#include "aad_data_types.h" +#include "aad_utilities.h" +#include "aad_sha1_cpu.h" + +// +// test the reference implementation +// + +static void test_sha1(int n_tests,int n_measurements) +{ + static union { u08_t c[14 * 4]; u32_t i[14]; } data; // the data as bytes and as 32-bit integers + static union { u08_t c[ 5 * 4]; u32_t i[ 5]; } hash; // the hash as bytes and as 32-bit integers + char command[320]; // 320 is more than enough + char response[64]; // 64 is more than enough + char computed[64]; // 64 is more than enough + double hashes_per_second; + int n,i,idx; + u32_t sum; + FILE *fp; + + // test + response[40] = '\0'; + for(n = 0;n < n_tests;n++) + { + // create random data (55 bytes) + for(i = 0;i < 55;i++) + data.c[i ^ 3] = random_byte(); + // append padding (a SHA1 thing...) + data.c[55 ^ 3] = 0x80; + // compute its SHA1 secure hash + sha1(&data.i[0],&hash.i[0]); + // convert the secure hash into a string + idx = 0; + for(i = 0;i < 20;i++) + idx += sprintf(&computed[idx],"%02x",(int)hash.c[i ^ 3] & 0xFF); + if(idx >= (int)sizeof(computed)) + { + fprintf(stderr,"computed[] is too small\n"); + exit(1); + } + // construct the command to test the SHA1 secure hash + idx = sprintf(&command[0],"/bin/echo -en '"); // do not rely on the bash echo builtin command + for(i = 0;i < 55;i++) + idx += sprintf(&command[idx],"\\x%02x",data.c[i ^ 3]); + idx += sprintf(&command[idx],"' | sha1sum"); + if(idx >= (int)sizeof(command)) + { + fprintf(stderr,"command[] is too small\n"); + exit(1); + } + // run it and get its output + fp = popen(command,"r"); + if(fp == NULL) + { + fprintf(stderr,"popen() failed\n"); + exit(1); + } + if(fread((void *)&response[0],sizeof(char),(size_t)40,fp) != (size_t)40) + { + fprintf(stderr,"fread() failed\n"); + exit(1); + } + pclose(fp); + // compare them + if(memcmp((void *)response,(void *)computed,(size_t)40) != 0) + { // print everything + fprintf(stderr,"sha1() failure for n=%d:\n",n); + for(i = 0;i < 55;i++) + fprintf(stderr," message[%2d] = %02x\n",i,(int)data.c[i ^ 3] & 0xFF); + for(i = 0;i < 20;i++) + fprintf(stderr," hash[%2d] = %02x\n",i,(int)hash.c[i ^ 3] % 0xFF); + fprintf(stderr," sha1sum output: %s\n",response); + fprintf(stderr," sha1() output: %s\n",computed); + for(i = 0;i < 40 && response[i] == computed[i];i++) + ; + fprintf(stderr," mismatch at %d\n",i); + exit(1); + } + } + // warmup (turbo boost...) + for(i = n = 0;i < 1000000;i++) + n += (int)random_byte(); + if(n == 0) + fprintf(stderr,"sha1(): this should not be possible, n=0\n"); + // measure + time_measurement(); + sum = 0u; + for(n = 0;n < n_measurements;n++) + { + data.i[0]++; + sha1(&data.i[0],&hash.i[0]); + sum += hash.i[4]; + } + time_measurement(); + if(sum == 0u) + fprintf(stderr,"sha1(): what a coincidence, sum=0\n"); + hashes_per_second = (double)n_measurements / cpu_time_delta(); + // report + printf("sha1() passed (%d test%s, %.0f secure hashes per second)\n",n_tests,(n_tests == 1) ? "" : "s",hashes_per_second); +} + + +// +// test the avx implementation +// + +#if defined(__AVX__) + +static void test_sha1_avx(int n_tests,int n_measurements) +{ +#define N_LANES 4 + static union { u08_t c[14 * 4]; u32_t i[14]; } data[N_LANES]; // the data as bytes and as 32-bit integers + static union { u08_t c[ 5 * 4]; u32_t i[ 5]; } hash[N_LANES]; // the hash as bytes and as 32-bit integers + static u32_t interleaved_data[14][N_LANES] __attribute__((aligned(16))); + static u32_t interleaved_hash[5][N_LANES] __attribute__((aligned(16))); + double hashes_per_second; + int n,i,lane; + u32_t sum; + + // test + for(n = 0;n < n_tests;n++) + { + // the data and the secure hash for the reference implementation + for(lane = 0;lane < N_LANES;lane++) + { + // create random data (55 bytes) + for(i = 0;i < 55;i++) + data[lane].c[i ^ 3] = random_byte(); + // append padding (a SHA1 thing...) + data[lane].c[55 ^ 3] = 0x80; + // compute its SHA1 secure hash + sha1(&data[lane].i[0],&hash[lane].i[0]); + } + // interleave (transpose) the data for the avx implementation + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 14;i++) + interleaved_data[i][lane] = data[lane].i[i]; + // compute the four secure hashes in one go + sha1_avx((v4si *)&interleaved_data[0],(v4si *)&interleaved_hash[0]); + // test + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 5;i++) + if(interleaved_hash[i][lane] != hash[lane].i[i]) + { + fprintf(stderr,"sha1_avx() failure for n=%d (bad/good):\n",n); + for(i = 0;i < 5;i++) + for(lane = 0;lane < N_LANES;lane++) + fprintf(stderr,"%s%08X/%08X%s",(lane == 0) ? " " : " ",interleaved_hash[i][lane] ,hash[lane].i[i],(lane == N_LANES - 1) ? "\n" : ""); + exit(1); + } + } + // measure + time_measurement(); + sum = 0u; + for(n = 0;n < n_measurements;n++) + { + interleaved_data[0][0]++; + sha1(&data[lane].i[0],&hash[lane].i[0]); + sum += interleaved_hash[4][0]; + } + time_measurement(); + if(sum == 0u) + fprintf(stderr,"sha1_avx(): what a coincidence, sum=0\n"); + hashes_per_second = (double)n_measurements * (double)N_LANES / cpu_time_delta(); + // report + printf("sha1_avx() passed (%d test%s, %.0f secure hashes per second)\n",n_tests,(n_tests == 1) ? "" : "s",hashes_per_second); +# undef N_LANES +} + +#endif + + +// +// test the avx2 implementation +// + +#if defined(__AVX2__) + +static void test_sha1_avx2(int n_tests,int n_measurements) +{ +#define N_LANES 8 + static union { u08_t c[14 * 4]; u32_t i[14]; } data[N_LANES]; // the data as bytes and as 32-bit integers + static union { u08_t c[ 5 * 4]; u32_t i[ 5]; } hash[N_LANES]; // the hash as bytes and as 32-bit integers + static u32_t interleaved_data[14][N_LANES] __attribute__((aligned(32))); + static u32_t interleaved_hash[5][N_LANES] __attribute__((aligned(32))); + double hashes_per_second; + int n,i,lane; + u32_t sum; + + // test + for(n = 0;n < n_tests;n++) + { + // the data and the secure hash for the reference implementation + for(lane = 0;lane < N_LANES;lane++) + { + // create random data (55 bytes) + for(i = 0;i < 55;i++) + data[lane].c[i ^ 3] = random_byte(); + // append padding (a SHA1 thing...) + data[lane].c[55 ^ 3] = 0x80; + // compute its SHA1 secure hash + sha1(&data[lane].i[0],&hash[lane].i[0]); + } + // interleave (transpose) the data for the avx2 implementation + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 14;i++) + interleaved_data[i][lane] = data[lane].i[i]; + // compute the eight secure hashes in one go + sha1_avx2((v8si *)&interleaved_data[0],(v8si *)&interleaved_hash[0]); + // test + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 5;i++) + if(interleaved_hash[i][lane] != hash[lane].i[i]) + { + fprintf(stderr,"sha1_avx2() failure for n=%d (bad/good):\n",n); + for(i = 0;i < 5;i++) + for(lane = 0;lane < N_LANES;lane++) + fprintf(stderr,"%s%08X/%08X%s",(lane == 0) ? " " : " ",interleaved_hash[i][lane] ,hash[lane].i[i],(lane == N_LANES - 1) ? "\n" : ""); + exit(1); + } + } + // measure + time_measurement(); + sum = 0u; + for(n = 0;n < n_measurements;n++) + { + interleaved_data[0][0]++; + sha1(&data[lane].i[0],&hash[lane].i[0]); + sum += interleaved_hash[4][0]; + } + time_measurement(); + if(sum == 0u) + fprintf(stderr,"sha1_avx2(): what a coincidence, sum=0\n"); + hashes_per_second = (double)n_measurements * (double)N_LANES / cpu_time_delta(); + // report + printf("sha1_avx2() passed (%d test%s, %.0f secure hashes per second)\n",n_tests,(n_tests == 1) ? "" : "s",hashes_per_second); +# undef N_LANES +} + +#endif + + +// +// test the avx512f implementation +// + +#if defined(__AVX512F__) + +static void test_sha1_avx512f(int n_tests,int n_measurements) +{ +#define N_LANES 16 + static union { u08_t c[14 * 4]; u32_t i[14]; } data[N_LANES]; // the data as bytes and as 32-bit integers + static union { u08_t c[ 5 * 4]; u32_t i[ 5]; } hash[N_LANES]; // the hash as bytes and as 32-bit integers + static u32_t interleaved_data[14][N_LANES] __attribute__((aligned(64))); + static u32_t interleaved_hash[5][N_LANES] __attribute__((aligned(64))); + double hashes_per_second; + int n,i,lane; + u32_t sum; + + // test + for(n = 0;n < n_tests;n++) + { + // the data and the secure hash for the reference implementation + for(lane = 0;lane < N_LANES;lane++) + { + // create random data (55 bytes) + for(i = 0;i < 55;i++) + data[lane].c[i ^ 3] = random_byte(); + // append padding (a SHA1 thing...) + data[lane].c[55 ^ 3] = 0x80; + // compute its SHA1 secure hash + sha1(&data[lane].i[0],&hash[lane].i[0]); + } + // interleave (transpose) the data for the avx512f implementation + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 14;i++) + interleaved_data[i][lane] = data[lane].i[i]; + // compute the sixteen secure hashes in one go + sha1_avx512f((v16si *)&interleaved_data[0],(v16si *)&interleaved_hash[0]); + // test + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 5;i++) + if(interleaved_hash[i][lane] != hash[lane].i[i]) + { + fprintf(stderr,"sha1_avx512f() failure for n=%d (bad/good):\n",n); + for(i = 0;i < 5;i++) + for(lane = 0;lane < N_LANES;lane++) + fprintf(stderr,"%s%08X/%08X%s",(lane == 0) ? " " : " ",interleaved_hash[i][lane] ,hash[lane].i[i],(lane == N_LANES - 1) ? "\n" : ""); + exit(1); + } + } + // measure + time_measurement(); + sum = 0u; + for(n = 0;n < n_measurements;n++) + { + interleaved_data[0][0]++; + sha1(&data[lane].i[0],&hash[lane].i[0]); + sum += interleaved_hash[4][0]; + } + time_measurement(); + if(sum == 0u) + fprintf(stderr,"sha1_avx512f(): what a coincidence, sum=0\n"); + hashes_per_second = (double)n_measurements * (double)N_LANES / cpu_time_delta(); + // report + printf("sha1_avx512f() passed (%d test%s, %.0f secure hashes per second)\n",n_tests,(n_tests == 1) ? "" : "s",hashes_per_second); +# undef N_LANES +} + +#endif + + +// +// test the neon implementation +// + +#if defined(__ARM_NEON) + +static void test_sha1_neon(int n_tests,int n_measurements) +{ +#define N_LANES 4 + static union { u08_t c[14 * 4]; u32_t i[14]; } data[N_LANES]; // the data as bytes and as 32-bit integers + static union { u08_t c[ 5 * 4]; u32_t i[ 5]; } hash[N_LANES]; // the hash as bytes and as 32-bit integers + static u32_t interleaved_data[14][N_LANES] __attribute__((aligned(16))); + static u32_t interleaved_hash[5][N_LANES] __attribute__((aligned(16))); + double hashes_per_second; + int n,i,lane; + u32_t sum; + + // test + for(n = 0;n < n_tests;n++) + { + // the data and the secure hash for the reference implementation + for(lane = 0;lane < N_LANES;lane++) + { + // create random data (55 bytes) + for(i = 0;i < 55;i++) + data[lane].c[i ^ 3] = random_byte(); + // append padding (a SHA1 thing...) + data[lane].c[55 ^ 3] = 0x80; + // compute its SHA1 secure hash + sha1(&data[lane].i[0],&hash[lane].i[0]); + } + // interleave (transpose) the data for the neon implementation + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 14;i++) + interleaved_data[i][lane] = data[lane].i[i]; + // compute the four secure hashes in one go + sha1_neon((uint32x4_t *)&interleaved_data[0],(uint32x4_t *)&interleaved_hash[0]); + // test + for(lane = 0;lane < N_LANES;lane++) + for(i = 0;i < 5;i++) + if(interleaved_hash[i][lane] != hash[lane].i[i]) + { + fprintf(stderr,"sha1_neon() failure for n=%d (bad/good):\n",n); + for(i = 0;i < 5;i++) + for(lane = 0;lane < N_LANES;lane++) + fprintf(stderr,"%s%08X/%08X%s",(lane == 0) ? " " : " ",interleaved_hash[i][lane] ,hash[lane].i[i],(lane == N_LANES - 1) ? "\n" : ""); + exit(1); + } + } + // measure + time_measurement(); + sum = 0u; + for(n = 0;n < n_measurements;n++) + { + interleaved_data[0][0]++; + sha1(&data[lane].i[0],&hash[lane].i[0]); + sum += interleaved_hash[4][0]; + } + time_measurement(); + if(sum == 0u) + fprintf(stderr,"sha1_neon(): what a coincidence, sum=0\n"); + hashes_per_second = (double)n_measurements * (double)N_LANES / cpu_time_delta(); + // report + printf("sha1_neon() passed (%d test%s, %.0f secure hashes per second)\n",n_tests,(n_tests == 1) ? "" : "s",hashes_per_second); +# undef N_LANES +} + +#endif + + +// +// main program +// + +int main(void) +{ + int n_tests = 1000; + int n_measurements = 10000000; + + test_sha1(n_tests,n_measurements); +#if defined(__AVX__) + test_sha1_avx(n_tests,n_measurements); +#endif +#if defined(__AVX2__) + test_sha1_avx2(n_tests,n_measurements); +#endif +#if defined(__AVX512F__) + test_sha1_avx512f(n_tests,n_measurements); +#endif +#if defined(__ARM_NEON) + test_sha1_neon(n_tests,n_measurements); +#endif + return 0; +} diff --git a/aad_sha1_cuda_kernel.cu b/aad_sha1_cuda_kernel.cu new file mode 100644 index 0000000..9d67b94 --- /dev/null +++ b/aad_sha1_cuda_kernel.cu @@ -0,0 +1,84 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// + + +// +// sha1_cuda_kernel() --- each CUDA thread computes the SHA1 secure hash of one message +// +// this kernel should only be used to validate the SHA1 secure hash code in CUDA +// + +#include "aad_sha1.h" + +typedef unsigned int u32_t; + +// +// the nvcc compiler stores w[] in registers (constant indices!) +// +// global thread number: n = threadIdx.x + blockDim.x * blockIdx.x +// global warp number: n >> 5 +// warp thread number: n & 31 -- the lane +// + +extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1) +void sha1_cuda_kernel(u32_t *interleaved32_data,u32_t *interleaved32_hash) +{ + u32_t n; + + // + // get the global thread number (to make things easier, only the x dimension is used) + // + n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x; + // + // adjust data and hash pointers; together with the DATA and HASH macros below, these pointer adjustments ensure that + // the 32 threads of a warp access consecutive memory addresses; for one warp addresses grow from the left to the + // right, and then from top to bottom + // +----------------------+----------------------+- ... -+----------------------+----------------------+ + // | | | | | | + // | data[ 0] for lane 0 | data[ 0] for lane 1 | ..... | data[ 0] for lane 30 | data[ 0] for lane 31 | + // | | | | | | + // +----------------------+----------------------+- ... -+----------------------+----------------------+ + // | | | | | | + // | data[ 1] for lane 0 | data[ 1] for lane 1 | ..... | data[ 1] for lane 30 | data[ 1] for lane 31 | + // | | | | | | + // +----------------------+----------------------+- ... -+----------------------+----------------------+ + // ... + // +----------------------+----------------------+- ... -+----------------------+----------------------+ + // | | | | | | + // | data[13] for lane 0 | data[13] for lane 1 | ..... | data[13] for lane 30 | data[13] for lane 31 | + // | | | | | | + // +----------------------+----------------------+- ... -+----------------------+----------------------+ + // this is followed by the data for the next warp + // +----------------------+----------------------+- ... -+----------------------+----------------------+ + // | | | | | | + // | data[ 0] for lane 0 | data[ 0] for lane 1 | ..... | data[ 0] for lane 30 | data[ 0] for lane 31 | + // | | | | | | + // +----------------------+----------------------+- ... -+----------------------+----------------------+ + // ... + // And so on. The interleaved32_data is CONCEPTUALLY organized in the following way + // interleaved32_data[number_of_warps] [14] [32] + // [warp_number] [idx] [lane] + // for the same warp number and the same idx, the data for the 32 lanes (warp thread number) are in consecutive addresses + // + // the same happens for the interleaved32_hash, but the indices go only from 0 to 4 + // + interleaved32_data = &interleaved32_data[(n >> 5u) * (32u * 14u) + (n & 31u)]; + interleaved32_hash = &interleaved32_hash[(n >> 5u) * (32u * 5u) + (n & 31u)]; + // + // compute the SHA1 secure hash + // +# define T u32_t +# define C(c) (c) +# define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) +# define DATA(idx) interleaved32_data[32u * (idx)] +# define HASH(idx) interleaved32_hash[32u * (idx)] + CUSTOM_SHA1_CODE(); +# undef T +# undef C +# undef ROTATE +# undef DATA +# undef HASH +} diff --git a/aad_sha1_cuda_test.c b/aad_sha1_cuda_test.c new file mode 100644 index 0000000..4a73c47 --- /dev/null +++ b/aad_sha1_cuda_test.c @@ -0,0 +1,105 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// + +#include +#include +#include +#include +#include "aad_data_types.h" +#include "aad_utilities.h" +#include "aad_sha1_cpu.h" +#include "aad_cuda_utilities.h" + +static void test_sha1_cuda(int n_tests) +{ + u32_t n,*interleaved32_data,*interleaved32_hash,data[14],hash[5],good_hash[5]; + double host_to_device_time,kernel_time,device_to_host_time,hashes_per_second; + cuda_data_t cd; + + if(n_tests <= 0 || n_tests > (1 << 24) || n_tests % RECOMENDED_CUDA_BLOCK_SIZE != 0) + { + fprintf(stderr,"test_sha1_cuda(): bad number of tests\n"); + exit(1); + } + // initialize + cd.device_number = 0; // first device + cd.cubin_file_name = "sha1_cuda_kernel.cubin"; + cd.kernel_name = "sha1_cuda_kernel"; + cd.data_size[0] = (u32_t)n_tests * (u32_t)14 * (u32_t)sizeof(u32_t); // size of the data array + cd.data_size[1] = (u32_t)n_tests * (u32_t) 5 * (u32_t)sizeof(u32_t); // size of the hash array + fprintf(stderr,"test_sha1_cuda(): %.3f MiB bytes for the interleaved32_data[] array\n",(double)cd.data_size[0] / (double)(1 << 20)); + fprintf(stderr,"test_sha1_cuda(): %.3f MiB bytes for the interleaved32_hash[] array\n",(double)cd.data_size[1] / (double)(1 << 20)); + initialize_cuda(&cd); + interleaved32_data = (u32_t *)cd.host_data[0]; + interleaved32_hash = (u32_t *)cd.host_data[1]; + // random interleaved32_data + n = cd.data_size[0]; + while(n != 0u) + ((u08_t *)interleaved32_data)[--n] = random_byte(); + // run SHA1 in the CUDA device + time_measurement(); + host_to_device_copy(&cd,0); // idx=0 means that the interleaved32_data is copied to the CUDA device + time_measurement(); + host_to_device_time = wall_time_delta(); + cd.grid_dim_x = (u32_t)n_tests / (u32_t)RECOMENDED_CUDA_BLOCK_SIZE; + cd.block_dim_x = (u32_t)RECOMENDED_CUDA_BLOCK_SIZE; + cd.n_kernel_arguments = 2; + cd.arg[0] = &cd.device_data[0]; // interleaved32_data + cd.arg[1] = &cd.device_data[1]; // interleaved32_hash + time_measurement(); + lauch_kernel(&cd); + time_measurement(); + kernel_time = wall_time_delta(); + time_measurement(); + device_to_host_copy(&cd,1); // idx=1 means that the interleaved32_hash is copied to the host + time_measurement(); + device_to_host_time = wall_time_delta(); + // test + for(n = 0;n < n_tests;n++) + { + // deinterleave the data and the hash + // on the CUDA side, the data for each warp is clustered together; what follows must match what is in the CUDA kernel + // each warp has 32 threads + int warp_number = n / 32; + int lane = n % 32; + for(int idx = 0;idx < 14;idx++) + data[idx] = interleaved32_data[32 * 14 * warp_number + 32 * idx + lane]; + for(int idx = 0;idx < 5;idx++) + hash[idx] = interleaved32_hash[32 * 5 * warp_number + 32 * idx + lane]; + // compute the SHA1 secure hahs on the cpu + sha1(&data[0],&good_hash[0]); + // compare them + for(int idx = 0;idx < 5;idx++) + if(hash[idx] != good_hash[idx]) + { + fprintf(stderr,"test_sha1_cuda() failed for n=%d\n",n); + for(idx = 0;idx < 14;idx++) + fprintf(stderr,"%2d 0x%08X\n",idx,data[idx]); + fprintf(stderr,"---\n"); + for(idx = 0;idx < 5;idx++) + fprintf(stderr,"%2d 0x%08X 0x%08X\n",idx,good_hash[idx],hash[idx]); + exit(1); + } + } + // cleanup + terminate_cuda(&cd); + hashes_per_second = (double)n_tests / kernel_time; + printf("sha1_cuda_kernel() passed (%d test%s, %.0f secure hashes per second)\n",n_tests,(n_tests == 1) ? "" : "s",hashes_per_second); + printf(" host -> device --- %.6f seconds\n",host_to_device_time); + printf(" kernel ----------- %.6f seconds\n",kernel_time); + printf(" device -> host --- %.6f seconds\n",device_to_host_time); +} + + +// +// main program +// + +int main(void) +{ + test_sha1_cuda(128 * 65536); + return 0; +} diff --git a/aad_utilities.h b/aad_utilities.h new file mode 100644 index 0000000..5950e27 --- /dev/null +++ b/aad_utilities.h @@ -0,0 +1,65 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// +// useful functions (all functions are marked with the unused attribute) +// the compiler will not complain if they are not actually used in the code +// + +#ifndef AAD_UTILITIES +#define AAD_UTILITIES + +// +// measure elapsed and wall times --- requires +// +// warning: Linux and macOS only, if clock_gettime() is not available, consider using clock() +// + +static struct timespec measured_cpu_time[2],measured_wall_time[2]; + +__attribute__((unused)) +static void time_measurement(void) +{ + measured_cpu_time[0] = measured_cpu_time[1]; + (void)clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&measured_cpu_time[1]); + measured_wall_time[0] = measured_wall_time[1]; + (void)clock_gettime(CLOCK_MONOTONIC_RAW,&measured_wall_time[1]); +} + +__attribute__((unused)) +static double cpu_time_delta(void) +{ + return ((double)measured_cpu_time[1].tv_sec - (double)measured_cpu_time[0].tv_sec) + + 1.0e-9 * ((double)measured_cpu_time[1].tv_nsec - (double)measured_cpu_time[0].tv_nsec); +} + +__attribute__((unused)) +static double wall_time_delta(void) +{ + return ((double)measured_wall_time[1].tv_sec - (double)measured_wall_time[0].tv_sec) + + 1.0e-9 * ((double)measured_wall_time[1].tv_nsec - (double)measured_wall_time[0].tv_nsec); +} + + +// +// linear congruential pseudo-random number generator with period 2^32 +// see, for example, https://en.wikipedia.org/wiki/Linear_congruential_generator +// not good for cryptographic applications, but good enough to generate test data +// + +__attribute__((unused)) +u08_t random_byte(void) +{ + static u32_t x = 0u; + + x = 3134521u * x + 1u; + return (u08_t)x; +} + + +// +// the end! +// + +#endif diff --git a/aad_vault.h b/aad_vault.h new file mode 100644 index 0000000..f8acb92 --- /dev/null +++ b/aad_vault.h @@ -0,0 +1,145 @@ +// +// Tomás Oliveira e Silva, September 2025 +// +// Arquiteturas de Alto Desempenho 2025/2026 +// +// implements a vault for all found DETI coins +// + +#ifndef AAD_VAULT +#define AAD_VAULT + +static void save_coin(u32_t coin[14]) +{ +# define VAULT_FILE_NAME "deti_coins_v2_vault.txt" +# define MAX_SAVED_COINS 65536u + static u08_t saved_coins[MAX_SAVED_COINS][4 + 55]; + static u32_t n_saved_coins = 0u; + static u08_t deti_coin_v2_template[56u] = + { // non-zero entries are mandatory, the others are arbitrary + [ 0u] = (u08_t)'D', + [ 1u] = (u08_t)'E', + [ 2u] = (u08_t)'T', + [ 3u] = (u08_t)'I', + [ 4u] = (u08_t)' ', + [ 5u] = (u08_t)'c', + [ 6u] = (u08_t)'o', + [ 7u] = (u08_t)'i', + [ 8u] = (u08_t)'n', + [ 9u] = (u08_t)' ', + [10u] = (u08_t)'2', + [11u] = (u08_t)' ', + [54u] = (u08_t)'\n', + [55u] = (u08_t)0x80 + }; + static int error_tolerance_count = 4; // number of errors to tolerate before bailing out + u32_t idx,n,hash[5]; + char *reason; + u08_t *s; + + // + // handle a NULL argument (meaning: save all stored DETI coins) or an already full buffer + // + if(coin == NULL || n_saved_coins == MAX_SAVED_COINS) + { + if(n_saved_coins > 0u) + { + FILE *fp = fopen(VAULT_FILE_NAME,"a"); + if(fp == NULL || + fwrite((void *)&saved_coins[0][0],(size_t)(4 + 55),(size_t)n_saved_coins,fp) != (size_t)n_saved_coins || + fflush(fp) != 0 || + fclose(fp) != 0) + { + fprintf(stderr,"save_coin(): error while updating file \"" VAULT_FILE_NAME "\"\n"); + exit(1); + } + } + n_saved_coins = 0u; + } + if(coin == NULL) + return; + // + // compute the SHA1 secure hash + // + sha1(coin,hash); + // + // make sure that the coin has the appropriate format + // + for(idx = 0u;idx < 56u;idx++) + if((deti_coin_v2_template[idx] != (u08_t)0 && deti_coin_v2_template[idx] != ((u08_t *)coin)[idx ^ 3]) || (idx >= 12u && idx <= 53u && ((char *)coin)[idx ^ 3] == '\n')) + { + reason = "coin does not match the template"; +error: + fprintf(stderr,"save_coin(): bad DETI coin v2 format (%s)\n",reason); + fprintf(stderr," coin contents\n"); + fprintf(stderr," idx template coin\n"); + fprintf(stderr," --- --------- ---------\n"); + for(idx = 0u;idx < 56u;idx++) + { + u08_t t = deti_coin_v2_template[idx]; + u08_t c = ((u08_t *)coin)[idx ^ 3]; + fprintf(stderr," %3u",idx); + if(t == (u08_t)0) + fprintf(stderr," arbitrary"); + else if(t == '\n') + fprintf(stderr," 0x%02X '\\n'",(int)t); + else if(t >= 32 && t <= 126) + fprintf(stderr," 0x%02X '%c'",(int)t,t); + else + fprintf(stderr," 0x%02X ",(int)t); + fprintf(stderr," 0x%02X",(int)c); + if(c == '\n') + fprintf(stderr," '\\n'"); + else if(c == '\b') + fprintf(stderr," '\\b'"); + else if(c >= 32 && c <= 126) + fprintf(stderr," '%c'",c); + else + fprintf(stderr," "); + if((t != (u08_t)0 && t != c) || (idx >= 12u && idx <= 53u && c == '\n')) + fprintf(stderr," error"); + fprintf(stderr,"\n"); + } + fprintf(stderr," --- --------- ---------\n"); + fprintf(stderr," SHA1 secure hash\n"); + fprintf(stderr," idx value\n"); + fprintf(stderr," --- ----------\n"); + for(idx = 0u;idx < 5u;idx++) + fprintf(stderr," %u 0x%08X%s\n",idx,hash[idx],(idx == 0u && hash[idx] != 0xAAD20250u) ? " error" : ""); + fprintf(stderr," --- ----------\n\n"); + if(--error_tolerance_count < 0) + exit(1); // too many errors, exit + return; // ignore this coin + } + // + // chech the DETI coin v2 signature + // + if(hash[0] != 0xAAD20250u) + { + reason = "bad coin signature"; + goto error; + } + // + // count the number of leading zeros bits of the last 4 32-bit words of the SHA1 secure hash + // + for(n = 0u;n < 128u;n++) + if((hash[1u + n / 32u] >> (31u - n % 32u)) % 2u != 0u) + break; + // + // save the coin in the buffer + // format of each line: "Vuv:" "coin_data" where u and v are ascii digits that encode, in base 10, the reported power of the coin + // + if(n > 99u) + n = 99u; + s = &saved_coins[n_saved_coins++][0]; + *s++ = (u08_t)'V'; + *s++ = (u08_t)('0' + n / 10u); + *s++ = (u08_t)('0' + n % 10u); + *s++ = (u08_t)':'; + for(idx = 0u;idx < 55u;idx++) + *s++ = ((u08_t *)coin)[idx ^ 3]; +# undef VAULT_FILE_NAME +# undef MAX_SAVED_COINS +} + +#endif diff --git a/deti_coin_example.txt b/deti_coin_example.txt new file mode 100644 index 0000000..d2d7e95 --- /dev/null +++ b/deti_coin_example.txt @@ -0,0 +1 @@ +DETI coin 2 000004408026884AAD é fixe. diff --git a/logo.png b/logo.png new file mode 100644 index 0000000..ce2fe97 Binary files /dev/null and b/logo.png differ diff --git a/makefile b/makefile new file mode 100644 index 0000000..a295547 --- /dev/null +++ b/makefile @@ -0,0 +1,66 @@ +# +# Arquiteturas de Alto Desempenho 2025/2026 +# +# makefile for the first practical assignment (A1) +# +# makefile automatic variables: +# $@ is the name of the target +# $< is the name of the first prerequisite +# $^ is the list of names of all prerequisites (without duplicates) +# + +# +# CUDA installation directory --- /usr/local/cuda or $(CUDA_HOME) +# + +CUDA_DIR = /opt/cuda + + +# +# OpenCL installation directory (for a NVidia graphics card, sama as CUDA) +# + +OPENCL_DIR = $(CUDA_DIR) + + +# +# CUDA device architecture +# +# GeForce GTX 1660 Ti --- sm_75 +# RTX A2000 Ada --------- sm_86 +# RTX A6000 Ada --------- sm_86 +# RTX 4070 -------------- sm_89 +# + +CUDA_ARCH = sm_75 + + +# +# clean up +# + +clean: + rm -f sha1_tests + rm -f sha1_cuda_test sha1_cuda_kernel.cubin + rm -f a.out + + +# +# test the CUSTOM_SHA1_CODE macro +# + +sha1_tests: aad_sha1_cpu_tests.c aad_sha1.h aad_data_types.h aad_utilities.h makefile + cc -march=native -Wall -Wshadow -Werror -O3 $< -o $@ + +sha1_cuda_test: aad_sha1_cuda_test.c sha1_cuda_kernel.cubin aad_sha1.h aad_data_types.h aad_utilities.h aad_cuda_utilities.h makefile + cc -march=native -Wall -Wshadow -Werror -O3 -I$(CUDA_DIR)/include $< -o $@ -lcuda + + +# +# compile the CUDA kernels +# + +sha1_cuda_kernel.cubin: aad_sha1_cuda_kernel.cu aad_sha1.h makefile + nvcc -arch=$(CUDA_ARCH) --compiler-options -O2,-Wall -I$(CUDA_DIR)/include --cubin $< -o $@ + +all: sha1_tests sha1_cuda_test sha1_cuda_kernel.cubin diff --git a/rfc3174.txt b/rfc3174.txt new file mode 100644 index 0000000..ebe515d --- /dev/null +++ b/rfc3174.txt @@ -0,0 +1,1235 @@ + + + + + + +Network Working Group D. Eastlake, 3rd +Request for Comments: 3174 Motorola +Category: Informational P. Jones + Cisco Systems + September 2001 + + + US Secure Hash Algorithm 1 (SHA1) + +Status of this Memo + + This memo provides information for the Internet community. It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2001). All Rights Reserved. + +Abstract + + The purpose of this document is to make the SHA-1 (Secure Hash + Algorithm 1) hash algorithm conveniently available to the Internet + community. The United States of America has adopted the SHA-1 hash + algorithm described herein as a Federal Information Processing + Standard. Most of the text herein was taken by the authors from FIPS + 180-1. Only the C code implementation is "original". + +Acknowledgements + + Most of the text herein was taken from [FIPS 180-1]. Only the C code + implementation is "original" but its style is similar to the + previously published MD4 and MD5 RFCs [RFCs 1320, 1321]. + + The SHA-1 is based on principles similar to those used by Professor + Ronald L. Rivest of MIT when designing the MD4 message digest + algorithm [MD4] and is modeled after that algorithm [RFC 1320]. + + Useful comments from the following, which have been incorporated + herein, are gratefully acknowledged: + + Tony Hansen + Garrett Wollman + + + + + + + + +Eastlake & Jones Informational [Page 1] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +Table of Contents + + 1. Overview of Contents........................................... 2 + 2. Definitions of Bit Strings and Integers........................ 3 + 3. Operations on Words............................................ 3 + 4. Message Padding................................................ 4 + 5. Functions and Constants Used................................... 6 + 6. Computing the Message Digest................................... 6 + 6.1 Method 1...................................................... 6 + 6.2 Method 2...................................................... 7 + 7. C Code......................................................... 8 + 7.1 .h file....................................................... 8 + 7.2 .c file....................................................... 10 + 7.3 Test Driver................................................... 18 + 8. Security Considerations........................................ 20 + References........................................................ 21 + Authors' Addresses................................................ 21 + Full Copyright Statement.......................................... 22 + +1. Overview of Contents + + NOTE: The text below is mostly taken from [FIPS 180-1] and assertions + therein of the security of SHA-1 are made by the US Government, the + author of [FIPS 180-1], and not by the authors of this document. + + This document specifies a Secure Hash Algorithm, SHA-1, for computing + a condensed representation of a message or a data file. When a + message of any length < 2^64 bits is input, the SHA-1 produces a + 160-bit output called a message digest. The message digest can then, + for example, be input to a signature algorithm which generates or + verifies the signature for the message. Signing the message digest + rather than the message often improves the efficiency of the process + because the message digest is usually much smaller in size than the + message. The same hash algorithm must be used by the verifier of a + digital signature as was used by the creator of the digital + signature. Any change to the message in transit will, with very high + probability, result in a different message digest, and the signature + will fail to verify. + + The SHA-1 is called secure because it is computationally infeasible + to find a message which corresponds to a given message digest, or to + find two different messages which produce the same message digest. + Any change to a message in transit will, with very high probability, + result in a different message digest, and the signature will fail to + verify. + + + + + + +Eastlake & Jones Informational [Page 2] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + Section 2 below defines the terminology and functions used as + building blocks to form SHA-1. + +2. Definitions of Bit Strings and Integers + + The following terminology related to bit strings and integers will be + used: + + a. A hex digit is an element of the set {0, 1, ... , 9, A, ... , F}. + A hex digit is the representation of a 4-bit string. Examples: 7 + = 0111, A = 1010. + + b. A word equals a 32-bit string which may be represented as a + sequence of 8 hex digits. To convert a word to 8 hex digits each + 4-bit string is converted to its hex equivalent as described in + (a) above. Example: + + 1010 0001 0000 0011 1111 1110 0010 0011 = A103FE23. + + c. An integer between 0 and 2^32 - 1 inclusive may be represented as + a word. The least significant four bits of the integer are + represented by the right-most hex digit of the word + representation. Example: the integer 291 = 2^8+2^5+2^1+2^0 = + 256+32+2+1 is represented by the hex word, 00000123. + + If z is an integer, 0 <= z < 2^64, then z = (2^32)x + y where 0 <= + x < 2^32 and 0 <= y < 2^32. Since x and y can be represented as + words X and Y, respectively, z can be represented as the pair of + words (X,Y). + + d. block = 512-bit string. A block (e.g., B) may be represented as a + sequence of 16 words. + +3. Operations on Words + + The following logical operators will be applied to words: + + a. Bitwise logical word operations + + X AND Y = bitwise logical "and" of X and Y. + + X OR Y = bitwise logical "inclusive-or" of X and Y. + + X XOR Y = bitwise logical "exclusive-or" of X and Y. + + NOT X = bitwise logical "complement" of X. + + + + + +Eastlake & Jones Informational [Page 3] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + Example: + + 01101100101110011101001001111011 + XOR 01100101110000010110100110110111 + -------------------------------- + = 00001001011110001011101111001100 + + b. The operation X + Y is defined as follows: words X and Y + represent integers x and y, where 0 <= x < 2^32 and 0 <= y < 2^32. + For positive integers n and m, let n mod m be the remainder upon + dividing n by m. Compute + + z = (x + y) mod 2^32. + + Then 0 <= z < 2^32. Convert z to a word, Z, and define Z = X + + Y. + + c. The circular left shift operation S^n(X), where X is a word and n + is an integer with 0 <= n < 32, is defined by + + S^n(X) = (X << n) OR (X >> 32-n). + + In the above, X << n is obtained as follows: discard the left-most + n bits of X and then pad the result with n zeroes on the right + (the result will still be 32 bits). X >> n is obtained by + discarding the right-most n bits of X and then padding the result + with n zeroes on the left. Thus S^n(X) is equivalent to a + circular shift of X by n positions to the left. + +4. Message Padding + + SHA-1 is used to compute a message digest for a message or data file + that is provided as input. The message or data file should be + considered to be a bit string. The length of the message is the + number of bits in the message (the empty message has length 0). If + the number of bits in a message is a multiple of 8, for compactness + we can represent the message in hex. The purpose of message padding + is to make the total length of a padded message a multiple of 512. + SHA-1 sequentially processes blocks of 512 bits when computing the + message digest. The following specifies how this padding shall be + performed. As a summary, a "1" followed by m "0"s followed by a 64- + bit integer are appended to the end of the message to produce a + padded message of length 512 * n. The 64-bit integer is the length + of the original message. The padded message is then processed by the + SHA-1 as n 512-bit blocks. + + + + + + +Eastlake & Jones Informational [Page 4] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + Suppose a message has length l < 2^64. Before it is input to the + SHA-1, the message is padded on the right as follows: + + a. "1" is appended. Example: if the original message is "01010000", + this is padded to "010100001". + + b. "0"s are appended. The number of "0"s will depend on the original + length of the message. The last 64 bits of the last 512-bit block + are reserved + + for the length l of the original message. + + Example: Suppose the original message is the bit string + + 01100001 01100010 01100011 01100100 01100101. + + After step (a) this gives + + 01100001 01100010 01100011 01100100 01100101 1. + + Since l = 40, the number of bits in the above is 41 and 407 "0"s + are appended, making the total now 448. This gives (in hex) + + 61626364 65800000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 00000000 00000000. + + c. Obtain the 2-word representation of l, the number of bits in the + original message. If l < 2^32 then the first word is all zeroes. + Append these two words to the padded message. + + Example: Suppose the original message is as in (b). Then l = 40 + (note that l is computed before any padding). The two-word + representation of 40 is hex 00000000 00000028. Hence the final + padded message is hex + + 61626364 65800000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000 + 00000000 00000000 00000000 00000028. + + The padded message will contain 16 * n words for some n > 0. + The padded message is regarded as a sequence of n blocks M(1) , + M(2), first characters (or bits) of the message. + + + + + + +Eastlake & Jones Informational [Page 5] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +5. Functions and Constants Used + + A sequence of logical functions f(0), f(1),..., f(79) is used in + SHA-1. Each f(t), 0 <= t <= 79, operates on three 32-bit words B, C, + D and produces a 32-bit word as output. f(t;B,C,D) is defined as + follows: for words B, C, D, + + f(t;B,C,D) = (B AND C) OR ((NOT B) AND D) ( 0 <= t <= 19) + + f(t;B,C,D) = B XOR C XOR D (20 <= t <= 39) + + f(t;B,C,D) = (B AND C) OR (B AND D) OR (C AND D) (40 <= t <= 59) + + f(t;B,C,D) = B XOR C XOR D (60 <= t <= 79). + + A sequence of constant words K(0), K(1), ... , K(79) is used in the + SHA-1. In hex these are given by + + K(t) = 5A827999 ( 0 <= t <= 19) + + K(t) = 6ED9EBA1 (20 <= t <= 39) + + K(t) = 8F1BBCDC (40 <= t <= 59) + + K(t) = CA62C1D6 (60 <= t <= 79). + +6. Computing the Message Digest + + The methods given in 6.1 and 6.2 below yield the same message digest. + Although using method 2 saves sixty-four 32-bit words of storage, it + is likely to lengthen execution time due to the increased complexity + of the address computations for the { W[t] } in step (c). There are + other computation methods which give identical results. + +6.1 Method 1 + + The message digest is computed using the message padded as described + in section 4. The computation is described using two buffers, each + consisting of five 32-bit words, and a sequence of eighty 32-bit + words. The words of the first 5-word buffer are labeled A,B,C,D,E. + The words of the second 5-word buffer are labeled H0, H1, H2, H3, H4. + The words of the 80-word sequence are labeled W(0), W(1),..., W(79). + A single word buffer TEMP is also employed. + + To generate the message digest, the 16-word blocks M(1), M(2),..., + M(n) defined in section 4 are processed in order. The processing of + each M(i) involves 80 steps. + + + + +Eastlake & Jones Informational [Page 6] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + Before processing any blocks, the H's are initialized as follows: in + hex, + + H0 = 67452301 + + H1 = EFCDAB89 + + H2 = 98BADCFE + + H3 = 10325476 + + H4 = C3D2E1F0. + + Now M(1), M(2), ... , M(n) are processed. To process M(i), we + proceed as follows: + + a. Divide M(i) into 16 words W(0), W(1), ... , W(15), where W(0) + is the left-most word. + + b. For t = 16 to 79 let + + W(t) = S^1(W(t-3) XOR W(t-8) XOR W(t-14) XOR W(t-16)). + + c. Let A = H0, B = H1, C = H2, D = H3, E = H4. + + d. For t = 0 to 79 do + + TEMP = S^5(A) + f(t;B,C,D) + E + W(t) + K(t); + + E = D; D = C; C = S^30(B); B = A; A = TEMP; + + e. Let H0 = H0 + A, H1 = H1 + B, H2 = H2 + C, H3 = H3 + D, H4 = H4 + + E. + + After processing M(n), the message digest is the 160-bit string + represented by the 5 words + + H0 H1 H2 H3 H4. + +6.2 Method 2 + + The method above assumes that the sequence W(0), ... , W(79) is + implemented as an array of eighty 32-bit words. This is efficient + from the standpoint of minimization of execution time, since the + addresses of W(t-3), ... ,W(t-16) in step (b) are easily computed. + If space is at a premium, an alternative is to regard { W(t) } as a + + + + + +Eastlake & Jones Informational [Page 7] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + circular queue, which may be implemented using an array of sixteen + 32-bit words W[0], ... W[15]. In this case, in hex let + + MASK = 0000000F. Then processing of M(i) is as follows: + + a. Divide M(i) into 16 words W[0], ... , W[15], where W[0] is the + left-most word. + + b. Let A = H0, B = H1, C = H2, D = H3, E = H4. + + c. For t = 0 to 79 do + + s = t AND MASK; + + if (t >= 16) W[s] = S^1(W[(s + 13) AND MASK] XOR W[(s + 8) AND + MASK] XOR W[(s + 2) AND MASK] XOR W[s]); + + TEMP = S^5(A) + f(t;B,C,D) + E + W[s] + K(t); + + E = D; D = C; C = S^30(B); B = A; A = TEMP; + + d. Let H0 = H0 + A, H1 = H1 + B, H2 = H2 + C, H3 = H3 + D, H4 = H4 + + E. + +7. C Code + + Below is a demonstration implementation of SHA-1 in C. Section 7.1 + contains the header file, 7.2 the C code, and 7.3 a test driver. + +7.1 .h file + +/* + * sha1.h + * + * Description: + * This is the header file for code which implements the Secure + * Hashing Algorithm 1 as defined in FIPS PUB 180-1 published + * April 17, 1995. + * + * Many of the variable names in this code, especially the + * single character names, were used because those were the names + * used in the publication. + * + * Please read the file sha1.c for more information. + * + */ + + + + + +Eastlake & Jones Informational [Page 8] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +#ifndef _SHA1_H_ +#define _SHA1_H_ + +#include +/* + * If you do not have the ISO standard stdint.h header file, then you + * must typdef the following: + * name meaning + * uint32_t unsigned 32 bit integer + * uint8_t unsigned 8 bit integer (i.e., unsigned char) + * int_least16_t integer of >= 16 bits + * + */ + +#ifndef _SHA_enum_ +#define _SHA_enum_ +enum +{ + shaSuccess = 0, + shaNull, /* Null pointer parameter */ + shaInputTooLong, /* input data too long */ + shaStateError /* called Input after Result */ +}; +#endif +#define SHA1HashSize 20 + +/* + * This structure will hold context information for the SHA-1 + * hashing operation + */ +typedef struct SHA1Context +{ + uint32_t Intermediate_Hash[SHA1HashSize/4]; /* Message Digest */ + + uint32_t Length_Low; /* Message length in bits */ + uint32_t Length_High; /* Message length in bits */ + + /* Index into message block array */ + int_least16_t Message_Block_Index; + uint8_t Message_Block[64]; /* 512-bit message blocks */ + + int Computed; /* Is the digest computed? */ + int Corrupted; /* Is the message digest corrupted? */ +} SHA1Context; + +/* + * Function Prototypes + */ + + + +Eastlake & Jones Informational [Page 9] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +int SHA1Reset( SHA1Context *); +int SHA1Input( SHA1Context *, + const uint8_t *, + unsigned int); +int SHA1Result( SHA1Context *, + uint8_t Message_Digest[SHA1HashSize]); + +#endif + +7.2 .c file + +/* + * sha1.c + * + * Description: + * This file implements the Secure Hashing Algorithm 1 as + * defined in FIPS PUB 180-1 published April 17, 1995. + * + * The SHA-1, produces a 160-bit message digest for a given + * data stream. It should take about 2**n steps to find a + * message with the same digest as a given message and + * 2**(n/2) to find any two messages with the same digest, + * when n is the digest size in bits. Therefore, this + * algorithm can serve as a means of providing a + * "fingerprint" for a message. + * + * Portability Issues: + * SHA-1 is defined in terms of 32-bit "words". This code + * uses (included via "sha1.h" to define 32 and 8 + * bit unsigned integer types. If your C compiler does not + * support 32 bit unsigned integers, this code is not + * appropriate. + * + * Caveats: + * SHA-1 is designed to work with messages less than 2^64 bits + * long. Although SHA-1 allows a message digest to be generated + * for messages of any number of bits less than 2^64, this + * implementation only works with messages with a length that is + * a multiple of the size of an 8-bit character. + * + */ + + + + + + + + + + +Eastlake & Jones Informational [Page 10] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +#include "sha1.h" + +/* + * Define the SHA1 circular left shift macro + */ +#define SHA1CircularShift(bits,word) \ + (((word) << (bits)) | ((word) >> (32-(bits)))) + +/* Local Function Prototyptes */ +void SHA1PadMessage(SHA1Context *); +void SHA1ProcessMessageBlock(SHA1Context *); + +/* + * SHA1Reset + * + * Description: + * This function will initialize the SHA1Context in preparation + * for computing a new SHA1 message digest. + * + * Parameters: + * context: [in/out] + * The context to reset. + * + * Returns: + * sha Error Code. + * + */ +int SHA1Reset(SHA1Context *context) +{ + if (!context) + { + return shaNull; + } + + context->Length_Low = 0; + context->Length_High = 0; + context->Message_Block_Index = 0; + + context->Intermediate_Hash[0] = 0x67452301; + context->Intermediate_Hash[1] = 0xEFCDAB89; + context->Intermediate_Hash[2] = 0x98BADCFE; + context->Intermediate_Hash[3] = 0x10325476; + context->Intermediate_Hash[4] = 0xC3D2E1F0; + + context->Computed = 0; + context->Corrupted = 0; + + + + + +Eastlake & Jones Informational [Page 11] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + return shaSuccess; +} + +/* + * SHA1Result + * + * Description: + * This function will return the 160-bit message digest into the + * Message_Digest array provided by the caller. + * NOTE: The first octet of hash is stored in the 0th element, + * the last octet of hash in the 19th element. + * + * Parameters: + * context: [in/out] + * The context to use to calculate the SHA-1 hash. + * Message_Digest: [out] + * Where the digest is returned. + * + * Returns: + * sha Error Code. + * + */ +int SHA1Result( SHA1Context *context, + uint8_t Message_Digest[SHA1HashSize]) +{ + int i; + + if (!context || !Message_Digest) + { + return shaNull; + } + + if (context->Corrupted) + { + return context->Corrupted; + } + + if (!context->Computed) + { + SHA1PadMessage(context); + for(i=0; i<64; ++i) + { + /* message may be sensitive, clear it out */ + context->Message_Block[i] = 0; + } + context->Length_Low = 0; /* and clear length */ + context->Length_High = 0; + context->Computed = 1; + + + +Eastlake & Jones Informational [Page 12] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + } + + for(i = 0; i < SHA1HashSize; ++i) + { + Message_Digest[i] = context->Intermediate_Hash[i>>2] + >> 8 * ( 3 - ( i & 0x03 ) ); + } + + return shaSuccess; +} + +/* + * SHA1Input + * + * Description: + * This function accepts an array of octets as the next portion + * of the message. + * + * Parameters: + * context: [in/out] + * The SHA context to update + * message_array: [in] + * An array of characters representing the next portion of + * the message. + * length: [in] + * The length of the message in message_array + * + * Returns: + * sha Error Code. + * + */ +int SHA1Input( SHA1Context *context, + const uint8_t *message_array, + unsigned length) +{ + if (!length) + { + return shaSuccess; + } + + if (!context || !message_array) + { + return shaNull; + } + + if (context->Computed) + { + context->Corrupted = shaStateError; + + + +Eastlake & Jones Informational [Page 13] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + return shaStateError; + } + + if (context->Corrupted) + { + return context->Corrupted; + } + while(length-- && !context->Corrupted) + { + context->Message_Block[context->Message_Block_Index++] = + (*message_array & 0xFF); + + context->Length_Low += 8; + if (context->Length_Low == 0) + { + context->Length_High++; + if (context->Length_High == 0) + { + /* Message is too long */ + context->Corrupted = 1; + } + } + + if (context->Message_Block_Index == 64) + { + SHA1ProcessMessageBlock(context); + } + + message_array++; + } + + return shaSuccess; +} + +/* + * SHA1ProcessMessageBlock + * + * Description: + * This function will process the next 512 bits of the message + * stored in the Message_Block array. + * + * Parameters: + * None. + * + * Returns: + * Nothing. + * + * Comments: + + + +Eastlake & Jones Informational [Page 14] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + * Many of the variable names in this code, especially the + * single character names, were used because those were the + * names used in the publication. + * + * + */ +void SHA1ProcessMessageBlock(SHA1Context *context) +{ + const uint32_t K[] = { /* Constants defined in SHA-1 */ + 0x5A827999, + 0x6ED9EBA1, + 0x8F1BBCDC, + 0xCA62C1D6 + }; + int t; /* Loop counter */ + uint32_t temp; /* Temporary word value */ + uint32_t W[80]; /* Word sequence */ + uint32_t A, B, C, D, E; /* Word buffers */ + + /* + * Initialize the first 16 words in the array W + */ + for(t = 0; t < 16; t++) + { + W[t] = context->Message_Block[t * 4] << 24; + W[t] |= context->Message_Block[t * 4 + 1] << 16; + W[t] |= context->Message_Block[t * 4 + 2] << 8; + W[t] |= context->Message_Block[t * 4 + 3]; + } + + for(t = 16; t < 80; t++) + { + W[t] = SHA1CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]); + } + + A = context->Intermediate_Hash[0]; + B = context->Intermediate_Hash[1]; + C = context->Intermediate_Hash[2]; + D = context->Intermediate_Hash[3]; + E = context->Intermediate_Hash[4]; + + for(t = 0; t < 20; t++) + { + temp = SHA1CircularShift(5,A) + + ((B & C) | ((~B) & D)) + E + W[t] + K[0]; + E = D; + D = C; + C = SHA1CircularShift(30,B); + + + +Eastlake & Jones Informational [Page 15] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + B = A; + A = temp; + } + + for(t = 20; t < 40; t++) + { + temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1]; + E = D; + D = C; + C = SHA1CircularShift(30,B); + B = A; + A = temp; + } + + for(t = 40; t < 60; t++) + { + temp = SHA1CircularShift(5,A) + + ((B & C) | (B & D) | (C & D)) + E + W[t] + K[2]; + E = D; + D = C; + C = SHA1CircularShift(30,B); + B = A; + A = temp; + } + + for(t = 60; t < 80; t++) + { + temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3]; + E = D; + D = C; + C = SHA1CircularShift(30,B); + B = A; + A = temp; + } + + context->Intermediate_Hash[0] += A; + context->Intermediate_Hash[1] += B; + context->Intermediate_Hash[2] += C; + context->Intermediate_Hash[3] += D; + context->Intermediate_Hash[4] += E; + + context->Message_Block_Index = 0; +} + + +/* + * SHA1PadMessage + * + + + +Eastlake & Jones Informational [Page 16] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + * Description: + * According to the standard, the message must be padded to an even + * 512 bits. The first padding bit must be a '1'. The last 64 + * bits represent the length of the original message. All bits in + * between should be 0. This function will pad the message + * according to those rules by filling the Message_Block array + * accordingly. It will also call the ProcessMessageBlock function + * provided appropriately. When it returns, it can be assumed that + * the message digest has been computed. + * + * Parameters: + * context: [in/out] + * The context to pad + * ProcessMessageBlock: [in] + * The appropriate SHA*ProcessMessageBlock function + * Returns: + * Nothing. + * + */ + +void SHA1PadMessage(SHA1Context *context) +{ + /* + * Check to see if the current message block is too small to hold + * the initial padding bits and length. If so, we will pad the + * block, process it, and then continue padding into a second + * block. + */ + if (context->Message_Block_Index > 55) + { + context->Message_Block[context->Message_Block_Index++] = 0x80; + while(context->Message_Block_Index < 64) + { + context->Message_Block[context->Message_Block_Index++] = 0; + } + + SHA1ProcessMessageBlock(context); + + while(context->Message_Block_Index < 56) + { + context->Message_Block[context->Message_Block_Index++] = 0; + } + } + else + { + context->Message_Block[context->Message_Block_Index++] = 0x80; + while(context->Message_Block_Index < 56) + { + + + +Eastlake & Jones Informational [Page 17] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + context->Message_Block[context->Message_Block_Index++] = 0; + } + } + + /* + * Store the message length as the last 8 octets + */ + context->Message_Block[56] = context->Length_High >> 24; + context->Message_Block[57] = context->Length_High >> 16; + context->Message_Block[58] = context->Length_High >> 8; + context->Message_Block[59] = context->Length_High; + context->Message_Block[60] = context->Length_Low >> 24; + context->Message_Block[61] = context->Length_Low >> 16; + context->Message_Block[62] = context->Length_Low >> 8; + context->Message_Block[63] = context->Length_Low; + + SHA1ProcessMessageBlock(context); +} + +7.3 Test Driver + + The following code is a main program test driver to exercise the code + in sha1.c. + +/* + * sha1test.c + * + * Description: + * This file will exercise the SHA-1 code performing the three + * tests documented in FIPS PUB 180-1 plus one which calls + * SHA1Input with an exact multiple of 512 bits, plus a few + * error test checks. + * + * Portability Issues: + * None. + * + */ + +#include +#include +#include +#include "sha1.h" + +/* + * Define patterns for testing + */ +#define TEST1 "abc" +#define TEST2a "abcdbcdecdefdefgefghfghighijhi" + + + +Eastlake & Jones Informational [Page 18] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +#define TEST2b "jkijkljklmklmnlmnomnopnopq" +#define TEST2 TEST2a TEST2b +#define TEST3 "a" +#define TEST4a "01234567012345670123456701234567" +#define TEST4b "01234567012345670123456701234567" + /* an exact multiple of 512 bits */ +#define TEST4 TEST4a TEST4b +char *testarray[4] = +{ + TEST1, + TEST2, + TEST3, + TEST4 +}; +long int repeatcount[4] = { 1, 1, 1000000, 10 }; +char *resultarray[4] = +{ + "A9 99 3E 36 47 06 81 6A BA 3E 25 71 78 50 C2 6C 9C D0 D8 9D", + "84 98 3E 44 1C 3B D2 6E BA AE 4A A1 F9 51 29 E5 E5 46 70 F1", + "34 AA 97 3C D4 C4 DA A4 F6 1E EB 2B DB AD 27 31 65 34 01 6F", + "DE A3 56 A2 CD DD 90 C7 A7 EC ED C5 EB B5 63 93 4F 46 04 52" +}; + +int main() +{ + SHA1Context sha; + int i, j, err; + uint8_t Message_Digest[20]; + + /* + * Perform SHA-1 tests + */ + for(j = 0; j < 4; ++j) + { + printf( "\nTest %d: %d, '%s'\n", + j+1, + repeatcount[j], + testarray[j]); + + err = SHA1Reset(&sha); + if (err) + { + fprintf(stderr, "SHA1Reset Error %d.\n", err ); + break; /* out of for j loop */ + } + + for(i = 0; i < repeatcount[j]; ++i) + { + + + +Eastlake & Jones Informational [Page 19] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + + err = SHA1Input(&sha, + (const unsigned char *) testarray[j], + strlen(testarray[j])); + if (err) + { + fprintf(stderr, "SHA1Input Error %d.\n", err ); + break; /* out of for i loop */ + } + } + + err = SHA1Result(&sha, Message_Digest); + if (err) + { + fprintf(stderr, + "SHA1Result Error %d, could not compute message digest.\n", + err ); + } + else + { + printf("\t"); + for(i = 0; i < 20 ; ++i) + { + printf("%02X ", Message_Digest[i]); + } + printf("\n"); + } + printf("Should match:\n"); + printf("\t%s\n", resultarray[j]); + } + + /* Test some error returns */ + err = SHA1Input(&sha,(const unsigned char *) testarray[1], 1); + printf ("\nError %d. Should be %d.\n", err, shaStateError ); + err = SHA1Reset(0); + printf ("\nError %d. Should be %d.\n", err, shaNull ); + return 0; +} + +8. Security Considerations + + This document is intended to provide convenient open source access by + the Internet community to the United States of America Federal + Information Processing Standard Secure Hash Function SHA-1 [FIPS + 180-1]. No independent assertion of the security of this hash + function by the authors for any particular use is intended. + + + + + + +Eastlake & Jones Informational [Page 20] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +References + + [FIPS 180-1] "Secure Hash Standard", United States of American, + National Institute of Science and Technology, Federal + Information Processing Standard (FIPS) 180-1, April + 1993. + + [MD4] "The MD4 Message Digest Algorithm," Advances in + Cryptology - CRYPTO '90 Proceedings, Springer-Verlag, + 1991, pp. 303-311. + + [RFC 1320] Rivest, R., "The MD4 Message-Digest Algorithm", RFC + 1320, April 1992. + + [RFC 1321] Rivest, R., "The MD5 Message-Digest Algorithm", RFC + 1321, April 1992. + + [RFC 1750] Eastlake, D., Crocker, S. and J. Schiller, "Randomness + Requirements for Security", RFC 1750, December 1994. + +Authors' Addresses + + Donald E. Eastlake, 3rd + Motorola + 155 Beaver Street + Milford, MA 01757 USA + + Phone: +1 508-634-2066 (h) + +1 508-261-5434 (w) + Fax: +1 508-261-4777 + EMail: Donald.Eastlake@motorola.com + + + Paul E. Jones + Cisco Systems, Inc. + 7025 Kit Creek Road + Research Triangle Park, NC 27709 USA + + Phone: +1 919 392 6948 + EMail: paulej@packetizer.com + + + + + + + + + + + +Eastlake & Jones Informational [Page 21] + +RFC 3174 US Secure Hash Algorithm 1 (SHA1) September 2001 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2001). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Eastlake & Jones Informational [Page 22] + diff --git a/sha1_cuda_kernel.cubin b/sha1_cuda_kernel.cubin new file mode 100644 index 0000000..957c529 Binary files /dev/null and b/sha1_cuda_kernel.cubin differ diff --git a/sha1_tests b/sha1_tests new file mode 100755 index 0000000..b3ee06c Binary files /dev/null and b/sha1_tests differ diff --git a/test_vault.bash b/test_vault.bash new file mode 100644 index 0000000..2155ae0 --- /dev/null +++ b/test_vault.bash @@ -0,0 +1,8 @@ +#! /bin/bash + +set -e +vault_file=deti_coins_v2_vault.txt +while IFS= read -r line; do + hash=$(echo "$line" | cut -b 5- | sha1sum | cut -b 1-40) + echo "$line --- $hash" +done < ${vault_file}