diff --git a/aad_coin_miner_cuda.c b/aad_coin_miner_cuda.c
index 8080b0f..7056b9b 100644
--- a/aad_coin_miner_cuda.c
+++ b/aad_coin_miner_cuda.c
@@ -26,47 +26,6 @@ void signal_handler(int signum)
   keep_running = 0;
 }
 
-// Histogram data structures
-typedef struct {
-  u32_t bins[MAX_HISTOGRAM_BINS];
-  u32_t count;
-  double min_value;
-  double max_value;
-} histogram_t;
-
-static void histogram_init(histogram_t *h)
-{
-  memset(h->bins, 0, sizeof(h->bins));
-  h->count = 0;
-  h->min_value = 1e99;
-  h->max_value = 0.0;
-}
-
-static void histogram_add(histogram_t *h, double value)
-{
-  if(value < h->min_value)
-    h->min_value = value;
-  if(value > h->max_value)
-    h->max_value = value;
-  h->count++;
-
-  // For now, just count - we'll bin them later when printing
-}
-
-static void histogram_print(histogram_t *h, const char *title, int n_bins)
-{
-  if(h->count == 0)
-  {
-    printf("%s: No data\n", title);
-    return;
-  }
-
-  printf("\n%s:\n", title);
-  printf("  Count: %u\n", h->count);
-  printf("  Min: %.6f\n", h->min_value);
-  printf("  Max: %.6f\n", h->max_value);
-  printf("  Avg: %.6f\n", (h->min_value + h->max_value) / 2.0);
-}
 
 // Coin reconstruction from stored data
 static void reconstruct_coin(u32_t *stored_data, u32_t coin[14])
@@ -87,19 +46,6 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
   u32_t coins_found = 0;
   u32_t kernel_runs = 0;
 
-  // Histograms
-  histogram_t time_histogram;
-  histogram_t coins_histogram;
-  double *kernel_times = NULL;
-  u32_t *kernel_coin_counts = NULL;
-  u32_t histogram_capacity = 10000;
-
-  histogram_init(&time_histogram);
-  histogram_init(&coins_histogram);
-
-  kernel_times = (double *)malloc(histogram_capacity * sizeof(double));
-  kernel_coin_counts = (u32_t *)malloc(histogram_capacity * sizeof(u32_t));
-
   // Initialize CUDA
   cd.device_number = 0;
   cd.cubin_file_name = "coin_miner_cuda_kernel.cubin";
@@ -112,7 +58,7 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
   host_storage = (u32_t *)cd.host_data[0];
 
   // Kernel configuration
-  cd.block_dim_x = RECOMENDED_CUDA_BLOCK_SIZE;
+  cd.block_dim_x = RECOMMENDED_CUDA_BLOCK_SIZE;
   cd.grid_dim_x = 4096;  // Large grid for maximum GPU utilization
 
   u32_t n_threads = cd.grid_dim_x * cd.block_dim_x;
@@ -128,9 +74,7 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
   int scan_pos = 12;
 
   time_measurement();
-  time_measurement();
-  double start_time = wall_time_delta();
-  double last_report_time = 0.0;
+  // timing starts at the time_measurement() call above; the elapsed time is read once, after the loop
 
   while(keep_running && (max_attempts == 0 || attempts < max_attempts))
   {
@@ -148,13 +92,8 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
     if(use_scan_kernel)
       cd.arg[3] = &scan_pos;
 
-    // Launch kernel and measure time
-    time_measurement();
-    double kernel_start = cpu_time_delta();
-    lauch_kernel(&cd);
-    time_measurement();
-    double kernel_end = cpu_time_delta();
-    double kernel_time = kernel_end - kernel_start;
+    // Launch the CUDA kernel
+    launch_kernel(&cd);
 
     // Copy results back
     device_to_host_copy(&cd, 0);
@@ -203,16 +142,6 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
       }
     }
 
-    // Update histograms
-    if(kernel_runs < histogram_capacity)
-    {
-      kernel_times[kernel_runs] = kernel_time;
-      kernel_coin_counts[kernel_runs] = n_coins_this_kernel;
-    }
-
-    histogram_add(&time_histogram, kernel_time);
-    histogram_add(&coins_histogram, (double)n_coins_this_kernel);
-
     // Update counters
     kernel_runs++;
     if(use_scan_kernel)
@@ -226,21 +155,11 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
     if(use_scan_kernel)
       scan_pos = (scan_pos + 1) % 42 + 12; // Cycle through positions 12-53
 
-    // Print progress every second
-    time_measurement();
-    double current_time = wall_time_delta() - start_time;
-    if(current_time - last_report_time >= 1.0)
-    {
-      double rate = attempts / current_time;
-      printf("Attempts: %llu, Rate: %.2f MH/s, Coins: %u, Kernels: %u, Avg time: %.6f s\n",
-             (unsigned long long)attempts, rate / 1e6, coins_found, kernel_runs,
-             current_time / kernel_runs);
-      last_report_time = current_time;
-    }
+
   }
 
   time_measurement();
-  double total_time = wall_time_delta() - start_time;
+  double total_time = wall_time_delta(); // wall-clock (not CPU) time between the last two time_measurement() calls
 
   printf("\n=== Mining Statistics ===\n");
   printf("Total attempts: %llu\n", (unsigned long long)attempts);
@@ -249,30 +168,10 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel)
   printf("Coins found: %u\n", coins_found);
   printf("Kernel launches: %u\n", kernel_runs);
 
-  // Print histograms
-  histogram_print(&time_histogram, "Kernel Execution Time Histogram", 20);
-  histogram_print(&coins_histogram, "Coins Found Per Kernel Histogram", 10);
-
-  // Save detailed histogram data
-  FILE *fp = fopen("cuda_kernel_stats.csv", "w");
-  if(fp != NULL)
-  {
-    fprintf(fp, "kernel_id,time_seconds,coins_found\n");
-    u32_t n_to_save = (kernel_runs < histogram_capacity) ? kernel_runs : histogram_capacity;
-    for(u32_t i = 0; i < n_to_save; i++)
-    {
-      fprintf(fp, "%u,%.9f,%u\n", i, kernel_times[i], kernel_coin_counts[i]);
-    }
-    fclose(fp);
-    printf("\nDetailed statistics saved to cuda_kernel_stats.csv\n");
-  }
 
   // Save any remaining coins
   save_coin(NULL);
 
-  // Cleanup
-  free(kernel_times);
-  free(kernel_coin_counts);
   terminate_cuda(&cd);
 }
 
diff --git a/aad_coin_miner_cuda_kernel.cu b/aad_coin_miner_cuda_kernel.cu
index 0683c51..f64869f 100644
--- a/aad_coin_miner_cuda_kernel.cu
+++ b/aad_coin_miner_cuda_kernel.cu
@@ -5,17 +5,14 @@
 //
 
 #include "aad_sha1.h"
-
-typedef unsigned int u32_t;
-typedef unsigned char u08_t;
-typedef unsigned long long u64_t;
+#include "aad_data_types.h"
 
 //
 // Optimized CUDA kernel for DETI coin mining
 // Each thread generates coins using the same approach as CPU/SIMD miners
 //
 
-extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1)
+extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1)
 void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2)
 {
   u32_t coin[14];
@@ -47,16 +44,21 @@ void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param
   offset += (u64_t)n;
 
   // Apply offset to variable part (increment the coin counter)
-  // Use the same carry logic as CPU/SIMD miners
   for(int pos = 53; pos >= 12 && offset > 0; pos--)
   {
     u08_t *byte = &bytes[pos ^ 3];
-    u64_t add = offset % 95;  // Range: 32-126 (95 values)
-    offset /= 95;
-    
-    u32_t val = (*byte - 32 + add);
-    *byte = 32 + (val % 95);
-    offset += val / 95;  // Carry
+    u64_t add = offset % 127;
+    offset /= 127;
+
+    u32_t val = *byte + add;
+    u08_t new_val = val % 127;
+
+    // Never store a newline (ASCII 10) in the variable part: bump it to 11 (residues 10 and 11 then give the same byte)
+    if(new_val == '\n')
+      new_val++;
+
+    *byte = new_val;
+    offset += val / 127;  // Carry
   }
 
   // Compute SHA1 hash
@@ -92,7 +94,7 @@ void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param
 // Kernel that tries all possible values for one character position
 //
 
-extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1)
+extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1)
 void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2, int scan_position)
 {
   u32_t coin[14];
@@ -130,7 +132,13 @@ void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t
     offset /= 95;
     
     u32_t val = (*byte - 32 + add);
-    *byte = 32 + (val % 95);
+    u08_t new_val = 32 + (val % 95);
+
+    // NOTE(review): new_val is always in 32..126 here, so this newline (ASCII 10) check can never fire
+    if(new_val == '\n')
+      new_val++;
+
+    *byte = new_val;
     offset += val / 95;
   }
 
diff --git a/aad_cuda_utilities.h b/aad_cuda_utilities.h
index 290894c..1ed5bd7 100644
--- a/aad_cuda_utilities.h
+++ b/aad_cuda_utilities.h
@@ -36,7 +36,7 @@ typedef struct
   CUdeviceptr  device_data[2];   // the pointers to the device data
   // launch kernel data
   unsigned int grid_dim_x;       // the number of grid blocks (in the X dimension, the only one we will use here)
-  unsigned int block_dim_x;      // the number of threads in a block (in the X dimension, the only one we will use here, should be equal to RECOMENDED_CUDA_BLOCK_SIZE)
+  unsigned int block_dim_x;      // the number of threads in a block (in the X dimension, the only one we will use here, should be equal to RECOMMENDED_CUDA_BLOCK_SIZE)
   int n_kernel_arguments;        // number of kernel arguments
   void *arg[MAX_N_ARGUMENTS];    // pointers to the kernel argument data
 
@@ -260,10 +260,10 @@ static void device_to_host_copy(cuda_data_t *cd,int idx)
 // launch a CUDA kernel (with 0 bytes of shared memory and no extra options)
 //
 
-static void lauch_kernel(cuda_data_t *cd)
+static void launch_kernel(cuda_data_t *cd)
 {
-  if(cd->block_dim_x != (unsigned int)RECOMENDED_CUDA_BLOCK_SIZE)
-    fprintf(stderr,"lauch_kernel(): block_dim_x should be equal to %d\n",RECOMENDED_CUDA_BLOCK_SIZE);
+  if(cd->block_dim_x != (unsigned int)RECOMMENDED_CUDA_BLOCK_SIZE)
+    fprintf(stderr,"launch_kernel(): block_dim_x should be equal to %d\n",RECOMMENDED_CUDA_BLOCK_SIZE);
   CU_CALL( cuLaunchKernel , (cd->cu_kernel,cd->grid_dim_x,1u,1u,cd->block_dim_x,1u,1u,0u,cd->cu_stream,&cd->arg[0],NULL) );
   synchronize_cuda(cd);
 }
diff --git a/aad_sha1.h b/aad_sha1.h
index 54c4fe3..5f7e1aa 100644
--- a/aad_sha1.h
+++ b/aad_sha1.h
@@ -28,7 +28,7 @@
 //
 // we place this here to simplify things (aad_sha1_cuda_kernel.cu includes this file...)
 //
-#define RECOMENDED_CUDA_BLOCK_SIZE  128
+#define RECOMMENDED_CUDA_BLOCK_SIZE  128
 
 
 //
diff --git a/aad_sha1_cuda_kernel.cu b/aad_sha1_cuda_kernel.cu
index 9d67b94..ab29bc7 100644
--- a/aad_sha1_cuda_kernel.cu
+++ b/aad_sha1_cuda_kernel.cu
@@ -12,8 +12,7 @@
 //
 
 #include "aad_sha1.h"
-
-typedef unsigned int u32_t;
+#include "aad_data_types.h"
 
 //
 // the nvcc compiler stores w[] in registers (constant indices!)
@@ -23,7 +22,7 @@ typedef unsigned int u32_t;
 // warp thread number: n & 31 -- the lane
 //
 
-extern "C" __global__ __launch_bounds__(RECOMENDED_CUDA_BLOCK_SIZE,1)
+extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1)
 void sha1_cuda_kernel(u32_t *interleaved32_data,u32_t *interleaved32_hash)
 {
   u32_t n;
diff --git a/aad_sha1_cuda_test.c b/aad_sha1_cuda_test.c
index 4a73c47..0d4bdce 100644
--- a/aad_sha1_cuda_test.c
+++ b/aad_sha1_cuda_test.c
@@ -19,7 +19,7 @@ static void test_sha1_cuda(int n_tests)
   double host_to_device_time,kernel_time,device_to_host_time,hashes_per_second;
   cuda_data_t cd;
 
-  if(n_tests <= 0 || n_tests > (1 << 24) || n_tests % RECOMENDED_CUDA_BLOCK_SIZE != 0)
+  if(n_tests <= 0 || n_tests > (1 << 24) || n_tests % RECOMMENDED_CUDA_BLOCK_SIZE != 0)
   {
     fprintf(stderr,"test_sha1_cuda(): bad number of tests\n");
     exit(1);
@@ -44,13 +44,13 @@ static void test_sha1_cuda(int n_tests)
   host_to_device_copy(&cd,0); // idx=0 means that the interleaved32_data is copied to the CUDA device
   time_measurement();
   host_to_device_time = wall_time_delta();
-  cd.grid_dim_x = (u32_t)n_tests / (u32_t)RECOMENDED_CUDA_BLOCK_SIZE;
-  cd.block_dim_x = (u32_t)RECOMENDED_CUDA_BLOCK_SIZE;
+  cd.grid_dim_x = (u32_t)n_tests / (u32_t)RECOMMENDED_CUDA_BLOCK_SIZE;
+  cd.block_dim_x = (u32_t)RECOMMENDED_CUDA_BLOCK_SIZE;
   cd.n_kernel_arguments = 2;
   cd.arg[0] = &cd.device_data[0]; // interleaved32_data
   cd.arg[1] = &cd.device_data[1]; // interleaved32_hash
   time_measurement();
-  lauch_kernel(&cd);
+  launch_kernel(&cd);
   time_measurement();
   kernel_time = wall_time_delta();
   time_measurement();