diff --git a/aad_coin_miner_cuda.c b/aad_coin_miner_cuda.c index 7056b9b..bbd4bfd 100644 --- a/aad_coin_miner_cuda.c +++ b/aad_coin_miner_cuda.c @@ -38,7 +38,7 @@ static void reconstruct_coin(u32_t *stored_data, u32_t coin[14]) // // Mine DETI coins using CUDA // -static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel) +static void mine_coins_cuda(u64_t max_attempts) { cuda_data_t cd; u32_t *host_storage; @@ -49,7 +49,7 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel) // Initialize CUDA cd.device_number = 0; cd.cubin_file_name = "coin_miner_cuda_kernel.cubin"; - cd.kernel_name = use_scan_kernel ? "mine_deti_coins_scan_kernel" : "mine_deti_coins_kernel"; + cd.kernel_name = "mine_deti_coins_kernel"; cd.data_size[0] = COINS_STORAGE_SIZE * sizeof(u32_t); cd.data_size[1] = 0; @@ -69,12 +69,10 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel) printf("Kernel: %s\n", cd.kernel_name); printf("Press Ctrl+C to stop\n\n"); - u32_t param1 = (u32_t)time(NULL); - u32_t param2 = 0x12345678u; - int scan_pos = 12; + u64_t base_nonce = 0; + u32_t attempts_per_thread = 1024 * 8; // Increased attempts per thread time_measurement(); - // double start_time = wall_time_delta(); while(keep_running && (max_attempts == 0 || attempts < max_attempts)) { @@ -85,12 +83,10 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel) host_to_device_copy(&cd, 0); // Set kernel arguments - cd.n_kernel_arguments = use_scan_kernel ? 
4 : 3; + cd.n_kernel_arguments = 2; cd.arg[0] = &cd.device_data[0]; - cd.arg[1] = &param1; - cd.arg[2] = &param2; - if(use_scan_kernel) - cd.arg[3] = &scan_pos; + cd.arg[1] = &base_nonce; + cd.arg[2] = &attempts_per_thread; // Launch the CUDA kernel launch_kernel(&cd); @@ -104,58 +100,24 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel) if(n_stored > 0 && host_storage[0] < COINS_STORAGE_SIZE) { - printf("DEBUG: host_storage[0] = %u, n_stored = %u\n", host_storage[0], n_stored); - for(u32_t i = 0; i < n_stored; i++) { u32_t coin[14]; reconstruct_coin(&host_storage[1 + i * 14], coin); - // Verify it's actually a valid coin - u32_t hash[5]; - sha1(coin, hash); - - printf("DEBUG: Coin %u - hash[0] = 0x%08X (expected 0xAAD20250)\n", i, hash[0]); - - // Print the coin as string - if(i == 0) { - printf("DEBUG: First coin content: "); - u08_t *bytes = (u08_t *)coin; - for(int j = 0; j < 55; j++) { - char c = bytes[j ^ 3]; - if(c >= 32 && c <= 126) - printf("%c", c); - else - printf("[0x%02X]", (u08_t)c); - } - printf("\n"); - } - - if(hash[0] == 0xAAD20250u) - { - coins_found++; - n_coins_this_kernel++; - printf("COIN FOUND! (kernel %u, coin %u in this kernel)\n", - kernel_runs, n_coins_this_kernel); - save_coin(coin); - } + coins_found++; + n_coins_this_kernel++; + printf("COIN FOUND! (kernel %u, coin %u in this kernel). 
Total coins:%u\n", + kernel_runs, n_coins_this_kernel, coins_found); + save_coin(coin); } } // Update counters kernel_runs++; - if(use_scan_kernel) - attempts += n_threads * 256; // Each thread tries 256 values - else - attempts += n_threads; - - // Update parameters for next iteration - param1++; - param2 = param2 ^ 0x9E3779B9u; - if(use_scan_kernel) - scan_pos = (scan_pos + 1) % 42 + 12; // Cycle through positions 12-53 - - + u64_t attempts_this_launch = (u64_t)n_threads * attempts_per_thread; + attempts += attempts_this_launch; + base_nonce += attempts_this_launch; } time_measurement(); @@ -178,20 +140,13 @@ static void mine_coins_cuda(u64_t max_attempts, int use_scan_kernel) int main(int argc, char *argv[]) { u64_t max_attempts = 0; - int use_scan_kernel = 0; signal(SIGINT, signal_handler); if(argc > 1) max_attempts = strtoull(argv[1], NULL, 10); - if(argc > 2 && strcmp(argv[2], "scan") == 0) - { - use_scan_kernel = 1; - printf("Using scan kernel (tries 256 values per thread)\n"); - } - - mine_coins_cuda(max_attempts, use_scan_kernel); + mine_coins_cuda(max_attempts); return 0; } diff --git a/aad_coin_miner_cuda_kernel.cu b/aad_coin_miner_cuda_kernel.cu index f64869f..7c0c873 100644 --- a/aad_coin_miner_cuda_kernel.cu +++ b/aad_coin_miner_cuda_kernel.cu @@ -13,7 +13,7 @@ // extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1) -void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2) +void mine_deti_coins_kernel(u32_t *coins_storage_area, u64_t base_nonce, u32_t attempts_per_thread) { u32_t coin[14]; u32_t hash[5]; @@ -25,41 +25,45 @@ void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param // Initialize coin template: "DETI coin 2 " + variable + "\n\x80" // Use byte-swapped format to match host expectations (idx ^ 3) - coin[0] = 0x44455449u; // "DETI" with byte swap - coin[1] = 0x20636F69u; // " coi" with byte swap - coin[2] = 0x6E203220u; // "n 2 " with byte swap + coin[0] = ('D' << 24) + ('E' 
<< 16) + ('T' << 8) + 'I'; + coin[1] = (' ' << 24) + ('c' << 16) + ('o' << 8) + 'i'; + coin[2] = ('n' << 24) + (' ' << 16) + ('2' << 8) + ' '; - // Initialize variable part (positions 12-53, 42 bytes) - // Start with A-Z pattern like CPU/SIMD miners - for(int i = 12; i < 54; i++) - bytes[i ^ 3] = 'A' + ((i - 12) % 26); + // Fill the variable part of the coin with a pattern + for(int i = 3; i < 14; i++) + coin[i] = 0x41414141; // 'AAAA' // End with newline and padding bytes[0x36 ^ 3] = '\n'; // Position 54 bytes[0x37 ^ 3] = 0x80; // Position 55 - // Calculate offset based on thread index and parameters - // This creates a unique starting point for each thread - u64_t offset = ((u64_t)param1 << 32) | param2; - offset += (u64_t)n; + for(u32_t i = 0; i < attempts_per_thread; ++i) { + // Initialize variable part (positions 12-53, 42 bytes) + // Start with A-Z pattern like CPU/SIMD miners + for(int j = 12; j < 54; j++) + bytes[j ^ 3] = 'A' + ((j - 12) % 26); - // Apply offset to variable part (increment the coin counter) - for(int pos = 53; pos >= 12 && offset > 0; pos--) - { - u08_t *byte = &bytes[pos ^ 3]; - u64_t add = offset % 127; - offset /= 127; + // Calculate offset based on thread index and parameters + // This creates a unique starting point for each thread + u64_t offset = base_nonce + n + (u64_t)i * gridDim.x * blockDim.x; - u32_t val = *byte + add; - u08_t new_val = val % 127; + // Apply offset to variable part (increment the coin counter) + for(int pos = 53; pos >= 12 && offset > 0; pos--) + { + u08_t *byte = &bytes[pos ^ 3]; + u64_t add = offset % 127; + offset /= 127; - // Skip newline character (ASCII 10) in the variable part - if(new_val == '\n') - new_val++; + u32_t val = *byte + add; + u08_t new_val = val % 127; - *byte = new_val; - offset += val / 127; // Carry - } + // Skip newline character (ASCII 10) in the variable part + if(new_val == '\n') + new_val++; + + *byte = new_val; + offset += val / 127; // Carry + } // Compute SHA1 hash # define T 
u32_t @@ -80,104 +84,13 @@ void mine_deti_coins_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param // Found a coin! Store it atomically u32_t idx = atomicAdd(coins_storage_area, 14u); - // Make sure we don't write outside buffer - if(idx < 1024u - 14u) - { - // Store the complete coin data - for(int i = 0; i < 14; i++) - coins_storage_area[idx + i] = coin[i]; - } - } -} - -// -// Kernel that tries all possible values for one character position -// - -extern "C" __global__ __launch_bounds__(RECOMMENDED_CUDA_BLOCK_SIZE,1) -void mine_deti_coins_scan_kernel(u32_t *coins_storage_area, u32_t param1, u32_t param2, int scan_position) -{ - u32_t coin[14]; - u32_t hash[5]; - u32_t n; - u08_t *bytes = (u08_t *)coin; - - n = (u32_t)threadIdx.x + (u32_t)blockDim.x * (u32_t)blockIdx.x; - - // Initialize coin template (with byte swap for idx ^ 3 convention) - coin[0] = 0x44455449u; // "DETI" with byte swap - coin[1] = 0x20636F69u; // " coi" with byte swap - coin[2] = 0x6E203220u; // "n 2 " with byte swap - - // Initialize variable part with A-Z pattern - for(int i = 12; i < 54; i++) - bytes[i ^ 3] = 'A' + ((i - 12) % 26); - - // End with newline and padding - bytes[0x36 ^ 3] = '\n'; // Position 54 - bytes[0x37 ^ 3] = 0x80; // Position 55 - - // Apply base offset from parameters (similar to main kernel) - u64_t offset = ((u64_t)param1 << 32) | param2; - offset += (u64_t)n; - - // Apply offset to all positions except the scan position - for(int pos = 53; pos >= 12 && offset > 0; pos--) - { - if(pos == scan_position) - continue; // Skip the scan position - - u08_t *byte = &bytes[pos ^ 3]; - u64_t add = offset % 95; - offset /= 95; - - u32_t val = (*byte - 32 + add); - u08_t new_val = 32 + (val % 95); - - // Skip newline character (ASCII 10) in the variable part - if(new_val == '\n') - new_val++; - - *byte = new_val; - offset += val / 95; - } - - // Try all possible printable ASCII values for the scan position (32-126) - for(u32_t val = 32; val < 127; val++) - { - // Set the 
test value at the scan position - if(scan_position >= 12 && scan_position < 54) - { - bytes[scan_position ^ 3] = (u08_t)val; - - // Skip newline in the middle (it's only valid at position 54) - if(scan_position != 54 && val == '\n') - continue; - } - - // Compute SHA1 hash -# define T u32_t -# define C(c) (c) -# define ROTATE(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) -# define DATA(idx) coin[idx] -# define HASH(idx) hash[idx] - CUSTOM_SHA1_CODE(); -# undef T -# undef C -# undef ROTATE -# undef DATA -# undef HASH - - // Check if valid coin - if(hash[0] == 0xAAD20250u) - { - u32_t idx = atomicAdd(coins_storage_area, 14u); + // Make sure we don't write outside buffer if(idx < 1024u - 14u) { - for(int i = 0; i < 14; i++) - coins_storage_area[idx + i] = coin[i]; + // Store the complete coin data + for(int k = 0; k < 14; k++) + coins_storage_area[idx + k] = coin[k]; } } } } -