139 lines
5.0 KiB
Plaintext
139 lines
5.0 KiB
Plaintext
#include <cuda_runtime.h>
|
|
#include <curand_kernel.h>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
#include <sha512.cuh>
|
|
#include <ed25519.cuh>
|
|
#include <edsign.cuh>
|
|
#include <string.cuh>
|
|
#include <keymanip.cuh>
|
|
// Device-global threshold for the number of leading zero bits of a public key.
// Starts at 0x10, may be overwritten by the --altitude / -a option in
// parameters(), is raised with atomicMax() inside KeyGen, and is read back by
// main() for the "High addresses" banner.
__device__ unsigned d_high = 0x10;
|
|
/**
 * Interprets one command-line token (or a "flag value" pair joined into one
 * string) and applies it to the device-global d_high.
 *
 * Return codes, as produced by the code below:
 *   777 - arg is the bare flag "--altitude" or "-a"; the args kernel reacts to
 *         this by joining the flag with the next token and calling again.
 *   1   - the flag was found but cstring_to_ull() reported a non-zero status
 *         for the text after the first space.
 *   0   - everything else (no space in arg, unrelated option, or value stored).
 *
 * NOTE(review): cstring_find / cstring_length / extract_substring /
 * cstring_to_ull come from a project header that is not visible here; this
 * code treats cstring_find as returning the match index or -1 - confirm.
 * NOTE(review): cstring_to_ull receives an `unsigned*` although its name
 * suggests an unsigned long long - verify the helper's signature.
 */
__device__ int parameters(const char* arg) noexcept {
    // A flag given on its own still needs its value from the following token.
    const bool bareLongFlag = cstring_find(arg, "--altitude") == 0 && cstring_length(arg) == 10;
    if (bareLongFlag) return 777;

    const bool bareShortFlag = cstring_find(arg, "-a") == 0 && cstring_length(arg) == 2;
    if (bareShortFlag) return 777;

    // Without a space there is no "flag value" pair to interpret.
    const int separator = cstring_find(arg, " ");
    if (separator == -1) return 0;

    // Copy whatever follows the first space into a local buffer.
    char value[256];
    extract_substring(arg, separator + 1, value, 256);

    // Only the altitude option is recognised; other pairs are accepted silently.
    const bool isAltitude = cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1;
    if (!isAltitude) return 0;

    unsigned parsed;
    if (cstring_to_ull(value, &parsed) != 0) return 1;

    d_high = parsed;
    return 0;
}
|
|
/**
 * Walks the command line on the device and feeds each token to parameters().
 *
 * Launched by main() as <<<1, 1>>>; the body is plain sequential code and does
 * not use thread or block indices.
 *
 * @param argv   Array of argc C strings; dereferenced in device code.
 * @param argc   Number of entries in argv; entry 0 is skipped.
 * @param result result[0] receives 0 on success, 776 when a bare flag is the
 *               last token, or 777 when the joined "flag value" string is
 *               rejected by parameters().
 *
 * NOTE(review): concat() is declared elsewhere; parameters() splits on the
 * first space, so concat presumably inserts one between the parts - confirm.
 */
__global__ void args(char** argv, int argc, int* result) {
    int status = 0;
    int idx = 1;

    while (idx < argc) {
        const int first = parameters(argv[idx]);

        // Anything other than "bare flag" needs no further work here.
        if (first != 777) {
            ++idx;
            continue;
        }

        // The flag's value is expected in the very next token.
        const int valueIdx = idx + 1;
        if (valueIdx >= argc) {
            status = 776;
            break;
        }

        char joined[512];
        concat(argv[idx], argv[valueIdx], joined, 512);

        if (parameters(joined) != 0) {
            // Reports the first call's code (777), not the second call's.
            status = first;
            break;
        }

        // Step past both the flag and its value.
        idx = valueIdx + 1;
    }

    *result = status;
}
|
|
/**
 * Number of leading zero bits in a 32-bit word.
 *
 * @param x Word to inspect.
 * @return 32 when x is zero, otherwise the result of __clz(x).
 */
__device__ __forceinline__ unsigned char zeroCounter(unsigned int x) noexcept {
    if (x == 0) {
        return 32;
    }
    return static_cast<unsigned char>(__clz(x));
}
|
|
/**
 * Counts the leading zero bits of a 32-byte buffer, treating v[0] as the most
 * significant byte.
 *
 * The buffer is consumed four bytes at a time; scanning stops at the first
 * non-zero 32-bit group.
 *
 * @param v Pointer to at least 32 readable bytes.
 * @return Leading zero bit count. The counter is an unsigned char, so an
 *         all-zero buffer (8 * 32 = 256) wraps around to 0.
 */
__device__ __forceinline__ unsigned char getZeros(const unsigned char* v) noexcept {
    unsigned char total = 0;

    #pragma unroll
    for (int offset = 0; offset < 32; offset += 4) {
        const unsigned char* group = v + offset;

        // Assemble the four bytes into one big-endian word.
        const unsigned b0 = static_cast<unsigned>(group[0]);
        const unsigned b1 = static_cast<unsigned>(group[1]);
        const unsigned b2 = static_cast<unsigned>(group[2]);
        const unsigned b3 = static_cast<unsigned>(group[3]);
        const unsigned word = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;

        if (word != 0) {
            // First set bit found: add the partial count and finish.
            total += zeroCounter(word);
            return total;
        }

        total += 32;
    }

    return total;
}
|
|
/**
 * Initialises one curandState per thread.
 *
 * Each thread indexes rs by its flat 1-D global id
 * (blockIdx.x * blockDim.x + threadIdx.x). There is no bounds check, so rs
 * must hold at least gridDim.x * blockDim.x entries.
 *
 * The seed is clock64() plus the thread id times 7919, the thread id is also
 * passed as the subsequence number, and the first ten outputs are discarded.
 *
 * @param rs Array of generator states to initialise.
 */
__global__ void initRand(curandState* rs) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    curandState* const state = rs + tid;

    const unsigned long long seed = clock64() + tid * 7919ULL;
    curand_init(seed, tid, 0, state);

    // Throw away the first ten draws of the fresh generator.
    #pragma unroll 10
    for (int warmup = 10; warmup > 0; --warmup) {
        curand(state);
    }
}
|
|
/**
 * Advances a two-word generator state by one step using the shift constants
 * 23, 17 and 26, and returns the sum of the new and the previous second word.
 *
 * @param state Two 64-bit words, updated in place: state[0] takes the old
 *              state[1], state[1] takes the newly mixed word.
 * @return New state[1] plus old state[1] (modulo 2^64).
 */
__device__ __forceinline__ unsigned long long xorshift128plus(unsigned long long* state) noexcept {
    const unsigned long long carried = state[1];
    unsigned long long mixed = state[0];

    mixed ^= mixed << 23;
    mixed = mixed ^ (mixed >> 17) ^ carried ^ (carried >> 26);

    state[0] = carried;
    state[1] = mixed;

    return mixed + carried;
}
|
|
/**
 * Fills 32 bytes with pseudo-random data.
 *
 * One xorshift128plus() step is taken per output byte and only the lowest
 * eight bits of each result are kept.
 *
 * @param buf   Destination, at least 32 writable bytes.
 * @param state Two-word generator state, advanced 32 times.
 */
__device__ __forceinline__ void rmbytes(unsigned char* buf, unsigned long long* state) noexcept {
    #pragma unroll 32
    for (int byteIdx = 0; byteIdx < 32; ++byteIdx) {
        const unsigned long long draw = xorshift128plus(state);
        buf[byteIdx] = static_cast<unsigned char>(draw & 0xFF);
    }
}
|
|
/**
 * Endless key-search kernel: every thread keeps generating ed25519 key pairs
 * and prints those whose public key has more leading zero bits than the
 * current device-wide record d_high.
 *
 * Launch layout: 1-D grid and 1-D blocks; randStates must hold one initialised
 * curandState per launched thread (indexed by blockIdx.x * blockDim.x +
 * threadIdx.x). The kernel never returns on its own - the loop has no exit.
 *
 * Change from the previous version: each 64-bit xorshift word is now built
 * from two 32-bit curand() draws (a single draw left the upper 32 bits of
 * both words permanently zero at start-up), and the all-zero state, which
 * xorshift128plus() can never leave, is replaced by a fixed non-zero word.
 *
 * NOTE(review): xorshift128plus is not a cryptographic generator and the
 * curand states are seeded from clock64() plus the thread id in initRand();
 * private keys produced here are only as unpredictable as that seed.
 * NOTE(review): Key32, KeysBox32, Address, ed25519_keygen, invertKey,
 * getRawAddress, getAddr and ktos are declared in project headers that are
 * not visible here.
 *
 * @param randStates Per-thread cuRAND states prepared by initRand.
 */
__global__ void KeyGen(curandState* randStates) {
    curandState localState = randStates[blockIdx.x * blockDim.x + threadIdx.x];

    // curand() yields 32 bits per call, so two calls are needed per 64-bit word.
    unsigned long long xorshiftState[2];
    for (int word = 0; word < 2; word++) {
        const unsigned long long upper = curand(&localState);
        const unsigned long long lower = curand(&localState);
        xorshiftState[word] = (upper << 32) | lower;
    }

    // An all-zero state would make xorshift128plus() return 0 forever.
    if ((xorshiftState[0] | xorshiftState[1]) == 0) {
        xorshiftState[1] = 0x9E3779B97F4A7C15ULL;
    }

    Key32 seed;
    KeysBox32 keys;

    while (true) {
        rmbytes(seed, xorshiftState);
        ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);

        // atomicMax returns the previous record; report only a strict improvement.
        const unsigned zeros = getZeros(keys.PublicKey);
        const unsigned previousHigh = atomicMax(&d_high, zeros);
        if (zeros <= previousHigh) {
            continue;
        }

        Address raw;
        Key32 inv;
        invertKey(keys.PublicKey, inv);
        getRawAddress(zeros, inv, raw);

        printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
    }
}
|
|
/**
 * Host entry point.
 *
 * 1. Copies the command line to the device and runs the single-thread args
 *    kernel, because option parsing (parameters()) is device code.
 * 2. Reads the resulting threshold d_high back and prints the banner.
 * 3. Sizes the grid from the occupancy of KeyGen, initialises one curandState
 *    per thread and starts KeyGen.
 *
 * KeyGen contains an endless loop, so the final cudaDeviceSynchronize() only
 * returns if the kernel fails; the program is normally stopped externally.
 *
 * Compared with the previous version, every CUDA call and kernel launch is
 * checked, the result code written by the args kernel is actually read (a
 * non-zero code aborts the run), the argument buffers are freed, and a zero
 * occupancy result is rejected instead of launching an empty grid.
 *
 * @return 0 on normal completion, 1 on a CUDA failure or a rejected command line.
 */
int main(int argc, char* argv[]) {
    // Prints a diagnostic to stderr and yields false when a CUDA call failed.
    const auto ok = [](cudaError_t status, const char* what) {
        if (status == cudaSuccess) return true;
        std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
        return false;
    };

    // ---- Command-line parsing on the device ----
    int* d_result = nullptr;
    if (!ok(cudaMalloc((void**)&d_result, sizeof(int)), "cudaMalloc(d_result)")) return 1;

    char** d_argv = nullptr;
    if (!ok(cudaMalloc((void**)&d_argv, argc * sizeof(char*)), "cudaMalloc(d_argv)")) return 1;

    // All argument strings share one device allocation so they can be freed together.
    size_t blobBytes = 0;
    for (int i = 0; i < argc; i++) blobBytes += strlen(argv[i]) + 1;

    char* d_blob = nullptr;
    if (!ok(cudaMalloc((void**)&d_blob, blobBytes), "cudaMalloc(d_blob)")) return 1;

    size_t offset = 0;
    for (int i = 0; i < argc; i++) {
        const size_t len = strlen(argv[i]) + 1;  // include the terminating NUL
        char* d_str = d_blob + offset;
        if (!ok(cudaMemcpy(d_str, argv[i], len, cudaMemcpyHostToDevice), "cudaMemcpy(argv string)")) return 1;
        // Store the device address of this string in slot i of the device-side argv table.
        if (!ok(cudaMemcpy(&d_argv[i], &d_str, sizeof(char*), cudaMemcpyHostToDevice), "cudaMemcpy(argv slot)")) return 1;
        offset += len;
    }

    args<<<1, 1>>>(d_argv, argc, d_result);
    if (!ok(cudaGetLastError(), "args kernel launch")) return 1;

    // The blocking copy on the default stream completes after the args kernel.
    int h_result = 0;
    if (!ok(cudaMemcpy(&h_result, d_result, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(result)")) return 1;

    cudaFree(d_blob);
    cudaFree(d_argv);
    cudaFree(d_result);

    if (h_result != 0) {
        std::cerr << "Invalid command line (code " << h_result
                  << "): --altitude / -a requires a valid value" << std::endl;
        return 1;
    }

    unsigned h_high = 0;
    if (!ok(cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned)), "cudaMemcpyFromSymbol(d_high)")) return 1;
    printf("High addresses (2%02x+)\n", h_high);

    // ---- Launch configuration ----
    const int threadsPerBlock = 256;

    cudaDeviceProp prop;
    if (!ok(cudaGetDeviceProperties(&prop, 0), "cudaGetDeviceProperties")) return 1;

    int mBpSM = 0;
    if (!ok(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGen, threadsPerBlock, 0),
            "cudaOccupancyMaxActiveBlocksPerMultiprocessor")) return 1;

    const int blocks = mBpSM * prop.multiProcessorCount;
    if (blocks <= 0) {
        // A grid with zero blocks is an invalid launch configuration.
        std::cerr << "KeyGen cannot be scheduled with " << threadsPerBlock
                  << " threads per block on this device" << std::endl;
        return 1;
    }
    const int totalThreads = blocks * threadsPerBlock;

    printf("SMs: %d\n", prop.multiProcessorCount);
    printf("MaxBlocksPerSM: %d\n", mBpSM);
    printf("totalThreads: %d\n", totalThreads);
    printf("BlocksThreads: %d:%d\n", blocks, threadsPerBlock);

    // ---- Random state set-up and key search ----
    curandState* rst = nullptr;
    if (!ok(cudaMalloc((void**)&rst, static_cast<size_t>(totalThreads) * sizeof(curandState)),
            "cudaMalloc(curand states)")) return 1;

    initRand<<<blocks, threadsPerBlock>>>(rst);
    if (!ok(cudaGetLastError(), "initRand launch")) return 1;
    if (!ok(cudaDeviceSynchronize(), "initRand execution")) return 1;

    KeyGen<<<blocks, threadsPerBlock>>>(rst);
    if (!ok(cudaGetLastError(), "KeyGen launch")) return 1;
    // Blocks for as long as KeyGen runs; it has no exit condition of its own.
    const bool finished = ok(cudaDeviceSynchronize(), "KeyGen execution");

    cudaFree(rst);
    return finished ? 0 : 1;
}