Patch notes (free text placed ahead of the first "diff --git" header).

Summary of what the hunks below do:
  * libs/ed25519.cu: "#pragma unroll 256" on the ed25519_smult loop becomes a
    plain "#pragma unroll".
  * libs/keymanip.cu, libs/keymanip.cuh (new files): ds64, ds46, Address, ktos,
    getAddr, getRawAddress and invertKey are taken out of sources/main.cu.
    Key / KeysBox are declared here as Key32 / KeysBox32, and getRawAddress /
    invertKey are no longer marked __forceinline__.
  * libs/sha512.cu, libs/sha512.cuh: sha512_block gains __forceinline__; the two
    guards at the top of sha512_get are folded onto single lines.
  * sources/main.cpp: "ones" is declared inside the if-initializer.
  * sources/main.cu: parameters() is restructured around early returns and no
    longer clamps the length passed to extract_substring; initRand changes its
    seed expression and discards ten curand() outputs; threadsPerBlock goes
    from 128 to 256.

Fix folded into this patch:
  * In the new libs/keymanip.cu, ktos() now writes the terminator to
    str.data[64]. ds64::data has 65 elements and the loop fills indices 0..63,
    so the previous store to data[65] was one past the end of the array and
    left data[64] unterminated. The removed copy in sources/main.cu still reads
    data[65] on purpose: '-' lines have to mirror the file being patched.

NOTE(review): in the text this patch was restored from, every "#include" target
and every "static_cast" type argument is empty and all indentation had been
collapsed. Those tokens are reproduced exactly as found and indentation is
re-created from nesting depth only, so restore both from the original commit
(or apply with whitespace-insensitive matching) before relying on this patch.
The blob id on the keymanip.cu "index" line predates the data[64] fix.

NOTE(review): getRawAddress reads InvertedPublicKey[i + 1] for i up to
start + 14 and copies 14 bytes from InvertedPublicKey[start]; with a 32-byte
key that stays in bounds only while start <= 16. Confirm callers never pass a
larger lErase.

diff --git a/libs/ed25519.cu b/libs/ed25519.cu
index ee98511..316d845 100644
--- a/libs/ed25519.cu
+++ b/libs/ed25519.cu
@@ -94,7 +94,7 @@ __device__ __forceinline__ void ed25519_copy(struct ed25519_pt* dst, const struc
 }
 __device__ void ed25519_smult(struct ed25519_pt* r_out, const struct ed25519_pt* p, const unsigned char* e) {
     struct ed25519_pt r = ed25519_neutral;
-#pragma unroll 256
+#pragma unroll
     for (int i = 255; i >= 0; i--) {
         struct ed25519_pt s;
         ed25519_double(&r, &r);
diff --git a/libs/keymanip.cu b/libs/keymanip.cu
new file mode 100644
index 0000000..f260a9b
--- /dev/null
+++ b/libs/keymanip.cu
@@ -0,0 +1,46 @@
+#include
+__device__ ds64 ktos(const unsigned char* key) noexcept {
+    ds64 str;
+    const char* hexDigits = "0123456789abcdef";
+#pragma unroll 32
+    for (unsigned char i = 0; i < 32; i++) {
+        str.data[2 * i] = hexDigits[key[i] >> 4];
+        str.data[2 * i + 1] = hexDigits[key[i] & 0x0F];
+    }
+    str.data[64] = '\0';
+    return str;
+}
+__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept {
+    ds46 addrStr;
+    const char* hexDigits = "0123456789abcdef";
+    unsigned pos = 0;
+#pragma unroll 8
+    for (unsigned char group = 0; group < 8; group++) {
+        int idx = group * 2;
+        addrStr.data[pos++] = hexDigits[rawAddr[idx] >> 4];
+        addrStr.data[pos++] = hexDigits[rawAddr[idx] & 0x0F];
+        addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] >> 4];
+        addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] & 0x0F];
+        if (group < 7) { addrStr.data[pos++] = ':'; }
+    }
+    addrStr.data[pos] = '\0';
+    return addrStr;
+}
+__device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Address& rawAddr) noexcept {
+    lErase++;
+    const int bitsToShift = lErase & 7;
+    const int start = lErase >> 3;
+    if (bitsToShift) {
+        #pragma unroll
+        for (int i = start; i < start + 15; i++) {
+            InvertedPublicKey[i] = static_cast((InvertedPublicKey[i] << bitsToShift) | (InvertedPublicKey[i + 1] >> (8 - bitsToShift)));
+        }
+    }
+    rawAddr[0] = 0x02;
+    rawAddr[1] = static_cast(lErase - 1);
+    memcpy(&rawAddr[2], &InvertedPublicKey[start], 14);
+}
+__device__ void invertKey(const unsigned char* key, unsigned char* inverted) {
+#pragma unroll 32
+    for (unsigned char i = 0; i < 32; i++) inverted[i] = key[i] ^ 0xFF;
+}
\ No newline at end of file
diff --git a/libs/keymanip.cuh b/libs/keymanip.cuh
new file mode 100644
index 0000000..4b30088
--- /dev/null
+++ b/libs/keymanip.cuh
@@ -0,0 +1,19 @@
+#ifndef __KEYMANIP_CUH
+#define __KEYMANIP_CUH
+struct ds64 {
+    char data[65];
+};
+struct ds46 {
+    char data[46];
+};
+using Address = unsigned char[16];
+using Key32 = unsigned char[32];
+struct KeysBox32 {
+    Key32 PublicKey;
+    Key32 PrivateKey;
+};
+__device__ ds64 ktos(const unsigned char* key) noexcept;
+__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept;
+__device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Address& rawAddr) noexcept;
+__device__ void invertKey(const unsigned char* key, unsigned char* inverted);
+#endif
\ No newline at end of file
diff --git a/libs/sha512.cu b/libs/sha512.cu
index ef87985..7500c5d 100644
--- a/libs/sha512.cu
+++ b/libs/sha512.cu
@@ -64,7 +64,7 @@ __device__ __forceinline__ void store64(unsigned char* x, unsigned long v) {
 __device__ __forceinline__ unsigned long rot64(unsigned long x, int bits) {
     return (x >> bits) | (x << (64 - bits));
 }
-__device__ void sha512_block(sha512_state* s, const unsigned char* blk) {
+__device__ __forceinline__ void sha512_block(sha512_state* s, const unsigned char* blk) {
     unsigned long w[16];
 #pragma unroll 16
     for (int i = 0; i < 16; i++) {
@@ -127,10 +127,8 @@ __device__ void sha512_final(sha512_state* s, const unsigned char* blk, unsigned
     sha512_block(s, temp);
 }
 __device__ void sha512_get(const sha512_state* s, unsigned char* hash, unsigned int offset, unsigned int len) {
-    if (offset > 128)
-        return;
-    if (len > 128 - offset)
-        len = 128 - offset;
+    if (offset > 128) return;
+    if (len > 128 - offset) len = 128 - offset;
     unsigned int i = offset >> 3;
     unsigned int off = offset & 7;
     if (off) {
diff --git a/libs/sha512.cuh b/libs/sha512.cuh
index ac5755a..6d1225d 100644
--- a/libs/sha512.cuh
+++ b/libs/sha512.cuh
@@ -8,7 +8,7 @@ extern __device__ __constant__ unsigned long round_k[80];
 __device__ __forceinline__ unsigned long load64(const unsigned char* x);
 __device__ __forceinline__ void store64(unsigned char* x, unsigned long v);
 __device__ __forceinline__ unsigned long rot64(unsigned long x, int bits);
-__device__ void sha512_block(sha512_state* s, const unsigned char* blk);
+__device__ __forceinline__ void sha512_block(sha512_state* s, const unsigned char* blk);
 __device__ void sha512_final(sha512_state* s, const unsigned char* blk, unsigned long total_size);
 __device__ void sha512_get(const sha512_state* s, unsigned char* hash, unsigned int offset, unsigned int len);
 __device__ void sha512_init(sha512_state* s);
diff --git a/sources/main.cpp b/sources/main.cpp
index 98c38ce..9df4c17 100644
--- a/sources/main.cpp
+++ b/sources/main.cpp
@@ -127,14 +127,13 @@ void miner_thread() noexcept {
     alignas(32) Key seed;
     KeysBox keys;
     Address rawAddr;
-    unsigned char ones = 0;
     std::random_device rd;
     unsigned long state = static_cast(rd());
     printf("Using seed: %lu\n", state);
     while (true) {
         rmbytes(seed, sizeof(seed), state);
         crypto_sign_ed25519_seed_keypair(keys.PublicKey, keys.PrivateKey, seed);
-        if (ones = getZeros(keys.PublicKey); ones > conf.high.load()) {
+        if (unsigned char ones = getZeros(keys.PublicKey); ones > conf.high.load()) {
             conf.high.store(ones);
             invertKey(keys.PublicKey, inv);
             getRawAddress(ones, inv, rawAddr);
diff --git a/sources/main.cu b/sources/main.cu
index a156703..6ebdb2e 100644
--- a/sources/main.cu
+++ b/sources/main.cu
@@ -6,33 +6,23 @@
 #include
 #include
 #include
-using Address = unsigned char[16];
-using Key = unsigned char[32];
-struct KeysBox {
-    Key PublicKey;
-    Key PrivateKey;
-};
+#include
 __device__ unsigned d_high = 0x10;
-__device__ int parameters(const char* arg) {
-    int space_index = cstring_find(arg, " ");
-    if (space_index != -1) {
-        int substr_start = space_index + 1;
-        int arg_len = cstring_length(arg);
-        int substr_len = arg_len - substr_start + 1;
-        char sub_arg[256];
-        if (substr_len > 256) substr_len = 256;
-        extract_substring(arg, substr_start, sub_arg, substr_len);
-        if (cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1) {
-            unsigned tmp_high;
-            int ret = cstring_to_ull(sub_arg, &tmp_high);
-            if (ret != 0) return 1;
-            d_high = tmp_high;
-            return 0;
-        }
-    }
+__device__ int parameters(const char* arg) noexcept {
     if ((cstring_find(arg, "--altitude") == 0 && cstring_length(arg) == 10) || (cstring_find(arg, "-a") == 0 && cstring_length(arg) == 2)) {
         return 777;
     }
+    int space_index = cstring_find(arg, " ");
+    if (space_index == -1) return 0;
+    const int substr_start = space_index + 1;
+    char sub_arg[256];
+    extract_substring(arg, substr_start, sub_arg, 256);
+    if (cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1) {
+        unsigned tmp_high;
+        if (cstring_to_ull(sub_arg, &tmp_high) != 0)
+            return 1;
+        d_high = tmp_high;
+    }
     return 0;
 }
 __global__ void args(char** argv, int argc, int* result) {
@@ -54,57 +44,10 @@ __global__ void args(char** argv, int argc, int* result) {
     }
     result[0] = err;
 }
-struct ds64 {
-    char data[65];
-};
-struct ds46 {
-    char data[46];
-};
-__device__ ds64 ktos(const unsigned char* key) noexcept {
-    ds64 str;
-    const char* hexDigits = "0123456789abcdef";
-#pragma unroll 32
-    for (unsigned char i = 0; i < 32; i++) {
-        str.data[2 * i] = hexDigits[key[i] >> 4];
-        str.data[2 * i + 1] = hexDigits[key[i] & 0x0F];
-    }
-    str.data[65] = '\0';
-    return str;
-}
-__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept {
-    ds46 addrStr;
-    const char* hexDigits = "0123456789abcdef";
-    unsigned pos = 0;
-#pragma unroll 8
-    for (unsigned char group = 0; group < 8; group++) {
-        int idx = group * 2;
-        addrStr.data[pos++] = hexDigits[rawAddr[idx] >> 4];
-        addrStr.data[pos++] = hexDigits[rawAddr[idx] & 0x0F];
-        addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] >> 4];
-        addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] & 0x0F];
-        if (group < 7) { addrStr.data[pos++] = ':'; }
-    }
-    addrStr.data[pos] = '\0';
-    return addrStr;
-}
-__device__ __forceinline__ void getRawAddress(int lErase, Key& InvertedPublicKey, Address& rawAddr) noexcept {
-    lErase++;
-    const int bitsToShift = lErase & 7;
-    const int start = lErase >> 3;
-    if (bitsToShift) {
-        #pragma unroll
-        for (int i = start; i < start + 15; i++) {
-            InvertedPublicKey[i] = static_cast((InvertedPublicKey[i] << bitsToShift) | (InvertedPublicKey[i + 1] >> (8 - bitsToShift)));
-        }
-    }
-    rawAddr[0] = 0x02;
-    rawAddr[1] = static_cast(lErase - 1);
-    memcpy(&rawAddr[2], &InvertedPublicKey[start], 14);
-}
-__device__ __forceinline__ unsigned char zeroCounter(unsigned int x) {
+__device__ __forceinline__ unsigned char zeroCounter(unsigned int x) noexcept {
     return x ? static_cast(__clz(x)) : 32;
 }
-__device__ __forceinline__ unsigned char getZeros(const unsigned char* v) {
+__device__ __forceinline__ unsigned char getZeros(const unsigned char* v) noexcept {
     unsigned char leadZeros = 0;
 #pragma unroll
     for (int i = 0; i < 32; i += 4) {
@@ -118,9 +61,13 @@ __device__ __forceinline__ unsigned char getZeros(const unsigned char* v) {
     }
     return leadZeros;
 }
-__global__ void initRand(curandState* randStates) {
+__global__ void initRand(curandState* rs) {
     int id = blockIdx.x * blockDim.x + threadIdx.x;
-    curand_init(static_cast(clock64()) + id, id, 0, &randStates[id]);
+    curand_init(clock64() + id * 7919ULL, id, 0, &rs[id]);
+#pragma unroll 10
+    for (int i = 0; i < 10; i++) {
+        curand(&rs[id]);
+    }
 }
 __device__ __forceinline__ unsigned long long xorshift128plus(unsigned long long* state) noexcept {
     unsigned long long x = state[0];
@@ -132,27 +79,23 @@ __device__ __forceinline__ unsigned long long xorshift128plus(unsigned long long
     state[1] = x;
     return x + y;
 }
-__device__ __forceinline__ void rmbytes(unsigned char* buf, unsigned long long* state) {
+__device__ __forceinline__ void rmbytes(unsigned char* buf, unsigned long long* state) noexcept {
 #pragma unroll 32
     for (unsigned long i = 0; i < 32; i++) buf[i] = static_cast(xorshift128plus(state) & 0xFF);
 }
-__device__ __forceinline__ void invertKey(const unsigned char* key, unsigned char* inverted) {
-#pragma unroll 32
-    for (unsigned char i = 0; i < 32; i++) inverted[i] = key[i] ^ 0xFF;
-}
 __global__ void KeyGen(curandState* randStates) {
     curandState localState = randStates[blockIdx.x * blockDim.x + threadIdx.x];
     unsigned long long xorshiftState[2];
     xorshiftState[0] = curand(&localState);
     xorshiftState[1] = curand(&localState);
-    Key seed;
-    KeysBox keys;
+    Key32 seed;
+    KeysBox32 keys;
     while (true) {
         rmbytes(seed, xorshiftState);
         ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
         if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax((unsigned*)&d_high, zeros)) {
             Address raw;
-            Key inv;
+            Key32 inv;
             invertKey(keys.PublicKey, inv);
             getRawAddress(zeros, inv, raw);
             printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
@@ -175,7 +118,7 @@ int main(int argc, char* argv[]) {
     unsigned h_high;
     cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
     printf("High addresses (2%02x+)\n", h_high);
-    const int threadsPerBlock = 128;
+    const int threadsPerBlock = 256;
     cudaDeviceProp prop;
     cudaGetDeviceProperties(&prop, 0);
     int mBpSM;