From 701139d0bd0e3da385b38bd4e255c64975d3c9d3 Mon Sep 17 00:00:00 2001 From: rcxpony Date: Fri, 14 Mar 2025 22:03:46 +0500 Subject: [PATCH] test --- build/Makefile | 3 +- libs/string.cuh | 66 ++++++++++++++++++++++++ sources/main.cu | 134 ++++++++++++++++++++++++++++++++++++------------ 3 files changed, 167 insertions(+), 36 deletions(-) create mode 100644 libs/string.cuh diff --git a/build/Makefile b/build/Makefile index 976b02b..e2225b6 100644 --- a/build/Makefile +++ b/build/Makefile @@ -1,6 +1,5 @@ NVCC := nvcc -NVCC_FLAGS := -O3 -use_fast_math -Xptxas -O3 -gencode arch=compute_75,code=sm_75 \ - --default-stream per-thread -Wno-deprecated-gpu-targets --expt-relaxed-constexpr -I../libs/ +NVCC_FLAGS := -O3 -use_fast_math -Xptxas -O3 -gencode arch=compute_75,code=sm_75 --default-stream per-thread -Wno-deprecated-gpu-targets --expt-relaxed-constexpr -I../libs/ MAIN_SOURCE := ../sources/main.cu LIBS_DIR := ../libs/ BUILD_DIR := ../build diff --git a/libs/string.cuh b/libs/string.cuh new file mode 100644 index 0000000..f49dda9 --- /dev/null +++ b/libs/string.cuh @@ -0,0 +1,66 @@ +__device__ int cstring_length(const char* s) { + int len = 0; + while (s[len]) len++; + return len; +} +__device__ int cstring_find(const char* s, const char* sub) { + int i, j; + int n = cstring_length(s); + int m = cstring_length(sub); + if (m == 0) return 0; + for (i = 0; i <= n - m; i++) { + for (j = 0; j < m; j++) { + if (s[i + j] != sub[j]) break; + } + if (j == m) return i; + } + return -1; +} +__device__ int cstring_to_ull(const char* s, unsigned* val) { + unsigned result = 0; + int i = 0; + if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + i = 2; + } + if (s[i] == '\0') return 1; + for (; s[i]; i++) { + char c = s[i]; + int digit; + if (c >= '0' && c <= '9') { + digit = c - '0'; + } else if (c >= 'a' && c <= 'f') { + digit = 10 + (c - 'a'); + } else if (c >= 'A' && c <= 'F') { + digit = 10 + (c - 'A'); + } else { + return 1; + } + result = result * 16 + digit; + } + *val = result; + return 0; +} +__device__ void extract_substring(const char* src, int start, char* dest, int dest_size) { + int i = 0; + while (src[start + i] && i < dest_size - 1) { + dest[i] = src[start + i]; + i++; + } + dest[i] = '\0'; +} +__device__ void concat(const char* s1, const char* s2, char* out, int outSize) { + int i = 0, j = 0; + while (s1[i] && i < outSize - 1) { + out[i] = s1[i]; + i++; + } + if (i < outSize - 1) { + out[i] = ' '; + i++; + } + while (s2[j] && i < outSize - 1) { + out[i] = s2[j]; + i++; j++; + } + out[i] = '\0'; +} diff --git a/sources/main.cu b/sources/main.cu index ae7e557..71d03e7 100644 --- a/sources/main.cu +++ b/sources/main.cu @@ -1,25 +1,70 @@ #include +#include +#include #include #include #include #include #include +#include using Address = unsigned char[16]; using Key = unsigned char[32]; struct KeysBox { Key PublicKey; Key PrivateKey; }; -__device__ static unsigned high = 0x10; +__device__ unsigned high = 0x10; +__device__ int parameters(const char* arg) { + int space_index = cstring_find(arg, " "); + if (space_index != -1) { + int substr_start = space_index + 1; + int arg_len = cstring_length(arg); + int substr_len = arg_len - substr_start + 1; + char sub_arg[256]; + if (substr_len > 256) substr_len = 256; + extract_substring(arg, substr_start, sub_arg, substr_len); + if (cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1) { + unsigned tmp_high; + int ret = cstring_to_ull(sub_arg, &tmp_high); + if (ret != 0) return 1; + high = tmp_high; + return 0; + } + } + if ((cstring_find(arg, "--altitude") == 0 && cstring_length(arg) == 10) || (cstring_find(arg, "-a") == 0 && cstring_length(arg) == 2)) { + return 777; + } + return 0; +} +__global__ void args(char** argv, int argc, int* result) { + int err = 0; + for (int x = 1; x < argc; x++) { + int res = parameters(argv[x]); + if (res == 777) { + if (++x >= argc) { + err = 776; + break; + } + char combined[512]; + concat(argv[x - 1], argv[x], combined, 512); + if (parameters(combined) != 0) { + err = res; + break; + } + } + } + result[0] = err; +} struct ds64 { char data[65]; }; struct ds46 { char data[46]; }; -__device__ ds64 KeyToString(const unsigned char* key) noexcept { +__device__ ds64 ktos(const unsigned char* key) noexcept { ds64 str; const char* hexDigits = "0123456789abcdef"; +#pragma unroll for (unsigned char i = 0; i < 32; i++) { str.data[2 * i] = hexDigits[key[i] >> 4]; str.data[2 * i + 1] = hexDigits[key[i] & 0x0F]; @@ -27,28 +72,28 @@ __device__ ds64 KeyToString(const unsigned char* key) noexcept { str.data[64] = '\0'; return str; } -__device__ ds46 getAddress(const unsigned char rawAddr[16]) noexcept { +__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept { ds46 addrStr; const char* hexDigits = "0123456789abcdef"; unsigned pos = 0; +#pragma unroll for (unsigned char group = 0; group < 8; group++) { int idx = group * 2; addrStr.data[pos++] = hexDigits[rawAddr[idx] >> 4]; addrStr.data[pos++] = hexDigits[rawAddr[idx] & 0x0F]; addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] >> 4]; addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] & 0x0F]; - if (group < 7) { - addrStr.data[pos++] = ':'; - } + if (group < 7) { addrStr.data[pos++] = ':'; } } addrStr.data[pos] = '\0'; return addrStr; } -__device__ void getRawAddress(int lErase, Key& InvertedPublicKey, Address& rawAddr) noexcept { +__device__ __forceinline__ void getRawAddress(int lErase, Key& InvertedPublicKey, Address& rawAddr) noexcept { lErase++; - const int bitsToShift = lErase % 8; - const int start = lErase / 8; - if (bitsToShift != 0) { + const int bitsToShift = lErase & 7; + const int start = lErase >> 3; + if (bitsToShift) { + #pragma unroll for (int i = start; i < start + 15; i++) { InvertedPublicKey[i] = static_cast((InvertedPublicKey[i] << bitsToShift) | (InvertedPublicKey[i + 1] >> (8 - bitsToShift))); } @@ -57,17 +102,17 @@ __device__ void getRawAddress(int lErase, Key& InvertedPublicKey, Address& rawAd rawAddr[1] = static_cast(lErase - 1); memcpy(&rawAddr[2], &InvertedPublicKey[start], 14); } -__device__ unsigned char zeroCounter(unsigned int x) { - if (x == 0) return 32; - return static_cast(__builtin_clz(x)); +__device__ __forceinline__ unsigned char zeroCounter(unsigned int x) { + return x ? static_cast(__clz(x)) : 32; } -__device__ unsigned char getZeros(const unsigned char* v) { +__device__ __forceinline__ unsigned char getZeros(const unsigned char* v) { unsigned char leadZeros = 0; +#pragma unroll for (int i = 0; i < 32; i += 4) { unsigned word = (static_cast(v[i]) << 24) | (static_cast(v[i + 1]) << 16) | (static_cast(v[i + 2]) << 8) | (static_cast(v[i + 3])); - if (word == 0) { + if (word == 0) leadZeros += 32; - } else { + else { leadZeros += zeroCounter(word); break; } @@ -76,9 +121,9 @@ __device__ unsigned char getZeros(const unsigned char* v) { } __global__ void initRand(curandState* randStates) { int id = blockIdx.x * blockDim.x + threadIdx.x; - curand_init((unsigned long long)clock64() + id, id, 0, &randStates[id]); + curand_init(static_cast(clock64()) + id, id, 0, &randStates[id]); } -__device__ unsigned long long xorshift128plus(unsigned long long* state) noexcept { +__device__ __forceinline__ unsigned long long xorshift128plus(unsigned long long* state) noexcept { unsigned long long x = state[0]; const unsigned long long y = state[1]; state[0] = y; @@ -88,37 +133,57 @@ __device__ unsigned long long xorshift128plus(unsigned long long* state) noexcep state[1] = x; return x + y; } -__device__ void rmbytes(unsigned char* buf, unsigned long size, unsigned long long* state) { +__device__ __forceinline__ void rmbytes(unsigned char* buf, unsigned long size, unsigned long long* state) { +#pragma unroll for (unsigned long i = 0; i < size; i++) { - buf[i] = xorshift128plus(state) & 0xFF; + buf[i] = static_cast(xorshift128plus(state) & 0xFF); } } -__device__ void invertKey(const unsigned char* key, unsigned char* inverted) { - for (int i = 0; i < 32; i++) +__device__ __forceinline__ void invertKey(const unsigned char* key, unsigned char* inverted) { +#pragma unroll + for (unsigned char i = 0; i < 32; i++) inverted[i] = key[i] ^ 0xFF; } __global__ void minerKernel(curandState* randStates) { - int thid = blockIdx.x * blockDim.x + threadIdx.x; - curandState localState = randStates[thid]; + curandState localState = randStates[blockIdx.x * blockDim.x + threadIdx.x]; unsigned long long xorshiftState[2]; xorshiftState[0] = curand(&localState); xorshiftState[1] = curand(&localState); Key seed; - rmbytes(seed, sizeof(seed), xorshiftState); - if (thid == 0) printf("Seed: %s\n", KeyToString(seed).data); while (true) { + rmbytes(seed, sizeof(seed), xorshiftState); KeysBox keys; ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed); - if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax(&high, (unsigned)zeros)) { - printf("\nIPv6:\t%x\nPK:\t%s\nSK:\t%s\n", zeros, KeyToString(keys.PublicKey).data, KeyToString(keys.PrivateKey).data); + unsigned zeros = getZeros(keys.PublicKey); + if (zeros > atomicMax((unsigned*)&high, zeros)) { + Address raw; + Key inv; + invertKey(keys.PublicKey, inv); + getRawAddress(zeros, inv, raw); + printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\nFK:\t%s%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data, ktos(keys.PrivateKey).data, ktos(keys.PublicKey).data); } - rmbytes(seed, sizeof(seed), xorshiftState); } } -int main() { +int main(int argc, char* argv[]) { + if (argc < 2) return 0; + int* d_result; + cudaMalloc((void**)&d_result, sizeof(int)); + char** d_argv; + cudaMalloc((void**)&d_argv, argc * sizeof(char*)); + for (int i = 0; i < argc; i++) { + size_t len = strlen(argv[i]) + 1; + char* d_str; + cudaMalloc((void**)&d_str, len); + cudaMemcpy(d_str, argv[i], len, cudaMemcpyHostToDevice); + cudaMemcpy(&d_argv[i], &d_str, sizeof(char*), cudaMemcpyHostToDevice); + } + args<<<1, 1 >>>(d_argv, argc, d_result); + unsigned h_high; + cudaMemcpyFromSymbol(&h_high, high, sizeof(unsigned)); + printf("High addresses (2%02x+)\n", h_high); const int threadsPerBlock = 256; cudaDeviceProp prop; - cudaGetDeviceProperties_v2(&prop, 0); + cudaGetDeviceProperties(&prop, 0); int mBpSM; cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, minerKernel, threadsPerBlock, 0); int SMs = prop.multiProcessorCount; @@ -128,12 +193,13 @@ int main() { printf("maxBlocks: %d\n", maxBlocks); printf("totalThreads: %d\n", totalThreads); printf("MaxBlocksPerSM: %d\n", mBpSM); + printf("Current config: <<<%d,%d>>>\n", totalThreads / threadsPerBlock, threadsPerBlock); curandState* rst; cudaMalloc(&rst, totalThreads * sizeof(curandState)); - initRand<<<100, threadsPerBlock >>>(rst); + initRand<<>>(rst); cudaDeviceSynchronize(); - minerKernel<<<100, threadsPerBlock>>>(rst); + minerKernel<<>>(rst); cudaDeviceSynchronize(); cudaFree(rst); return 0; -} +} \ No newline at end of file