This commit is contained in:
rcxpony 2025-03-15 02:32:15 +05:00
parent 36abc6a655
commit 978db38a18
5 changed files with 34 additions and 32 deletions

View File

@ -1,11 +1,18 @@
NVCC := nvcc NVCC := nvcc
NVCC_FLAGS := -rdc=true -O3 -Xptxas -O3 \ NVCC_FLAGS := -rdc=true -O3 -Xptxas -O3 \
-gencode arch=compute_75,code=sm_75 \ -use_fast_math -ftz=true -prec-div=false -prec-sqrt=false \
--default-stream per-thread \ -gencode arch=compute_75,code=sm_75 \
-Wno-deprecated-gpu-targets \ --default-stream per-thread \
--expt-relaxed-constexpr \ -Wno-deprecated-gpu-targets \
-I../libs/ \ --expt-relaxed-constexpr \
-std=c++20 -I../libs/ \
-std=c++17
BUILD ?= RELEASE
ifeq ($(BUILD),DEBUG)
BUILD_DEFINES := -DDEBUG
else
BUILD_DEFINES := -DRELEASE
endif
MAIN_SOURCE := ../sources/main.cu MAIN_SOURCE := ../sources/main.cu
LIBS_DIR := ../libs/ LIBS_DIR := ../libs/
BUILD_DIR := ../build BUILD_DIR := ../build

View File

@ -34,7 +34,7 @@ __device__ void ed25519_unproject(unsigned char* x, unsigned char* y, const stru
f25519_normalize(x); f25519_normalize(x);
f25519_normalize(y); f25519_normalize(y);
} }
__device__ void ed25519_pack(unsigned char* c, const unsigned char* x, const unsigned char* y) { __device__ void ed25519_pack(unsigned char* __restrict__ c, const unsigned char* x, const unsigned char* y) {
unsigned char tmp[F25519_SIZE]; unsigned char tmp[F25519_SIZE];
unsigned char parity; unsigned char parity;
f25519_copy(tmp, x); f25519_copy(tmp, x);

View File

@ -23,10 +23,17 @@ __device__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secre
expand_key(expanded, secret); expand_key(expanded, secret);
sm_pack(pub, expanded); sm_pack(pub, expanded);
} }
__device__ void compact_wipe(void* data, unsigned long length) { __device__ void compact_wipe(void* __restrict__ data, unsigned long length) {
volatile unsigned char* p = (volatile unsigned char*)data; volatile unsigned char* p = (volatile unsigned char*)data;
while (length--) { unsigned long i = 0;
*p++ = 0; for (; i + 3 < length; i += 4) {
p[i] = 0;
p[i + 1] = 0;
p[i + 2] = 0;
p[i + 3] = 0;
}
for (; i < length; i++) {
p[i] = 0;
} }
} }
__device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]) { __device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]) {

View File

@ -1,10 +1,9 @@
#ifndef __EDSIGN_CUH #ifndef __EDSIGN_CUH
#define __EDSIGN_CUH #define __EDSIGN_CUH
#define F25519_SIZE 32
__device__ void expand_key(unsigned char* expanded, const unsigned char* secret); __device__ void expand_key(unsigned char* expanded, const unsigned char* secret);
__device__ void pp(unsigned char* packed, const struct ed25519_pt* p); __device__ void pp(unsigned char* packed, const struct ed25519_pt* p);
__device__ void sm_pack(unsigned char* r, const unsigned char* k); __device__ void sm_pack(unsigned char* r, const unsigned char* k);
__device__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret); __device__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret);
__device__ void compact_wipe(void* data, unsigned long length); __device__ void compact_wipe(void* __restrict__ data, unsigned long length);
__device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]); __device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]);
#endif #endif

View File

@ -69,29 +69,18 @@ __global__ void initRand(curandState* rs) {
curand(&rs[id]); curand(&rs[id]);
} }
} }
__device__ __forceinline__ unsigned long long xorshift128plus(unsigned long long* state) noexcept { __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state) {
unsigned long long x = state[0];
const unsigned long long y = state[1];
state[0] = y;
x ^= x << 23;
x ^= x >> 17;
x ^= y ^ (y >> 26);
state[1] = x;
return x + y;
}
__device__ __forceinline__ void rmbytes(unsigned char* buf, unsigned long long* state) noexcept {
#pragma unroll 32 #pragma unroll 32
for (unsigned long i = 0; i < 32; i++) buf[i] = static_cast<unsigned char>(xorshift128plus(state) & 0xFF); for (unsigned long i = 0; i < 32; i++) {
buf[i] = curand(state) & 0xFF;
}
} }
__global__ void KeyGen(curandState* randStates) { __global__ void KeyGen(curandState* randStates) {
curandState localState = randStates[blockIdx.x * blockDim.x + threadIdx.x]; curandState localState = randStates[blockIdx.x * blockDim.x + threadIdx.x];
unsigned long long xorshiftState[2];
xorshiftState[0] = curand(&localState);
xorshiftState[1] = curand(&localState);
Key32 seed;
KeysBox32 keys;
while (true) { while (true) {
rmbytes(seed, xorshiftState); KeysBox32 keys;
Key32 seed;
rmbytes(seed, &localState);
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed); ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax((unsigned*)&d_high, zeros)) { if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax((unsigned*)&d_high, zeros)) {
Address raw; Address raw;