This commit is contained in:
rcxpony 2025-03-15 19:36:02 +05:00
parent 17f6d7be58
commit e6c4f9ceb5

View File

@ -81,32 +81,31 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
buf[i * 4 + 3] = static_cast<unsigned char>((r >> 24) & 0xFF); buf[i * 4 + 3] = static_cast<unsigned char>((r >> 24) & 0xFF);
} }
} }
/**
 * Endless key-search kernel: every thread repeatedly draws a random seed,
 * derives a key pair with ed25519_keygen(), scores the public key with
 * getZeros(), and prints the address and key pair whenever its score beats
 * the best score recorded in the device global `d_high`.
 *
 * Launch layout: 1D grid of 1D blocks. `randStates` must hold one
 * curandState per launched thread; it is indexed by
 * blockIdx.x * blockDim.x + threadIdx.x with no bounds check.
 * Shared memory: one statically allocated `unsigned` per block.
 * The search loop has no exit condition, so the kernel does not return on
 * its own.
 *
 * @param randStates per-thread cuRAND states (presumably initialised by
 *                   initRand before this launch -- confirm at the call site).
 */
__global__ void KeyGenKernel(curandState* randStates) {
    // Block-wide cache of the best known score. It lets most candidates be
    // rejected without touching the contended global d_high.
    __shared__ unsigned local_high;
    if (threadIdx.x == 0) {
        local_high = d_high;
    }
    __syncthreads();

    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // Work on a private copy of the generator state inside the hot loop.
    curandState localState = randStates[idx];

    while (true) {
        Key32 seed;
        KeysBox32 keys;
        rmbytes(seed, &localState);
        ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);

        const unsigned zeros = getZeros(keys.PublicKey);
        // Volatile read: other threads of the block update the cache while
        // this loop runs, so the value must not be kept in a register.
        if (zeros <= *reinterpret_cast<volatile unsigned*>(&local_high)) {
            continue;
        }

        // Authoritative check against the device-wide best score.
        const unsigned prev = atomicMax(&d_high, zeros);

        // Refresh the block cache atomically. A plain store here would race
        // with other threads of the block and could replace a higher cached
        // value with a lower one. Folding in `prev` also picks up progress
        // made by other blocks. The cache never exceeds d_high, so no
        // winning candidate is ever filtered out.
        atomicMax(&local_high, prev > zeros ? prev : zeros);

        if (zeros > prev) {
            Addr16 raw;
            Key32 inv;
            invertKey(keys.PublicKey, inv);
            getRawAddress(zeros, inv, raw);
            printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
        }
    }
}
// Threads per block: the block size passed to the occupancy query and to the initRand / KeyGenKernel launches in main().
#define THREADS_P_B 256 #define THREADS_P_B 256
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
int* d_result, mBpSM, h_high; int* d_result, mBpSM, h_high;
@ -126,13 +125,13 @@ int main(int argc, char* argv[]) {
cudaDeviceSynchronize(); cudaDeviceSynchronize();
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned)); cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
cudaGetDeviceProperties_v2(&prop, 0); cudaGetDeviceProperties_v2(&prop, 0);
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGen, THREADS_P_B, 0); cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADS_P_B, 0);
const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B; const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B;
printf("High addrs: 2%02x+\nSMs: %d\nMaxBlocksPerSM: %d\nTotalTh: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, mBpSM, tTh, tTh / THREADS_P_B, THREADS_P_B); printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADS_P_B, THREADS_P_B);
cudaMalloc(&rst, tTh * sizeof(curandState)); cudaMalloc(&rst, tTh * sizeof(curandState));
initRand<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst); initRand<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
KeyGen<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst); KeyGenKernel<<<tTh / THREADS_P_B, THREADS_P_B >>>(rst);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
cudaFree(rst); cudaFree(rst);
return 0; return 0;