This commit is contained in:
rcxpony 2025-03-15 19:36:02 +05:00
parent 17f6d7be58
commit e6c4f9ceb5

View File

@ -81,32 +81,31 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
buf[i * 4 + 3] = static_cast<unsigned char>((r >> 24) & 0xFF);
}
}
/*
 * KeyGenKernel — endless key search; every thread generates and scores its
 * own stream of candidate key pairs.
 *
 * randStates: one curandState per launched thread, indexed by the flat 1-D
 *             global thread index. The state is copied into a per-thread
 *             local and is not written back by this kernel.
 *
 * Launch layout: 1-D grid of 1-D blocks. No bounds check is performed on
 * randStates, so the caller must allocate at least gridDim.x * blockDim.x
 * entries.
 *
 * The kernel never returns (the search loop has no exit); results are
 * reported through device-side printf whenever a thread raises the global
 * best score d_high.
 */
__global__ void KeyGenKernel(curandState* randStates) {
    // Block-wide cached copy of the global best score. It is only a cheap
    // pre-filter; the atomicMax on d_high below is what decides a real win.
    __shared__ unsigned local_high;
    if (threadIdx.x == 0) {
        local_high = d_high;
    }
    // Make the initial threshold visible to every thread in the block.
    __syncthreads();

    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    curandState localState = randStates[idx];

    while (true) {
        Key32 seed;
        KeysBox32 keys;
        rmbytes(seed, &localState);
        ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);

        if (unsigned zeros = getZeros(keys.PublicKey); zeros > local_high) {
            // atomicMax returns the previous value: only the thread that
            // actually raised d_high reports the key.
            if (zeros > atomicMax(&d_high, zeros)) {
                Addr16 raw;
                Key32 inv;
                invertKey(keys.PublicKey, inv);
                getRawAddress(zeros, inv, raw);
                printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
                // Any thread of the block can get here, so raise the shared
                // threshold atomically; a plain store could let a concurrent
                // smaller value overwrite a larger one.
                atomicMax(&local_high, zeros);
            }
        }
    }
}
#define THREADS_P_B 256
int main(int argc, char* argv[]) {
int* d_result, mBpSM, h_high;
@ -126,13 +125,13 @@ int main(int argc, char* argv[]) {
cudaDeviceSynchronize();
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
cudaGetDeviceProperties_v2(&prop, 0);
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGen, THREADS_P_B, 0);
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADS_P_B, 0);
const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B;
printf("High addrs: 2%02x+\nSMs: %d\nMaxBlocksPerSM: %d\nTotalTh: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, mBpSM, tTh, tTh / THREADS_P_B, THREADS_P_B);
printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADS_P_B, THREADS_P_B);
cudaMalloc(&rst, tTh * sizeof(curandState));
initRand<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
cudaDeviceSynchronize();
KeyGen<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
KeyGenKernel<<<tTh / THREADS_P_B, THREADS_P_B >>>(rst);
cudaDeviceSynchronize();
cudaFree(rst);
return 0;