fixes
This commit is contained in:
parent
17f6d7be58
commit
e6c4f9ceb5
@ -81,32 +81,31 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
|
||||
buf[i * 4 + 3] = static_cast<unsigned char>((r >> 24) & 0xFF);
|
||||
}
|
||||
}
|
||||
// Endless key-search kernel: every thread repeatedly draws a random seed,
// derives an ed25519 key pair from it and reports the pair whenever its
// getZeros() score beats the best score recorded so far in d_high.
//
// Launch layout : 1-D grid of 1-D blocks.
// Preconditions : randStates holds one curandState per launched thread; it is
//                 indexed by blockIdx.x * blockDim.x + threadIdx.x with no
//                 bounds check.
// Shared memory : one statically allocated unsigned (block-wide cache of d_high).
// Termination   : none - the loop runs until the kernel is stopped externally.
// NOTE(review)  : d_high is declared outside this block; it is assumed to be a
//                 __device__ unsigned - confirm against its declaration.
__global__ void KeyGenKernel(curandState* randStates) {
    // Block-wide cache of the global record so that most candidates are
    // rejected without touching global memory.
    __shared__ unsigned local_high;
    if (threadIdx.x == 0) {
        local_high = d_high;
    }
    __syncthreads();  // publish the initial cache value to the whole block

    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    curandState localState = randStates[idx];

    while (true) {
        Key32 seed;
        KeysBox32 keys;
        rmbytes(seed, &localState);
        ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);

        const unsigned zeros = getZeros(keys.PublicKey);

        // Volatile read: other threads of the block raise the cache while this
        // loop is running, so the value must be reloaded on every iteration
        // instead of being kept in a register.
        if (zeros <= *reinterpret_cast<volatile unsigned*>(&local_high)) {
            continue;
        }

        // d_high is the authoritative record; atomicMax returns its old value.
        const unsigned prev = atomicMax(&d_high, zeros);

        // Raise the block cache to the best value known after the global
        // update. atomicMax keeps the cache monotonic (a plain store from a
        // late thread could lower it) and also refreshes it when another
        // thread had already set a higher record.
        atomicMax(&local_high, prev > zeros ? prev : zeros);

        if (zeros > prev) {
            // This thread set a new record: print the address and key pair.
            Addr16 raw;
            Key32 inv;
            invertKey(keys.PublicKey, inv);
            getRawAddress(zeros, inv, raw);
            printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
        }
    }
}
|
||||
|
||||
// Threads per block: passed to the occupancy query and used as the block size
// of the initRand and key-generation kernel launches in main().
#define THREADS_P_B 256
|
||||
int main(int argc, char* argv[]) {
|
||||
int* d_result, mBpSM, h_high;
|
||||
@ -126,13 +125,13 @@ int main(int argc, char* argv[]) {
|
||||
cudaDeviceSynchronize();
|
||||
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
|
||||
cudaGetDeviceProperties_v2(&prop, 0);
|
||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGen, THREADS_P_B, 0);
|
||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADS_P_B, 0);
|
||||
const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B;
|
||||
printf("High addrs: 2%02x+\nSMs: %d\nMaxBlocksPerSM: %d\nTotalTh: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, mBpSM, tTh, tTh / THREADS_P_B, THREADS_P_B);
|
||||
printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADS_P_B, THREADS_P_B);
|
||||
cudaMalloc(&rst, tTh * sizeof(curandState));
|
||||
initRand<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
||||
cudaDeviceSynchronize();
|
||||
KeyGen<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
||||
KeyGenKernel<<<tTh / THREADS_P_B, THREADS_P_B >>>(rst);
|
||||
cudaDeviceSynchronize();
|
||||
cudaFree(rst);
|
||||
return 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user