fixes
This commit is contained in:
parent
17f6d7be58
commit
e6c4f9ceb5
@ -81,32 +81,31 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
|
|||||||
buf[i * 4 + 3] = static_cast<unsigned char>((r >> 24) & 0xFF);
|
buf[i * 4 + 3] = static_cast<unsigned char>((r >> 24) & 0xFF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
__global__ void KeyGen(curandState* randStates) {
|
__global__ void KeyGenKernel(curandState* randStates) {
|
||||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
__shared__ unsigned local_high;
|
||||||
unsigned local_d_high;
|
|
||||||
if (threadIdx.x == 0) {
|
if (threadIdx.x == 0) {
|
||||||
local_d_high = d_high;
|
local_high = d_high;
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
curandState localState = randStates[idx];
|
curandState localState = randStates[idx];
|
||||||
while (true) {
|
while (true) {
|
||||||
Key32 seed;
|
Key32 seed;
|
||||||
KeysBox32 keys;
|
KeysBox32 keys;
|
||||||
rmbytes(seed, &localState);
|
rmbytes(seed, &localState);
|
||||||
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
|
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
|
||||||
if (unsigned zeros = getZeros(keys.PublicKey); zeros > local_d_high) {
|
if (unsigned zeros = getZeros(keys.PublicKey); zeros > local_high) {
|
||||||
if (zeros > atomicMax((unsigned*)&d_high, zeros)) {
|
if (zeros > atomicMax(&d_high, zeros)) {
|
||||||
Addr16 raw;
|
Addr16 raw;
|
||||||
Key32 inv;
|
Key32 inv;
|
||||||
invertKey(keys.PublicKey, inv);
|
invertKey(keys.PublicKey, inv);
|
||||||
getRawAddress(zeros, inv, raw);
|
getRawAddress(zeros, inv, raw);
|
||||||
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
|
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
|
||||||
local_d_high = zeros;
|
local_high = zeros;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define THREADS_P_B 256
|
#define THREADS_P_B 256
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
int* d_result, mBpSM, h_high;
|
int* d_result, mBpSM, h_high;
|
||||||
@ -126,13 +125,13 @@ int main(int argc, char* argv[]) {
|
|||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
|
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
|
||||||
cudaGetDeviceProperties_v2(&prop, 0);
|
cudaGetDeviceProperties_v2(&prop, 0);
|
||||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGen, THREADS_P_B, 0);
|
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADS_P_B, 0);
|
||||||
const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B;
|
const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B;
|
||||||
printf("High addrs: 2%02x+\nSMs: %d\nMaxBlocksPerSM: %d\nTotalTh: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, mBpSM, tTh, tTh / THREADS_P_B, THREADS_P_B);
|
printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADS_P_B, THREADS_P_B);
|
||||||
cudaMalloc(&rst, tTh * sizeof(curandState));
|
cudaMalloc(&rst, tTh * sizeof(curandState));
|
||||||
initRand<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
initRand<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
KeyGen<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
KeyGenKernel<<<tTh / THREADS_P_B, THREADS_P_B >>>(rst);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
cudaFree(rst);
|
cudaFree(rst);
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user