fixes
This commit is contained in:
parent
4f5a8129ae
commit
735cc0c467
@ -41,6 +41,7 @@ __device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Addr16& rawA
|
|||||||
memcpy(&rawAddr[2], &InvertedPublicKey[start], 14);
|
memcpy(&rawAddr[2], &InvertedPublicKey[start], 14);
|
||||||
}
|
}
|
||||||
__device__ void invertKey(const unsigned char* key, unsigned char* inverted) {
|
__device__ void invertKey(const unsigned char* key, unsigned char* inverted) {
|
||||||
|
#pragma unroll
|
||||||
for (int i = 0; i < 32; i += 4) {
|
for (int i = 0; i < 32; i += 4) {
|
||||||
uchar4 k = *(reinterpret_cast<const uchar4*>(&key[i]));
|
uchar4 k = *(reinterpret_cast<const uchar4*>(&key[i]));
|
||||||
*(reinterpret_cast<uchar4*>(&inverted[i])) = make_uchar4(k.x ^ 0xFF, k.y ^ 0xFF, k.z ^ 0xFF, k.w ^ 0xFF);
|
*(reinterpret_cast<uchar4*>(&inverted[i])) = make_uchar4(k.x ^ 0xFF, k.y ^ 0xFF, k.z ^ 0xFF, k.w ^ 0xFF);
|
||||||
|
@ -8,7 +8,8 @@
|
|||||||
#include <keymanip.cuh>
|
#include <keymanip.cuh>
|
||||||
__device__ unsigned d_high = 0x10;
|
__device__ unsigned d_high = 0x10;
|
||||||
__device__ int parameters(const char* arg) noexcept {
|
__device__ int parameters(const char* arg) noexcept {
|
||||||
if ((cstring_find(arg, "--altitude") == 0 && cstring_length(arg) == 10) || (cstring_find(arg, "-a") == 0 && cstring_length(arg) == 2)) {
|
if ((cstring_find(arg, "--altitude") == 0 && cstring_length(arg) == 10) ||
|
||||||
|
(cstring_find(arg, "-a") == 0 && cstring_length(arg) == 2)) {
|
||||||
return 777;
|
return 777;
|
||||||
}
|
}
|
||||||
int space_index = cstring_find(arg, " ");
|
int space_index = cstring_find(arg, " ");
|
||||||
@ -49,7 +50,8 @@ __device__ __forceinline__ unsigned char getZeros(const unsigned char* v) noexce
|
|||||||
unsigned char leadZeros = 0;
|
unsigned char leadZeros = 0;
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < 32; i += 4) {
|
for (int i = 0; i < 32; i += 4) {
|
||||||
unsigned word = (static_cast<unsigned>(v[i]) << 24) | (static_cast<unsigned>(v[i + 1]) << 16) | (static_cast<unsigned>(v[i + 2]) << 8) | (static_cast<unsigned>(v[i + 3]));
|
unsigned word = (static_cast<unsigned>(v[i]) << 24) | (static_cast<unsigned>(v[i + 1]) << 16) |
|
||||||
|
(static_cast<unsigned>(v[i + 2]) << 8) | (static_cast<unsigned>(v[i + 3]));
|
||||||
if (word == 0)
|
if (word == 0)
|
||||||
leadZeros += 32;
|
leadZeros += 32;
|
||||||
else {
|
else {
|
||||||
@ -59,19 +61,32 @@ __device__ __forceinline__ unsigned char getZeros(const unsigned char* v) noexce
|
|||||||
}
|
}
|
||||||
return leadZeros;
|
return leadZeros;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
__global__ void initRandSeed(curandState* states, const unsigned long seed) {
|
__global__ void initRandSeed(curandState* states, const unsigned long seed) {
|
||||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
curand_init(seed, idx, 0, &states[idx]);
|
curand_init(seed, idx, 0, &states[idx]);
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
/*
 * Kernel: initialise one cuRAND state per thread and export a sample from it.
 *
 * Each thread computes its flat 1D index, seeds rs[index] with
 * clock64() + index * 7919 (using the index as the sequence number as well),
 * discards the first ten outputs, and then stores the next output in
 * d_seeds[index].
 *
 * There is no bounds check: the launch must supply exactly one thread per
 * element of both rs and d_seeds.
 */
__global__ void initRand(curandState* rs, unsigned int* d_seeds) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    curandState* const state = &rs[tid];

    curand_init(clock64() + tid * 7919ULL, tid, 0, state);

    // Throw away the first ten draws of the freshly seeded generator.
    int remaining = 10;
#pragma unroll 10
    while (remaining-- > 0) {
        curand(state);
    }

    // The eleventh draw is published so the host can inspect it.
    d_seeds[tid] = curand(state);
}
|
||||||
|
// qsort comparator: three-way compare of two unsigned ints without subtraction,
// so large values cannot wrap around and produce a wrong ordering.
static int compareSeedValues(const void* lhs, const void* rhs) {
    const unsigned int a = *static_cast<const unsigned int*>(lhs);
    const unsigned int b = *static_cast<const unsigned int*>(rhs);
    return (a > b) - (a < b);
}

/*
 * Reports whether a host array contains any value more than once.
 *
 * seeds  host pointer to `count` values; must be valid when count >= 2.
 *        The array is only read, never reordered.
 * count  number of elements; values below 2 trivially yield 0.
 *
 * Returns 1 if at least two elements are equal, 0 otherwise.
 *
 * The original pairwise scan was O(count^2). This version sorts a private
 * copy and compares neighbours, which is O(count log count). If the scratch
 * buffer cannot be allocated, it falls back to the pairwise scan so the answer
 * is still correct.
 *
 * NOTE(review): for uniformly random 32-bit values a collision becomes likely
 * once count approaches ~2^16 (birthday bound); callers that treat a hit as a
 * fatal error may want to confirm that is intended.
 */
int checkSeeds(unsigned int* seeds, int count) {
    if (count < 2) {
        return 0;
    }

    const size_t n = static_cast<size_t>(count);
    unsigned int* sorted = static_cast<unsigned int*>(malloc(n * sizeof(unsigned int)));

    if (sorted == nullptr) {
        // No scratch memory available: use the slow but allocation-free scan.
        for (size_t i = 0; i < n; i++) {
            for (size_t j = i + 1; j < n; j++) {
                if (seeds[i] == seeds[j]) {
                    return 1;
                }
            }
        }
        return 0;
    }

    for (size_t i = 0; i < n; i++) {
        sorted[i] = seeds[i];
    }
    qsort(sorted, n, sizeof(unsigned int), compareSeedValues);

    // After sorting, equal values are adjacent.
    int duplicateFound = 0;
    for (size_t i = 1; i < n; i++) {
        if (sorted[i - 1] == sorted[i]) {
            duplicateFound = 1;
            break;
        }
    }

    free(sorted);
    return duplicateFound;
}
|
||||||
__device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state) {
|
__device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state) {
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
@ -84,11 +99,6 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
__global__ void KeyGenKernel(curandState* randStates) {
|
__global__ void KeyGenKernel(curandState* randStates) {
|
||||||
__shared__ unsigned local_high;
|
|
||||||
if (threadIdx.x == 0) {
|
|
||||||
local_high = d_high;
|
|
||||||
}
|
|
||||||
__syncthreads();
|
|
||||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
curandState localState = randStates[idx];
|
curandState localState = randStates[idx];
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -96,15 +106,13 @@ __global__ void KeyGenKernel(curandState* randStates) {
|
|||||||
KeysBox32 keys;
|
KeysBox32 keys;
|
||||||
rmbytes(seed, &localState);
|
rmbytes(seed, &localState);
|
||||||
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
|
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
|
||||||
if (unsigned zeros = getZeros(keys.PublicKey); zeros > local_high) {
|
if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax(&d_high, zeros)) {
|
||||||
if (zeros > atomicMax(&d_high, zeros)) {
|
|
||||||
Addr16 raw;
|
Addr16 raw;
|
||||||
Key32 inv;
|
Key32 inv;
|
||||||
invertKey(keys.PublicKey, inv);
|
invertKey(keys.PublicKey, inv);
|
||||||
getRawAddress(zeros, inv, raw);
|
getRawAddress(zeros, inv, raw);
|
||||||
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
|
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
|
||||||
local_high = zeros;
|
d_high = zeros;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -126,15 +134,27 @@ int main(int argc, char* argv[]) {
|
|||||||
args<<<1, 1 >>>(d_argv, argc, d_result);
|
args<<<1, 1 >>>(d_argv, argc, d_result);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
|
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
|
||||||
cudaGetDeviceProperties_v2(&prop, 0);
|
cudaGetDeviceProperties(&prop, 0);
|
||||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADS_P_B, 0);
|
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADS_P_B, 0);
|
||||||
const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B;
|
const int tTh = mBpSM * prop.multiProcessorCount * THREADS_P_B;
|
||||||
printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocksThreads: %d:%d\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADS_P_B, THREADS_P_B);
|
printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocks: %d (Threads: %d)\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADS_P_B, THREADS_P_B);
|
||||||
cudaMalloc(&rst, tTh * sizeof(curandState));
|
cudaMalloc(&rst, tTh * sizeof(curandState));
|
||||||
initRand<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
unsigned int* d_seeds;
|
||||||
|
cudaMalloc(&d_seeds, tTh * sizeof(unsigned int));
|
||||||
|
initRand<<<tTh / THREADS_P_B, THREADS_P_B >>>(rst, d_seeds);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
unsigned int* h_seeds = (unsigned int*)malloc(tTh * sizeof(unsigned int));
|
||||||
|
cudaMemcpy(h_seeds, d_seeds, tTh * sizeof(unsigned int), cudaMemcpyDeviceToHost);
|
||||||
|
if (checkSeeds(h_seeds, tTh)) {
|
||||||
|
fprintf(stderr, "Duplicate seeds found!\n");
|
||||||
|
free(h_seeds);
|
||||||
|
cudaFree(d_seeds);
|
||||||
|
cudaFree(rst);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
free(h_seeds);
|
||||||
|
cudaFree(d_seeds);
|
||||||
KeyGenKernel<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
KeyGenKernel<<<tTh / THREADS_P_B, THREADS_P_B>>>(rst);
|
||||||
cudaDeviceSynchronize();
|
|
||||||
cudaFree(rst);
|
cudaFree(rst);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user