test
This commit is contained in:
parent
41f4f94a8d
commit
553c28c995
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,5 +1,3 @@
|
||||
.vscode/
|
||||
*.o
|
||||
*.sh
|
||||
main
|
||||
yggm
|
||||
build/*
|
||||
-Makefile
|
@ -1,9 +1,9 @@
|
||||
NVCC := nvcc
|
||||
NVCC_FLAGS := -O3 -use_fast_math -Xptxas -O3 -gencode arch=compute_75,code=sm_75 \
|
||||
--default-stream per-thread -Wno-deprecated-gpu-targets --expt-relaxed-constexpr -I../libs/
|
||||
MAIN_SOURCE := ../sources/main.cu
|
||||
MAIN_SOURCE := ../sources/main.cu
|
||||
LIBS_DIR := ../libs/
|
||||
BUILD_DIR := ../build/
|
||||
BUILD_DIR := ../build
|
||||
LIBS_SOURCES := $(wildcard $(LIBS_DIR)*.cu)
|
||||
LIBS_OBJECTS := $(patsubst $(LIBS_DIR)%.cu,$(BUILD_DIR)/%.o,$(LIBS_SOURCES))
|
||||
TARGET := main
|
||||
@ -14,5 +14,5 @@ $(TARGET): $(MAIN_SOURCE) $(LIBS_OBJECTS)
|
||||
$(NVCC) $(NVCC_FLAGS) -o $(TARGET) $(MAIN_SOURCE) $(LIBS_OBJECTS)
|
||||
$(BUILD_DIR)/%.o: $(LIBS_DIR)%.cu | $(BUILD_DIR)
|
||||
$(NVCC) $(NVCC_FLAGS) -c $< -o $@
|
||||
#clean:
|
||||
# rm -f $(BUILD_DIR)/*.o
|
||||
clean:
|
||||
rm $(BUILD_DIR)/main
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
#include <ed25519.cuh>
|
||||
#include <sha512.cuh>
|
||||
#include <fprime.cuh>
|
||||
__device__ __forceinline__ void expand_key(unsigned char* expanded, const unsigned char* secret) {
|
||||
struct sha512_state s;
|
||||
sha512_init(&s);
|
||||
@ -23,4 +22,16 @@ __device__ __forceinline__ void edsign_sec_to_pub(unsigned char* pub, const unsi
|
||||
unsigned char expanded[64];
|
||||
expand_key(expanded, secret);
|
||||
sm_pack(pub, expanded);
|
||||
}
|
||||
__device__ void compact_wipe(void* data, unsigned long length) {
|
||||
volatile unsigned char* p = (volatile unsigned char*)data;
|
||||
while (length--) {
|
||||
*p++ = 0;
|
||||
}
|
||||
}
|
||||
__device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]) {
|
||||
edsign_sec_to_pub(public_key, random_seed);
|
||||
memcpy(private_key, random_seed, 32);
|
||||
memcpy(private_key + 32, public_key, 32);
|
||||
compact_wipe(random_seed, 32);
|
||||
}
|
@ -1,7 +1,5 @@
|
||||
#pragma once
|
||||
#define F25519_SIZE 32
|
||||
__device__ __constant__ unsigned char f25519_zero[F25519_SIZE] = { 0 };
|
||||
__device__ __constant__ unsigned char f25519_one[F25519_SIZE] = { 1 };
|
||||
__device__ __forceinline__ void f25519_load(unsigned char* __restrict__ x, unsigned int c) {
|
||||
#pragma unroll
|
||||
for (unsigned int i = 0; i < sizeof(c); i++) {
|
||||
@ -47,16 +45,6 @@ __device__ __forceinline__ void f25519_normalize(unsigned char* __restrict__ x)
|
||||
minusp[F25519_SIZE - 1] = (unsigned char)c;
|
||||
f25519_select(x, minusp, x, (c >> 15) & 1);
|
||||
}
|
||||
__device__ __forceinline__ unsigned char f25519_eq(const unsigned char* __restrict__ x, const unsigned char* __restrict__ y) {
|
||||
unsigned char s = 0;
|
||||
#pragma unroll
|
||||
for (int i = 0; i < F25519_SIZE; i++)
|
||||
s |= x[i] ^ y[i];
|
||||
s |= s >> 4;
|
||||
s |= s >> 2;
|
||||
s |= s >> 1;
|
||||
return (s ^ 1) & 1;
|
||||
}
|
||||
__device__ __forceinline__ void f25519_add(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||
unsigned short c = 0;
|
||||
#pragma unroll
|
||||
@ -74,15 +62,15 @@ __device__ __forceinline__ void f25519_add(unsigned char* __restrict__ r, const
|
||||
}
|
||||
}
|
||||
__device__ __forceinline__ void f25519_sub(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||
uint32_t c = 218;
|
||||
unsigned c = 218;
|
||||
int i = 0;
|
||||
#pragma unroll
|
||||
for (i = 0; i + 1 < F25519_SIZE; i++) {
|
||||
c += 65280 + ((uint32_t)a[i]) - ((uint32_t)b[i]);
|
||||
c += 65280 + ((unsigned)a[i]) - ((unsigned)b[i]);
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
c += ((uint32_t)a[i]) - ((uint32_t)b[i]);
|
||||
c += ((unsigned)a[i]) - ((unsigned)b[i]);
|
||||
r[i] = (unsigned char)(c & 127);
|
||||
c = (c >> 7) * 19;
|
||||
#pragma unroll
|
||||
@ -93,15 +81,15 @@ __device__ __forceinline__ void f25519_sub(unsigned char* __restrict__ r, const
|
||||
}
|
||||
}
|
||||
__device__ __forceinline__ void f25519_neg(unsigned char* __restrict__ r, const unsigned char* __restrict__ a) {
|
||||
uint32_t c = 218;
|
||||
unsigned c = 218;
|
||||
int i = 0;
|
||||
#pragma unroll
|
||||
for (i = 0; i + 1 < F25519_SIZE; i++) {
|
||||
c += 65280 - ((uint32_t)a[i]);
|
||||
c += 65280 - ((unsigned)a[i]);
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
c -= ((uint32_t)a[i]);
|
||||
c -= ((unsigned)a[i]);
|
||||
r[i] = (unsigned char)(c & 127);
|
||||
c = (c >> 7) * 19;
|
||||
#pragma unroll
|
||||
@ -112,15 +100,15 @@ __device__ __forceinline__ void f25519_neg(unsigned char* __restrict__ r, const
|
||||
}
|
||||
}
|
||||
__device__ __forceinline__ void f25519_mul__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||
uint32_t c = 0;
|
||||
unsigned c = 0;
|
||||
#pragma unroll
|
||||
for (int i = 0; i < F25519_SIZE; i++) {
|
||||
c >>= 8;
|
||||
for (int j = 0; j <= i; j++) {
|
||||
c += ((uint32_t)a[j]) * ((uint32_t)b[i - j]);
|
||||
c += ((unsigned)a[j]) * ((unsigned)b[i - j]);
|
||||
}
|
||||
for (int j = i + 1; j < F25519_SIZE; j++) {
|
||||
c += ((uint32_t)a[j]) * ((uint32_t)b[F25519_SIZE + i - j]) * 38;
|
||||
c += ((unsigned)a[j]) * ((unsigned)b[F25519_SIZE + i - j]) * 38;
|
||||
}
|
||||
r[i] = (unsigned char)c;
|
||||
}
|
||||
|
@ -1,88 +0,0 @@
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
__device__ void raw_add(unsigned char* x, const unsigned char* p) {
|
||||
unsigned short c = 0;
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c += ((unsigned short)x[i]) + ((unsigned short)p[i]);
|
||||
x[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
__device__ void fprime_select(unsigned char* dst, const unsigned char* zero, const unsigned char* one, unsigned char condition) {
|
||||
const unsigned char mask = -condition;
|
||||
for (int i = 0; i < 32; i++)
|
||||
dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
|
||||
}
|
||||
__device__ void raw_try_sub(unsigned char* x, const unsigned char* p) {
|
||||
unsigned char minusp[32];
|
||||
unsigned short c = 0;
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c = ((unsigned short)x[i]) - ((unsigned short)p[i]) - c;
|
||||
minusp[i] = (unsigned char)c;
|
||||
c = (c >> 8) & 1;
|
||||
}
|
||||
fprime_select(x, minusp, x, c);
|
||||
}
|
||||
__device__ int prime_msb(const unsigned char* p) {
|
||||
int i;
|
||||
unsigned char x;
|
||||
for (i = 32 - 1; i >= 0; i--) {
|
||||
if (p[i]) break;
|
||||
}
|
||||
x = p[i];
|
||||
i <<= 3;
|
||||
while (x) {
|
||||
x >>= 1;
|
||||
i++;
|
||||
}
|
||||
return i - 1;
|
||||
}
|
||||
__device__ void shift_n_bits(unsigned char* x, int n) {
|
||||
unsigned short c = 0;
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c |= ((unsigned short)x[i]) << n;
|
||||
x[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
__device__ inline int min_int(int a, int b) {
|
||||
return a < b ? a : b;
|
||||
}
|
||||
__device__ void fprime_from_bytes(unsigned char* n, const unsigned char* x, unsigned long len, const unsigned char* modulus) {
|
||||
const int preload_total = min_int(prime_msb(modulus) - 1, (int)(len << 3));
|
||||
const int preload_bytes = preload_total >> 3;
|
||||
const int preload_bits = preload_total & 7;
|
||||
const int rbits = (len << 3) - preload_total;
|
||||
memset(n, 0, 32);
|
||||
for (int i = 0; i < preload_bytes; i++)
|
||||
n[i] = x[len - preload_bytes + i];
|
||||
if (preload_bits) {
|
||||
shift_n_bits(n, preload_bits);
|
||||
n[0] |= x[len - preload_bytes - 1] >> (8 - preload_bits);
|
||||
}
|
||||
for (int i = rbits - 1; i >= 0; i--) {
|
||||
const unsigned char bit = (x[i >> 3] >> (i & 7)) & 1;
|
||||
shift_n_bits(n, 1);
|
||||
n[0] |= bit;
|
||||
raw_try_sub(n, modulus);
|
||||
}
|
||||
}
|
||||
__device__ void fprime_add(unsigned char* r, const unsigned char* a, const unsigned char* modulus) {
|
||||
raw_add(r, a);
|
||||
raw_try_sub(r, modulus);
|
||||
}
|
||||
__device__ inline void fprime_copy(unsigned char* x, const unsigned char* a) {
|
||||
memcpy(x, a, 32);
|
||||
}
|
||||
__device__ void fprime_mul(unsigned char* r, const unsigned char* a, const unsigned char* b, const unsigned char* modulus) {
|
||||
memset(r, 0, 32);
|
||||
for (int i = prime_msb(modulus); i >= 0; i--) {
|
||||
const unsigned char bit = (b[i >> 3] >> (i & 7)) & 1;
|
||||
unsigned char plusa[32];
|
||||
shift_n_bits(r, 1);
|
||||
raw_try_sub(r, modulus);
|
||||
fprime_copy(plusa, r);
|
||||
fprime_add(plusa, a, modulus);
|
||||
fprime_select(r, r, plusa, bit);
|
||||
}
|
||||
}
|
@ -8,7 +8,7 @@ __device__ __constant__ sha512_state sha512_initial_state = { {
|
||||
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
||||
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL,
|
||||
} };
|
||||
__device__ __constant__ uint64_t round_k[80] = {
|
||||
__device__ __constant__ unsigned long round_k[80] = {
|
||||
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
|
||||
0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
|
||||
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
|
||||
@ -50,11 +50,11 @@ __device__ __constant__ uint64_t round_k[80] = {
|
||||
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
|
||||
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
|
||||
};
|
||||
__device__ __forceinline__ uint64_t load64(const unsigned char* x) {
|
||||
return ((uint64_t)x[0] << 56) | ((uint64_t)x[1] << 48) | ((uint64_t)x[2] << 40) | ((uint64_t)x[3] << 32)
|
||||
| ((uint64_t)x[4] << 24) | ((uint64_t)x[5] << 16) | ((uint64_t)x[6] << 8) | ((uint64_t)x[7]);
|
||||
__device__ __forceinline__ unsigned long load64(const unsigned char* x) {
|
||||
return ((unsigned long)x[0] << 56) | ((unsigned long)x[1] << 48) | ((unsigned long)x[2] << 40) | ((unsigned long)x[3] << 32)
|
||||
| ((unsigned long)x[4] << 24) | ((unsigned long)x[5] << 16) | ((unsigned long)x[6] << 8) | ((unsigned long)x[7]);
|
||||
}
|
||||
__device__ __forceinline__ void store64(unsigned char* x, uint64_t v) {
|
||||
__device__ __forceinline__ void store64(unsigned char* x, unsigned long v) {
|
||||
x[0] = (unsigned char)(v >> 56);
|
||||
x[1] = (unsigned char)(v >> 48);
|
||||
x[2] = (unsigned char)(v >> 40);
|
||||
@ -64,37 +64,37 @@ __device__ __forceinline__ void store64(unsigned char* x, uint64_t v) {
|
||||
x[6] = (unsigned char)(v >> 8);
|
||||
x[7] = (unsigned char)(v);
|
||||
}
|
||||
__device__ __forceinline__ uint64_t rot64(uint64_t x, int bits) {
|
||||
__device__ __forceinline__ unsigned long rot64(unsigned long x, int bits) {
|
||||
return (x >> bits) | (x << (64 - bits));
|
||||
}
|
||||
__device__ void sha512_block(sha512_state* s, const unsigned char* blk) {
|
||||
uint64_t w[16];
|
||||
unsigned long w[16];
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 16; i++) {
|
||||
w[i] = load64(blk + i * 8);
|
||||
}
|
||||
uint64_t a = s->h[0];
|
||||
uint64_t b = s->h[1];
|
||||
uint64_t c = s->h[2];
|
||||
uint64_t d = s->h[3];
|
||||
uint64_t e = s->h[4];
|
||||
uint64_t f = s->h[5];
|
||||
uint64_t g = s->h[6];
|
||||
uint64_t h = s->h[7];
|
||||
unsigned long a = s->h[0];
|
||||
unsigned long b = s->h[1];
|
||||
unsigned long c = s->h[2];
|
||||
unsigned long d = s->h[3];
|
||||
unsigned long e = s->h[4];
|
||||
unsigned long f = s->h[5];
|
||||
unsigned long g = s->h[6];
|
||||
unsigned long h = s->h[7];
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 80; i++) {
|
||||
const int idx = i & 15;
|
||||
const int idx1 = (i + 1) & 15;
|
||||
const int idx7 = (i + 9) & 15;
|
||||
const int idx14 = (i + 14) & 15;
|
||||
uint64_t s0 = rot64(w[idx1], 1) ^ rot64(w[idx1], 8) ^ (w[idx1] >> 7);
|
||||
uint64_t s1 = rot64(w[idx14], 19) ^ rot64(w[idx14], 61) ^ (w[idx14] >> 6);
|
||||
uint64_t S0 = rot64(a, 28) ^ rot64(a, 34) ^ rot64(a, 39);
|
||||
uint64_t S1 = rot64(e, 14) ^ rot64(e, 18) ^ rot64(e, 41);
|
||||
uint64_t ch = (e & f) ^ ((~e) & g);
|
||||
uint64_t temp1 = h + S1 + ch + round_k[i] + w[idx];
|
||||
uint64_t maj = (a & b) ^ (a & c) ^ (b & c);
|
||||
uint64_t temp2 = S0 + maj;
|
||||
unsigned long s0 = rot64(w[idx1], 1) ^ rot64(w[idx1], 8) ^ (w[idx1] >> 7);
|
||||
unsigned long s1 = rot64(w[idx14], 19) ^ rot64(w[idx14], 61) ^ (w[idx14] >> 6);
|
||||
unsigned long S0 = rot64(a, 28) ^ rot64(a, 34) ^ rot64(a, 39);
|
||||
unsigned long S1 = rot64(e, 14) ^ rot64(e, 18) ^ rot64(e, 41);
|
||||
unsigned long ch = (e & f) ^ ((~e) & g);
|
||||
unsigned long temp1 = h + S1 + ch + round_k[i] + w[idx];
|
||||
unsigned long maj = (a & b) ^ (a & c) ^ (b & c);
|
||||
unsigned long temp2 = S0 + maj;
|
||||
h = g;
|
||||
g = f;
|
||||
f = e;
|
||||
|
131
sources/main.cu
131
sources/main.cu
@ -1,24 +1,16 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cstdint>
|
||||
#include <stdio.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <curand_kernel.h>
|
||||
#include <sha512.cuh>
|
||||
#include <arpa/inet.h>
|
||||
#include <ed25519.cuh>
|
||||
#include <edsign.cuh>
|
||||
__device__ __constant__ char hexDigitsConst[17] = "0123456789abcdef";
|
||||
using Address = unsigned char[16];
|
||||
using Key = unsigned char[32];
|
||||
struct KeysBox {
|
||||
Key PublicKey;
|
||||
Key PrivateKey;
|
||||
};
|
||||
struct option {
|
||||
unsigned high = 0x10;
|
||||
};
|
||||
__device__ static option conf;
|
||||
__device__ static unsigned high = 0x10;
|
||||
struct ds64 {
|
||||
char data[65];
|
||||
};
|
||||
@ -27,7 +19,7 @@ struct ds46 {
|
||||
};
|
||||
__device__ ds64 KeyToString(const unsigned char* key) noexcept {
|
||||
ds64 str;
|
||||
const char* hexDigits = hexDigitsConst;
|
||||
const char* hexDigits = "0123456789abcdef";
|
||||
for (unsigned char i = 0; i < 32; i++) {
|
||||
str.data[2 * i] = hexDigits[key[i] >> 4];
|
||||
str.data[2 * i + 1] = hexDigits[key[i] & 0x0F];
|
||||
@ -37,13 +29,9 @@ __device__ ds64 KeyToString(const unsigned char* key) noexcept {
|
||||
}
|
||||
__device__ ds46 getAddress(const unsigned char rawAddr[16]) noexcept {
|
||||
ds46 addrStr;
|
||||
#ifdef __CUDA_ARCH__
|
||||
const char* hexDigits = hexDigitsConst;
|
||||
#else
|
||||
const char* hexDigits = "0123456789abcdef";
|
||||
#endif
|
||||
int pos = 0;
|
||||
for (int group = 0; group < 8; group++) {
|
||||
unsigned pos = 0;
|
||||
for (unsigned char group = 0; group < 8; group++) {
|
||||
int idx = group * 2;
|
||||
addrStr.data[pos++] = hexDigits[rawAddr[idx] >> 4];
|
||||
addrStr.data[pos++] = hexDigits[rawAddr[idx] & 0x0F];
|
||||
@ -69,7 +57,28 @@ __device__ void getRawAddress(int lErase, Key& InvertedPublicKey, Address& rawAd
|
||||
rawAddr[1] = static_cast<unsigned char>(lErase - 1);
|
||||
memcpy(&rawAddr[2], &InvertedPublicKey[start], 14);
|
||||
}
|
||||
__device__ unsigned long long xorshift128plus(unsigned long long* state) {
|
||||
__device__ unsigned char zeroCounter(unsigned int x) {
|
||||
if (x == 0) return 32;
|
||||
return static_cast<unsigned char>(__builtin_clz(x));
|
||||
}
|
||||
__device__ unsigned char getZeros(const unsigned char* v) {
|
||||
unsigned char leadZeros = 0;
|
||||
for (int i = 0; i < 32; i += 4) {
|
||||
unsigned word = (static_cast<unsigned>(v[i]) << 24) | (static_cast<unsigned>(v[i + 1]) << 16) | (static_cast<unsigned>(v[i + 2]) << 8) | (static_cast<unsigned>(v[i + 3]));
|
||||
if (word == 0) {
|
||||
leadZeros += 32;
|
||||
} else {
|
||||
leadZeros += zeroCounter(word);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return leadZeros;
|
||||
}
|
||||
__global__ void initRand(curandState* randStates) {
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
curand_init((unsigned long long)clock64() + id, id, 0, &randStates[id]);
|
||||
}
|
||||
__device__ unsigned long long xorshift128plus(unsigned long long* state) noexcept {
|
||||
unsigned long long x = state[0];
|
||||
const unsigned long long y = state[1];
|
||||
state[0] = y;
|
||||
@ -79,80 +88,52 @@ __device__ unsigned long long xorshift128plus(unsigned long long* state) {
|
||||
state[1] = x;
|
||||
return x + y;
|
||||
}
|
||||
__device__ unsigned char zeroCounter(unsigned int x) {
|
||||
if (x == 0)
|
||||
return 32;
|
||||
#ifdef __CUDA_ARCH__
|
||||
return static_cast<unsigned char>(__clz(x));
|
||||
#else
|
||||
return static_cast<unsigned char>(__builtin_clz(x));
|
||||
#endif
|
||||
}
|
||||
__device__ unsigned char getZeros(const unsigned char* v) {
|
||||
unsigned char leadZeros = 0;
|
||||
for (int i = 0; i < 32; i += 4) {
|
||||
unsigned word = (static_cast<unsigned>(v[i]) << 24) | (static_cast<unsigned>(v[i + 1]) << 16) | (static_cast<unsigned>(v[i + 2]) << 8) | (static_cast<unsigned>(v[i + 3]));
|
||||
if (word == 0)
|
||||
leadZeros += 32;
|
||||
else {
|
||||
leadZeros += zeroCounter(word);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return leadZeros;
|
||||
}
|
||||
__global__ void initRandStates(curandState* randStates) {
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
curand_init((unsigned long long)clock64() + id, id, 0, &randStates[id]);
|
||||
}
|
||||
__device__ void generateRandomBytes(unsigned char* buf, unsigned long size, curandState* state) {
|
||||
__device__ void rmbytes(unsigned char* buf, unsigned long size, unsigned long long* state) {
|
||||
for (unsigned long i = 0; i < size; i++) {
|
||||
buf[i] = curand(state) & 0xFF;
|
||||
buf[i] = xorshift128plus(state) & 0xFF;
|
||||
}
|
||||
}
|
||||
__device__ void invertKey(const unsigned char* key, unsigned char* inverted) {
|
||||
for (int i = 0; i < 32; i++)
|
||||
inverted[i] = key[i] ^ 0xFF;
|
||||
}
|
||||
__device__ void compact_wipe(void* data, unsigned long length) {
|
||||
volatile unsigned char* p = (volatile unsigned char*)data;
|
||||
while (length--) {
|
||||
*p++ = 0;
|
||||
}
|
||||
}
|
||||
__device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]) {
|
||||
edsign_sec_to_pub(public_key, random_seed);
|
||||
memcpy(private_key, random_seed, 32);
|
||||
memcpy(private_key + 32, public_key, 32);
|
||||
compact_wipe(random_seed, 32);
|
||||
}
|
||||
__global__ __launch_bounds__(256) void minerKernel(curandState* randStates) {
|
||||
__global__ void minerKernel(curandState* randStates) {
|
||||
int thid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
curandState localState = randStates[thid];
|
||||
unsigned long long xorshiftState[2];
|
||||
xorshiftState[0] = curand(&localState);
|
||||
xorshiftState[1] = curand(&localState);
|
||||
Key seed;
|
||||
generateRandomBytes(seed, sizeof(seed), &localState);
|
||||
rmbytes(seed, sizeof(seed), xorshiftState);
|
||||
if (thid == 0) printf("Seed: %s\n", KeyToString(seed).data);
|
||||
while (true) {
|
||||
generateRandomBytes(seed, sizeof(seed), &localState);
|
||||
KeysBox keys;
|
||||
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
|
||||
if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax(&conf.high, (unsigned)zeros)) {
|
||||
Key inv;
|
||||
Address raw;
|
||||
invertKey(keys.PublicKey, inv);
|
||||
getRawAddress(zeros, inv, raw);
|
||||
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddress(raw).data, KeyToString(keys.PublicKey).data, KeyToString(keys.PrivateKey).data);
|
||||
if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax(&high, (unsigned)zeros)) {
|
||||
printf("\nIPv6:\t%x\nPK:\t%s\nSK:\t%s\n", zeros, KeyToString(keys.PublicKey).data, KeyToString(keys.PrivateKey).data);
|
||||
}
|
||||
__syncthreads();
|
||||
rmbytes(seed, sizeof(seed), xorshiftState);
|
||||
}
|
||||
//randStates[thid] = localState;
|
||||
}
|
||||
int main() {
|
||||
curandState* d_randStates;
|
||||
cudaMalloc(&d_randStates, 1024 * sizeof(curandState));
|
||||
initRandStates << <4, 256 >> > (d_randStates);
|
||||
const int threadsPerBlock = 256;
|
||||
cudaDeviceProp prop;
|
||||
cudaGetDeviceProperties_v2(&prop, 0);
|
||||
int mBpSM;
|
||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, minerKernel, threadsPerBlock, 0);
|
||||
int SMs = prop.multiProcessorCount;
|
||||
int maxBlocks = mBpSM * SMs;
|
||||
const int totalThreads = maxBlocks * threadsPerBlock;
|
||||
printf("SMs: %d\n", SMs);
|
||||
printf("maxBlocks: %d\n", maxBlocks);
|
||||
printf("totalThreads: %d\n", totalThreads);
|
||||
printf("MaxBlocksPerSM: %d\n", mBpSM);
|
||||
curandState* rst;
|
||||
cudaMalloc(&rst, totalThreads * sizeof(curandState));
|
||||
initRand<<<100, threadsPerBlock >>>(rst);
|
||||
cudaDeviceSynchronize();
|
||||
minerKernel << <4, 256 >> > (d_randStates);
|
||||
minerKernel<<<100, threadsPerBlock>>>(rst);
|
||||
cudaDeviceSynchronize();
|
||||
cudaFree(d_randStates);
|
||||
cudaFree(rst);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user