Align 32-byte key and field-element buffers; tidy invertKey and KeyGen.

Summary of changes carried by this patch:

  * libs/ed25519.cu: the 32-byte scratch arrays in ed25519_unproject,
    ed25519_pack, ed25519_add and ed25519_double are declared with
    __align__(32).
  * libs/keymanip.cuh: struct KeysBox32 is declared with __align__(32).
  * libs/keymanip.cu: invertKey keeps a byte-wise loop, now with an int
    index and a count-less "#pragma unroll". A byte-wise loop places no
    alignment requirement on `key` or `inverted`, which are plain
    unsigned char pointers.
  * sources/main.cu: in KeyGen, `Key32 seed` is declared before
    `KeysBox32 keys`.

Review notes (differences from the earlier revision of this patch):

  * The uchar4 version of invertKey was dropped. Its reinterpret_cast
    expressions had no target type, and a uchar4 load/store needs 4-byte
    alignment that the unsigned char pointer parameters do not promise.
  * The libs/sha512.cu hunk was dropped. It removed the `if (len > 0)`
    guard in sha512_get, so store64(tmp, s->h[i]) ran even when len was 0,
    after the preceding loop had already advanced i past the last word it
    copied.
  * The commented-out debug printf in KeyGen was dropped; it named `seed`
    above the point where `seed` is declared.
  * Indentation inside the hunks is reconstructed as 4 spaces; apply with
    whitespace tolerance if the tree uses a different indent. The "index"
    lines are omitted for the two files whose resulting content changed.

diff --git a/libs/ed25519.cu b/libs/ed25519.cu
index 4242d3c..8dd44bb 100644
--- a/libs/ed25519.cu
+++ b/libs/ed25519.cu
@@ -18,7 +18,7 @@ __device__ __constant__ unsigned char ed25519_k[32] = {
     0x30,0xd1,0xf3,0xee,0xf2,0x80,0x8e,0x19,0xe7,0xfc,0xdf,0x56,0xdc,0xd9,0x06,0x24
 };
 __device__ void ed25519_unproject(unsigned char* __restrict__ x, unsigned char* __restrict__ y, const struct ed25519_pt* __restrict__ p) {
-    unsigned char z1[32];
+    unsigned char __align__(32) z1[32];
     f25519_inv__distinct(z1, p->z);
     f25519_mul__distinct(x, p->x, z1);
     f25519_mul__distinct(y, p->y, z1);
@@ -26,7 +26,7 @@ __device__ void ed25519_unproject(unsigned char* __restrict__ x, unsigned char*
     f25519_normalize(y);
 }
 __device__ void ed25519_pack(unsigned char* __restrict__ c, const unsigned char* __restrict__ x, const unsigned char* __restrict__ y) {
-    unsigned char tmp[32];
+    unsigned char __align__(32) tmp[32];
     unsigned char parity;
     f25519_copy(tmp, x);
     f25519_normalize(tmp);
@@ -36,7 +36,7 @@ __device__ void ed25519_pack(unsigned char* __restrict__ c, const unsigned char*
     c[31] |= parity;
 }
 __device__ __forceinline__ void ed25519_add(struct ed25519_pt* __restrict__ r, const struct ed25519_pt* __restrict__ p1, const struct ed25519_pt* __restrict__ p2) {
-    unsigned char a[32], b[32], c[32], d[32], e[32], f[32], g[32], h[32];
+    unsigned char __align__(32) a[32], __align__(32) b[32], __align__(32) c[32], __align__(32) d[32], __align__(32) e[32], __align__(32) f[32], __align__(32) g[32], __align__(32) h[32];
     f25519_sub(c, p1->y, p1->x);
     f25519_sub(d, p2->y, p2->x);
     f25519_mul__distinct(a, c, d);
@@ -57,7 +57,7 @@ __device__ __forceinline__ void ed25519_add(struct ed25519_pt* __restrict__ r, c
     f25519_mul__distinct(r->z, f, g);
 }
 __device__ __forceinline__ void ed25519_double(struct ed25519_pt* __restrict__ r, const struct ed25519_pt* __restrict__ p) {
-    unsigned char a[32], b[32], c[32], e[32], f[32], g[32], h[32];
+    unsigned char __align__(32) a[32], __align__(32) b[32], __align__(32) c[32], __align__(32) e[32], __align__(32) f[32], __align__(32) g[32], __align__(32) h[32];
     f25519_mul__distinct(a, p->x, p->x);
     f25519_mul__distinct(b, p->y, p->y);
     f25519_mul__distinct(c, p->z, p->z);
diff --git a/libs/keymanip.cu b/libs/keymanip.cu
--- a/libs/keymanip.cu
+++ b/libs/keymanip.cu
@@ -41,6 +41,6 @@ __device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Addr16& rawA
     memcpy(&rawAddr[2], &InvertedPublicKey[start], 14);
 }
 __device__ void invertKey(const unsigned char* key, unsigned char* inverted) {
-#pragma unroll 32
-    for (unsigned char i = 0; i < 32; i++) inverted[i] = key[i] ^ 0xFF;
+#pragma unroll
+    for (int i = 0; i < 32; i++) inverted[i] = key[i] ^ 0xFF;
 }
\ No newline at end of file
diff --git a/libs/keymanip.cuh b/libs/keymanip.cuh
index 907eb07..8b1b19a 100644
--- a/libs/keymanip.cuh
+++ b/libs/keymanip.cuh
@@ -8,7 +8,7 @@ struct ds46 {
 };
 using Addr16 = unsigned char[16];
 using Key32 = unsigned char[32];
-struct KeysBox32 {
+struct __align__(32) KeysBox32 {
     Key32 PublicKey;
     Key32 PrivateKey;
 };
diff --git a/sources/main.cu b/sources/main.cu
--- a/sources/main.cu
+++ b/sources/main.cu
@@ -84,9 +84,9 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
 __global__ void KeyGen(curandState* randStates) {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
     curandState localState = randStates[idx];
     while (true) {
-        KeysBox32 keys;
         Key32 seed;
+        KeysBox32 keys;
         rmbytes(seed, &localState);
         ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
         if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax((unsigned*)&d_high, zeros)) {