fixes
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#include <ed25519.cuh>
|
||||
#include <precomp_data.h>
|
||||
#include <f25519.cuh>
|
||||
void __host__ __device__ ge_madd(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p, const ge_precomp* __restrict__ q) {
|
||||
fe t0;
|
||||
fe_add(r->X, p->Y, p->X);
|
||||
@@ -51,11 +52,10 @@ void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p
|
||||
fe_tobytes(s, y);
|
||||
s[31] ^= fe_isnegative(x) << 7;
|
||||
}
|
||||
/*
 * Branch-free equality test for two signed bytes.
 *
 * Returns 1 when b == c and 0 otherwise; the result is derived purely from
 * arithmetic on the operands, with no data-dependent branch.
 *
 * The XOR is narrowed to an unsigned byte before it is widened. Without that
 * step, b ^ c is an int that is negative whenever the operands differ in
 * sign; it then sign-extends into the wide unsigned accumulator and survives
 * the "subtract one, keep the top bit" step as a false "equal"
 * (for example equal(-1, 0) would yield 1).
 */
static unsigned char __host__ __device__ equal(const signed char b, const signed char c) {
    /* 0 when the bytes match, 1..255 otherwise. */
    const unsigned char diff = static_cast<unsigned char>(b ^ c);

    /* Wraps to all-ones only for diff == 0; otherwise stays in 0..254. */
    unsigned long long y = diff;
    y -= 1;

    /* Bit 63 is set only in the wrapped (equal) case. */
    y >>= 63;
    return static_cast<unsigned char>(y);
}
|
||||
static void __host__ __device__ cmov(ge_precomp* __restrict__ t, const ge_precomp* __restrict__ u, unsigned char b) {
|
||||
fe_cmov(t->yplusx, u->yplusx, b);
|
||||
@@ -96,7 +96,7 @@ void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigne
|
||||
e[2 * i] = a[i] & 15;
|
||||
e[2 * i + 1] = a[i] >> 4;
|
||||
}
|
||||
#pragma unroll
|
||||
#pragma unroll 63
|
||||
for (i = 0, carry = 0; i < 63; i++) {
|
||||
e[i] += carry;
|
||||
carry = (e[i] + 8) >> 4;
|
||||
@@ -104,12 +104,15 @@ void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigne
|
||||
}
|
||||
e[63] += carry;
|
||||
#pragma unroll 10
|
||||
for (unsigned char i = 0; i < 10; i++) h->X[i] = 0;
|
||||
fe_1(h->Y);
|
||||
fe_1(h->Z);
|
||||
#pragma unroll 10
|
||||
for (unsigned char i = 0; i < 10; i++) h->T[i] = 0;
|
||||
#pragma unroll
|
||||
for (unsigned char i = 0; i < 10; i++) {
|
||||
h->X[i] = 0;
|
||||
h->Y[i] = 0;
|
||||
h->Z[i] = 0;
|
||||
h->T[i] = 0;
|
||||
}
|
||||
h->Y[0] = 1;
|
||||
h->Z[0] = 1;
|
||||
#pragma unroll 32
|
||||
for (i = 1; i < 64; i += 2) {
|
||||
select(&t, i >> 1, e[i]);
|
||||
ge_madd(&r, h, &t);
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
#ifndef __ED25519_H
|
||||
#define __ED25519_H
|
||||
#include <f25519.cuh>
|
||||
typedef struct {
|
||||
fe X, Y, Z;
|
||||
signed int X[10], Y[10], Z[10];
|
||||
} ge_p2;
|
||||
typedef struct {
|
||||
fe X, Y, Z, T;
|
||||
signed int X[10], Y[10], Z[10], T[10];
|
||||
} ge_p3;
|
||||
typedef struct {
|
||||
fe X, Y, Z, T;
|
||||
signed int X[10], Y[10], Z[10], T[10];
|
||||
} ge_p1p1;
|
||||
typedef struct {
|
||||
fe yplusx, yminusx, xy2d;
|
||||
signed int yplusx[10], yminusx[10], xy2d[10];
|
||||
} ge_precomp;
|
||||
void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h);
|
||||
void __host__ __device__ ge_madd(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p, const ge_precomp* __restrict__ q);
|
||||
|
||||
@@ -362,16 +362,7 @@ void __host__ __device__ fe_sq2(fe __restrict__ h, const fe& __restrict__ f) {
|
||||
long h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
|
||||
long h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
|
||||
long h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
|
||||
long carry0;
|
||||
long carry1;
|
||||
long carry2;
|
||||
long carry3;
|
||||
long carry4;
|
||||
long carry5;
|
||||
long carry6;
|
||||
long carry7;
|
||||
long carry8;
|
||||
long carry9;
|
||||
long carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
|
||||
h0 <<= 1;
|
||||
h1 <<= 1;
|
||||
h2 <<= 1;
|
||||
|
||||
@@ -33,11 +33,9 @@ __device__ __forceinline__ unsigned long load64(const unsigned char* __restrict_
|
||||
}
|
||||
/*
 * Writes the 64-bit value v into x[0..7], most significant byte first.
 *
 * NOTE(review): relies on unsigned long being 64 bits wide, as the shifts
 * here and the rest of this file appear to assume - confirm for the target
 * toolchain.
 */
__device__ __forceinline__ void store64(unsigned char* __restrict__ x, unsigned long v) {
#pragma unroll 8
    for (unsigned char i = 0; i < 8; i++) x[i] = static_cast<unsigned char>(v >> (56 - i * 8));
}

/*
 * Rotates x right by `bits` positions within a 64-bit word.
 *
 * Kept as an inline function rather than a function-like macro: a macro with
 * unparenthesised parameters mis-expands compound arguments
 * (rot64(a ^ b, n) would become (a ^ b >> n) | (a ^ b << (64 - n))) and
 * evaluates x twice.
 *
 * The left-shift count is masked to 0..63 so that bits == 0 returns x
 * unchanged instead of shifting by the full word width, which is undefined.
 * For bits in 1..63 the result is identical to the unmasked form.
 */
__device__ __forceinline__ unsigned long rot64(unsigned long x, int bits) {
    return (x >> bits) | (x << ((64 - bits) & 63));
}
|
||||
__device__ __forceinline__ void sha512_block(sha512_state* __restrict__ s, const unsigned char* __restrict__ blk) {
|
||||
unsigned long w[16];
|
||||
#pragma unroll 16
|
||||
@@ -53,7 +51,7 @@ __device__ __forceinline__ void sha512_block(sha512_state* __restrict__ s, const
|
||||
unsigned long g = s->h[6];
|
||||
unsigned long h = s->h[7];
|
||||
#pragma unroll 80
|
||||
for (int i = 0; i < 80; i++) {
|
||||
for (unsigned char i = 0; i < 80; i++) {
|
||||
const int idx = i & 15;
|
||||
const int idx1 = (i + 1) & 15;
|
||||
const int idx7 = (i + 9) & 15;
|
||||
@@ -103,6 +101,7 @@ __device__ void sha512_get(const sha512_state* s, unsigned char* hash) {
|
||||
hash += c;
|
||||
len -= c;
|
||||
i++;
|
||||
#pragma unroll
|
||||
while (len >= 8) {
|
||||
store64(hash, s->h[i]);
|
||||
hash += 8;
|
||||
|
||||
@@ -68,12 +68,10 @@ __device__ __forceinline__ unsigned char getZeros(const unsigned char* v) noexce
|
||||
}
|
||||
return leadZeros;
|
||||
}
|
||||
/*
|
||||
__global__ void initRandSeed(curandState* states, const unsigned long seed) {
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
curand_init(seed, idx, 0, &states[idx]);
|
||||
}
|
||||
*/
|
||||
__global__ void initRand(curandState* rs, unsigned int* d_seeds) {
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
curand_init(clock64() + id * 7919ULL, id, 0, &rs[id]);
|
||||
@@ -106,7 +104,8 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
|
||||
__global__ void KeyGenKernel(curandState* randStates) {
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
curandState localState = randStates[idx];
|
||||
while (true) {
|
||||
int x = 0;
|
||||
while (x < 0xFFFFFFFF) {
|
||||
Key32 seed;
|
||||
KeysBox32 keys;
|
||||
rmbytes(seed, &localState);
|
||||
|
||||
Reference in New Issue
Block a user