This commit is contained in:
2025-03-17 19:20:29 +05:00
parent 5b543aa710
commit b0c15fad82
5 changed files with 25 additions and 34 deletions

View File

@@ -1,5 +1,6 @@
#include <ed25519.cuh>
#include <precomp_data.h>
#include <f25519.cuh>
void __host__ __device__ ge_madd(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p, const ge_precomp* __restrict__ q) {
fe t0;
fe_add(r->X, p->Y, p->X);
@@ -51,11 +52,10 @@ void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p
fe_tobytes(s, y);
s[31] ^= fe_isnegative(x) << 7;
}
/*
 * Branch-free equality test for two signed bytes.
 *
 * Both operands are reinterpreted as unsigned bytes before the XOR, so the
 * difference is always in 0..255. XOR-ing the promoted signed values instead
 * yields a negative int when the operands have opposite signs; its sign
 * extension survives the "- 1" and the final shift would then report "equal".
 *
 * b, c: values to compare.
 * Returns 1 when b == c, 0 otherwise.
 */
static unsigned char __host__ __device__ equal(const signed char b, const signed char c) {
    // unsigned long long is at least 64 bits everywhere, so the shift by 63
    // below is well defined even where unsigned long is only 32 bits.
    unsigned long long diff = static_cast<unsigned char>(b) ^ static_cast<unsigned char>(c);
    diff -= 1;    // wraps to all ones only when the bytes were identical
    diff >>= 63;  // keep the top bit: 1 for equal, 0 for different
    return static_cast<unsigned char>(diff);
}
static void __host__ __device__ cmov(ge_precomp* __restrict__ t, const ge_precomp* __restrict__ u, unsigned char b) {
fe_cmov(t->yplusx, u->yplusx, b);
@@ -96,7 +96,7 @@ void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigne
e[2 * i] = a[i] & 15;
e[2 * i + 1] = a[i] >> 4;
}
#pragma unroll
#pragma unroll 63
for (i = 0, carry = 0; i < 63; i++) {
e[i] += carry;
carry = (e[i] + 8) >> 4;
@@ -104,12 +104,15 @@ void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigne
}
e[63] += carry;
#pragma unroll 10
for (unsigned char i = 0; i < 10; i++) h->X[i] = 0;
fe_1(h->Y);
fe_1(h->Z);
#pragma unroll 10
for (unsigned char i = 0; i < 10; i++) h->T[i] = 0;
#pragma unroll
for (unsigned char i = 0; i < 10; i++) {
h->X[i] = 0;
h->Y[i] = 0;
h->Z[i] = 0;
h->T[i] = 0;
}
h->Y[0] = 1;
h->Z[0] = 1;
#pragma unroll 32
for (i = 1; i < 64; i += 2) {
select(&t, i >> 1, e[i]);
ge_madd(&r, h, &t);

View File

@@ -1,17 +1,16 @@
#ifndef __ED25519_H
#define __ED25519_H
#include <f25519.cuh>
// Group element with three coordinate arrays of ten signed limbs each.
// Each member is declared exactly once; the arrays are spelled out so this
// struct does not depend on the `fe` typedef.
// NOTE(review): presumably the projective (X:Y:Z) form from ref10 - confirm.
typedef struct {
    signed int X[10], Y[10], Z[10];
} ge_p2;
// Group element with four coordinate arrays (X, Y, Z, T) of ten signed limbs each.
// Each member is declared exactly once; the arrays are spelled out so this
// struct does not depend on the `fe` typedef.
// NOTE(review): presumably the extended (X:Y:Z:T) form from ref10 - confirm.
typedef struct {
    signed int X[10], Y[10], Z[10], T[10];
} ge_p3;
// Intermediate group element with four coordinate arrays (X, Y, Z, T) of ten
// signed limbs each; ge_madd writes its result into this type.
// Each member is declared exactly once; the arrays are spelled out so this
// struct does not depend on the `fe` typedef.
typedef struct {
    signed int X[10], Y[10], Z[10], T[10];
} ge_p1p1;
// Precomputed table entry: three ten-limb arrays named yplusx, yminusx and xy2d.
// Each member is declared exactly once; the arrays are spelled out so this
// struct does not depend on the `fe` typedef.
// NOTE(review): names suggest y+x, y-x and 2*d*x*y as in ref10 - confirm.
typedef struct {
    signed int yplusx[10], yminusx[10], xy2d[10];
} ge_precomp;
void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h);
void __host__ __device__ ge_madd(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p, const ge_precomp* __restrict__ q);

View File

@@ -362,16 +362,7 @@ void __host__ __device__ fe_sq2(fe __restrict__ h, const fe& __restrict__ f) {
long h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
long h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
long h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
long carry0;
long carry1;
long carry2;
long carry3;
long carry4;
long carry5;
long carry6;
long carry7;
long carry8;
long carry9;
long carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
h0 <<= 1;
h1 <<= 1;
h2 <<= 1;

View File

@@ -33,11 +33,9 @@ __device__ __forceinline__ unsigned long load64(const unsigned char* __restrict_
}
/*
 * Writes v to x[0..7] most-significant byte first (big-endian).
 * NOTE(review): relies on unsigned long being 64 bits wide; that holds on
 * LP64 targets - confirm before building where unsigned long is 32 bits.
 */
__device__ __forceinline__ void store64(unsigned char* __restrict__ x, unsigned long v) {
#pragma unroll 8
    for (unsigned char i = 0; i < 8; i++) x[i] = static_cast<unsigned char>(v >> (56 - i * 8));
}
/*
 * Rotates x right by `bits` positions within a 64-bit word.
 *
 * Kept as a typed inline function rather than a macro: a macro with
 * unparenthesised arguments mis-parses calls such as rot64(a ^ b, n)
 * (the shift binds tighter than ^) and evaluates its argument twice.
 * `bits` must be in 1..63; a shift by 64 is undefined.
 */
__device__ __forceinline__ unsigned long rot64(unsigned long x, int bits) {
    return (x >> bits) | (x << (64 - bits));
}
__device__ __forceinline__ void sha512_block(sha512_state* __restrict__ s, const unsigned char* __restrict__ blk) {
unsigned long w[16];
#pragma unroll 16
@@ -53,7 +51,7 @@ __device__ __forceinline__ void sha512_block(sha512_state* __restrict__ s, const
unsigned long g = s->h[6];
unsigned long h = s->h[7];
#pragma unroll 80
for (int i = 0; i < 80; i++) {
for (unsigned char i = 0; i < 80; i++) {
const int idx = i & 15;
const int idx1 = (i + 1) & 15;
const int idx7 = (i + 9) & 15;
@@ -103,6 +101,7 @@ __device__ void sha512_get(const sha512_state* s, unsigned char* hash) {
hash += c;
len -= c;
i++;
#pragma unroll
while (len >= 8) {
store64(hash, s->h[i]);
hash += 8;

View File

@@ -68,12 +68,10 @@ __device__ __forceinline__ unsigned char getZeros(const unsigned char* v) noexce
}
return leadZeros;
}
// Seeds one curandState per thread from a shared seed, using the flat 1D
// thread index as the cuRAND sequence number.
// There is no bounds guard: `states` must hold at least
// gridDim.x * blockDim.x entries for the launch configuration used.
__global__ void initRandSeed(curandState* states, const unsigned long seed) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    curand_init(seed, idx, 0, &states[idx]);
}
__global__ void initRand(curandState* rs, unsigned int* d_seeds) {
int id = blockIdx.x * blockDim.x + threadIdx.x;
curand_init(clock64() + id * 7919ULL, id, 0, &rs[id]);
@@ -106,7 +104,8 @@ __device__ __forceinline__ void rmbytes(unsigned char* buf, curandState* state)
__global__ void KeyGenKernel(curandState* randStates) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
curandState localState = randStates[idx];
while (true) {
int x = 0;
while (x < 0xFFFFFFFF) {
Key32 seed;
KeysBox32 keys;
rmbytes(seed, &localState);