This commit is contained in:
2025-03-17 15:33:07 +05:00
parent 7ea0e62697
commit 5b543aa710
5 changed files with 23 additions and 34 deletions

View File

@@ -43,7 +43,7 @@ void __host__ __device__ ge_p3_dbl(ge_p1p1* __restrict__ r, const ge_p3* __restr
fe_copy(q.Z, p->Z);
ge_p2_dbl(r, &q);
}
void ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h) {
void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h) {
fe recip, x, y;
fe_invert(recip, h->Z);
fe_mul(x, h->X, recip);
@@ -62,7 +62,7 @@ static void __host__ __device__ cmov(ge_precomp* __restrict__ t, const ge_precom
fe_cmov(t->yminusx, u->yminusx, b);
fe_cmov(t->xy2d, u->xy2d, b);
}
static void __host__ __device__ select(ge_precomp* t, int pos, signed char b) {
static void __host__ __device__ select(ge_precomp* __restrict__ t, int pos, signed char b) {
ge_precomp minust;
unsigned long x = b;
x >>= 63;
@@ -87,16 +87,16 @@ static void __host__ __device__ select(ge_precomp* t, int pos, signed char b) {
}
void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigned char* __restrict__ a) {
signed char e[64], carry;
unsigned char i;
ge_p1p1 r;
ge_p2 s;
ge_precomp t;
int i;
#pragma unroll 32
for (i = 0; i < 32; i++) {
e[2 * i] = a[i] & 15;
e[2 * i + 1] = a[i] >> 4;
}
#pragma unroll 63
#pragma unroll
for (i = 0, carry = 0; i < 63; i++) {
e[i] += carry;
carry = (e[i] + 8) >> 4;
@@ -104,11 +104,11 @@ void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigne
}
e[63] += carry;
#pragma unroll 10
for (int i = 0; i < 10; i++) h->X[i] = 0;
for (unsigned char i = 0; i < 10; i++) h->X[i] = 0;
fe_1(h->Y);
fe_1(h->Z);
#pragma unroll 10
for (int i = 0; i < 10; i++) h->T[i] = 0;
for (unsigned char i = 0; i < 10; i++) h->T[i] = 0;
#pragma unroll
for (i = 1; i < 64; i += 2) {
select(&t, i >> 1, e[i]);

View File

@@ -2,33 +2,23 @@
#define __ED25519_H
#include <f25519.cuh>
typedef struct {
fe X;
fe Y;
fe Z;
fe X, Y, Z;
} ge_p2;
typedef struct {
fe X;
fe Y;
fe Z;
fe T;
fe X, Y, Z, T;
} ge_p3;
typedef struct {
fe X;
fe Y;
fe Z;
fe T;
fe X, Y, Z, T;
} ge_p1p1;
typedef struct {
fe yplusx;
fe yminusx;
fe xy2d;
fe yplusx, yminusx, xy2d;
} ge_precomp;
void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h);
void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h);
void __host__ __device__ ge_madd(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p, const ge_precomp* __restrict__ q);
void __host__ __device__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a);
void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p);
void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p);
void __host__ __device__ ge_p1p1_to_p2(ge_p2* __restrict__ r, const ge_p1p1* __restrict__ p);
void __host__ __device__ ge_p1p1_to_p3(ge_p3* __restrict__ r, const ge_p1p1* __restrict__ p);
void __host__ __device__ ge_p2_dbl(ge_p1p1* __restrict__ r, const ge_p2* __restrict__ p);
void __host__ __device__ ge_p3_dbl(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p);
void __host__ __device__ ge_p3_to_p2(ge_p2* __restrict__ r, const ge_p3* __restrict__ p);
#endif

View File

@@ -10,7 +10,7 @@ __device__ ds64 ktos(const unsigned char* key) noexcept {
str.data[65] = '\0';
return str;
}
__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept {
__device__ ds46 getAddr(const Addr16 rawAddr) noexcept {
ds46 addrStr;
unsigned pos = 0;
#pragma unroll 8

View File

@@ -8,11 +8,10 @@ struct ds46 {
};
using Addr16 = unsigned char[16];
using Key32 = unsigned char[32];
struct __align__(32) KeysBox32 {
Key32 PublicKey;
Key32 PrivateKey;
struct KeysBox32 {
Key32 PublicKey, PrivateKey;
};__device__ ds64 ktos(const unsigned char* key) noexcept;
__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept;
__device__ ds46 getAddr(const Addr16 rawAddr) noexcept;
__device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Addr16& rawAddr) noexcept;
__device__ void invertKey(const unsigned char* key, unsigned char* inverted);
#endif

View File

@@ -143,7 +143,7 @@ int main(int argc, char* argv[]) {
args<<<1, 1 >>>(d_argv, argc, d_result);
cudaDeviceSynchronize();
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
cudaGetDeviceProperties(&prop, 0);
cudaGetDeviceProperties_v2(&prop, 0);
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADSPB, 0);
const int tTh = mBpSM * prop.multiProcessorCount * THREADSPB;
printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocks: %d (Threads: %d)\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADSPB, THREADSPB);
@@ -156,7 +156,7 @@ int main(int argc, char* argv[]) {
unsigned* h_seeds = (unsigned*)malloc(tTh * sizeof(unsigned));
cudaMemcpy(h_seeds, d_seeds, tTh * sizeof(unsigned), cudaMemcpyDeviceToHost);
if (checkSeeds(h_seeds, tTh)) {
fprintf(stderr, "Duplicate seeds found!\n");
fprintf(stderr, "Error: DUPLICATE SEEDS FOUND!\n");
free(h_seeds);
cudaFree(d_seeds);
cudaFree(rst);