fixes
This commit is contained in:
@@ -43,7 +43,7 @@ void __host__ __device__ ge_p3_dbl(ge_p1p1* __restrict__ r, const ge_p3* __restr
|
||||
fe_copy(q.Z, p->Z);
|
||||
ge_p2_dbl(r, &q);
|
||||
}
|
||||
void ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h) {
|
||||
void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h) {
|
||||
fe recip, x, y;
|
||||
fe_invert(recip, h->Z);
|
||||
fe_mul(x, h->X, recip);
|
||||
@@ -62,7 +62,7 @@ static void __host__ __device__ cmov(ge_precomp* __restrict__ t, const ge_precom
|
||||
fe_cmov(t->yminusx, u->yminusx, b);
|
||||
fe_cmov(t->xy2d, u->xy2d, b);
|
||||
}
|
||||
static void __host__ __device__ select(ge_precomp* t, int pos, signed char b) {
|
||||
static void __host__ __device__ select(ge_precomp* __restrict__ t, int pos, signed char b) {
|
||||
ge_precomp minust;
|
||||
unsigned long x = b;
|
||||
x >>= 63;
|
||||
@@ -87,16 +87,16 @@ static void __host__ __device__ select(ge_precomp* t, int pos, signed char b) {
|
||||
}
|
||||
void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigned char* __restrict__ a) {
|
||||
signed char e[64], carry;
|
||||
unsigned char i;
|
||||
ge_p1p1 r;
|
||||
ge_p2 s;
|
||||
ge_precomp t;
|
||||
int i;
|
||||
#pragma unroll 32
|
||||
for (i = 0; i < 32; i++) {
|
||||
e[2 * i] = a[i] & 15;
|
||||
e[2 * i + 1] = a[i] >> 4;
|
||||
}
|
||||
#pragma unroll 63
|
||||
#pragma unroll
|
||||
for (i = 0, carry = 0; i < 63; i++) {
|
||||
e[i] += carry;
|
||||
carry = (e[i] + 8) >> 4;
|
||||
@@ -104,11 +104,11 @@ void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigne
|
||||
}
|
||||
e[63] += carry;
|
||||
#pragma unroll 10
|
||||
for (int i = 0; i < 10; i++) h->X[i] = 0;
|
||||
for (unsigned char i = 0; i < 10; i++) h->X[i] = 0;
|
||||
fe_1(h->Y);
|
||||
fe_1(h->Z);
|
||||
#pragma unroll 10
|
||||
for (int i = 0; i < 10; i++) h->T[i] = 0;
|
||||
for (unsigned char i = 0; i < 10; i++) h->T[i] = 0;
|
||||
#pragma unroll
|
||||
for (i = 1; i < 64; i += 2) {
|
||||
select(&t, i >> 1, e[i]);
|
||||
|
||||
@@ -2,33 +2,23 @@
|
||||
#define __ED25519_H
|
||||
#include <f25519.cuh>
|
||||
typedef struct {
|
||||
fe X;
|
||||
fe Y;
|
||||
fe Z;
|
||||
fe X, Y, Z;
|
||||
} ge_p2;
|
||||
typedef struct {
|
||||
fe X;
|
||||
fe Y;
|
||||
fe Z;
|
||||
fe T;
|
||||
fe X, Y, Z, T;
|
||||
} ge_p3;
|
||||
typedef struct {
|
||||
fe X;
|
||||
fe Y;
|
||||
fe Z;
|
||||
fe T;
|
||||
fe X, Y, Z, T;
|
||||
} ge_p1p1;
|
||||
typedef struct {
|
||||
fe yplusx;
|
||||
fe yminusx;
|
||||
fe xy2d;
|
||||
fe yplusx, yminusx, xy2d;
|
||||
} ge_precomp;
|
||||
void __host__ __device__ ge_p3_tobytes(unsigned char *s, const ge_p3 *h);
|
||||
void __host__ __device__ ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
|
||||
void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h);
|
||||
void __host__ __device__ ge_madd(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p, const ge_precomp* __restrict__ q);
|
||||
void __host__ __device__ ge_scalarmult_base(ge_p3 *h, const unsigned char *a);
|
||||
void __host__ __device__ ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
|
||||
void __host__ __device__ ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
|
||||
void __host__ __device__ ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
|
||||
void __host__ __device__ ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p);
|
||||
void __host__ __device__ ge_p3_to_p2(ge_p2 *r, const ge_p3 *p);
|
||||
void __host__ __device__ ge_p1p1_to_p2(ge_p2* __restrict__ r, const ge_p1p1* __restrict__ p);
|
||||
void __host__ __device__ ge_p1p1_to_p3(ge_p3* __restrict__ r, const ge_p1p1* __restrict__ p);
|
||||
void __host__ __device__ ge_p2_dbl(ge_p1p1* __restrict__ r, const ge_p2* __restrict__ p);
|
||||
void __host__ __device__ ge_p3_dbl(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p);
|
||||
void __host__ __device__ ge_p3_to_p2(ge_p2* __restrict__ r, const ge_p3* __restrict__ p);
|
||||
#endif
|
||||
@@ -10,7 +10,7 @@ __device__ ds64 ktos(const unsigned char* key) noexcept {
|
||||
str.data[65] = '\0';
|
||||
return str;
|
||||
}
|
||||
__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept {
|
||||
__device__ ds46 getAddr(const Addr16 rawAddr) noexcept {
|
||||
ds46 addrStr;
|
||||
unsigned pos = 0;
|
||||
#pragma unroll 8
|
||||
|
||||
@@ -8,11 +8,10 @@ struct ds46 {
|
||||
};
|
||||
using Addr16 = unsigned char[16];
|
||||
using Key32 = unsigned char[32];
|
||||
struct __align__(32) KeysBox32 {
|
||||
Key32 PublicKey;
|
||||
Key32 PrivateKey;
|
||||
struct KeysBox32 {
|
||||
Key32 PublicKey, PrivateKey;
|
||||
};__device__ ds64 ktos(const unsigned char* key) noexcept;
|
||||
__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept;
|
||||
__device__ ds46 getAddr(const Addr16 rawAddr) noexcept;
|
||||
__device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Addr16& rawAddr) noexcept;
|
||||
__device__ void invertKey(const unsigned char* key, unsigned char* inverted);
|
||||
#endif
|
||||
@@ -143,7 +143,7 @@ int main(int argc, char* argv[]) {
|
||||
args<<<1, 1 >>>(d_argv, argc, d_result);
|
||||
cudaDeviceSynchronize();
|
||||
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
|
||||
cudaGetDeviceProperties(&prop, 0);
|
||||
cudaGetDeviceProperties_v2(&prop, 0);
|
||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGenKernel, THREADSPB, 0);
|
||||
const int tTh = mBpSM * prop.multiProcessorCount * THREADSPB;
|
||||
printf("High addrs: 2%02x+\nSMs: %d\nTotalThreads: %d\nBlocks: %d (Threads: %d)\n", h_high, prop.multiProcessorCount, tTh, tTh / THREADSPB, THREADSPB);
|
||||
@@ -156,7 +156,7 @@ int main(int argc, char* argv[]) {
|
||||
unsigned* h_seeds = (unsigned*)malloc(tTh * sizeof(unsigned));
|
||||
cudaMemcpy(h_seeds, d_seeds, tTh * sizeof(unsigned), cudaMemcpyDeviceToHost);
|
||||
if (checkSeeds(h_seeds, tTh)) {
|
||||
fprintf(stderr, "Duplicate seeds found!\n");
|
||||
fprintf(stderr, "Error: DUPLICATE SEEDS FOUND!\n");
|
||||
free(h_seeds);
|
||||
cudaFree(d_seeds);
|
||||
cudaFree(rst);
|
||||
|
||||
Reference in New Issue
Block a user