optimized
This commit is contained in:
parent
68472fc649
commit
3634311467
@ -1,30 +1,30 @@
|
|||||||
#include <edsign.cuh>
|
#include <edsign.cuh>
|
||||||
#include <ed25519.cuh>
|
#include <ed25519.cuh>
|
||||||
#include <sha512.cuh>
|
#include <sha512.cuh>
|
||||||
__device__ void expand_key(unsigned char* expanded, const unsigned char* secret) {
|
__device__ __forceinline__ void expand_key(unsigned char* expanded, const unsigned char* secret) {
|
||||||
struct sha512_state s;
|
struct sha512_state s;
|
||||||
memcpy(&s, &sha512_initial_state, sizeof(s));
|
memcpy(&s, &sha512_initial_state, sizeof(s));
|
||||||
sha512_final(&s, secret);
|
sha512_final(&s, secret);
|
||||||
sha512_get(&s, expanded);
|
sha512_get(&s, expanded);
|
||||||
ed25519_prepare(expanded);
|
ed25519_prepare(expanded);
|
||||||
}
|
}
|
||||||
__device__ void pp(unsigned char* packed, const struct ed25519_pt* p) {
|
__device__ __forceinline__ void pp(unsigned char* packed, const struct ed25519_pt* p) {
|
||||||
unsigned char x[32], y[32];
|
unsigned char x[32], y[32];
|
||||||
ed25519_unproject(x, y, p);
|
ed25519_unproject(x, y, p);
|
||||||
ed25519_pack(packed, x, y);
|
ed25519_pack(packed, x, y);
|
||||||
}
|
}
|
||||||
__device__ void sm_pack(unsigned char* r, const unsigned char* k) {
|
__device__ __forceinline__ void sm_pack(unsigned char* r, const unsigned char* k) {
|
||||||
struct ed25519_pt p;
|
struct ed25519_pt p;
|
||||||
ed25519_smult(&p, &ed25519_base, k);
|
ed25519_smult(&p, &ed25519_base, k);
|
||||||
pp(r, &p);
|
pp(r, &p);
|
||||||
}
|
}
|
||||||
__device__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret) {
|
__device__ __forceinline__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret) {
|
||||||
unsigned char expanded[64];
|
unsigned char expanded[64];
|
||||||
expand_key(expanded, secret);
|
expand_key(expanded, secret);
|
||||||
sm_pack(pub, expanded);
|
sm_pack(pub, expanded);
|
||||||
}
|
}
|
||||||
__device__ void compact_wipe(void* __restrict__ data) {
|
__device__ __forceinline__ void compact_wipe(void* __restrict__ data) {
|
||||||
volatile unsigned char* p = (volatile unsigned char*)data;
|
unsigned char* p = (unsigned char*)data;
|
||||||
unsigned long i = 0;
|
unsigned long i = 0;
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (; i + 3 < 32; i += 4) {
|
for (; i + 3 < 32; i += 4) {
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#ifndef __EDSIGN_CUH
|
#ifndef __EDSIGN_CUH
|
||||||
#define __EDSIGN_CUH
|
#define __EDSIGN_CUH
|
||||||
__device__ void expand_key(unsigned char* expanded, const unsigned char* secret);
|
__device__ __forceinline__ void expand_key(unsigned char* expanded, const unsigned char* secret);
|
||||||
__device__ void pp(unsigned char* packed, const struct ed25519_pt* p);
|
__device__ __forceinline__ void pp(unsigned char* packed, const struct ed25519_pt* p);
|
||||||
__device__ void sm_pack(unsigned char* r, const unsigned char* k);
|
__device__ __forceinline__ void sm_pack(unsigned char* r, const unsigned char* k);
|
||||||
__device__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret);
|
__device__ __forceinline__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret);
|
||||||
__device__ void compact_wipe(void* __restrict__ data);
|
__device__ __forceinline__ void compact_wipe(void* __restrict__ data);
|
||||||
__device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]);
|
__device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]);
|
||||||
#endif
|
#endif
|
@ -1,6 +1,5 @@
|
|||||||
#include <f25519.cuh>
|
#include <f25519.cuh>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <stdint.h>
|
|
||||||
__device__ void f25519_copy(unsigned char* __restrict__ x, const unsigned char* __restrict__ a) {
|
__device__ void f25519_copy(unsigned char* __restrict__ x, const unsigned char* __restrict__ a) {
|
||||||
const uint4* src = reinterpret_cast<const uint4*>(a);
|
const uint4* src = reinterpret_cast<const uint4*>(a);
|
||||||
uint4* dst = reinterpret_cast<uint4*>(x);
|
uint4* dst = reinterpret_cast<uint4*>(x);
|
||||||
@ -24,11 +23,12 @@ __device__ void f25519_select(unsigned char* __restrict__ dst, const unsigned ch
|
|||||||
d[0] = res0;
|
d[0] = res0;
|
||||||
d[1] = res1;
|
d[1] = res1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
__device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
||||||
__align__(32) unsigned char minusp[32];
|
__align__(32) unsigned char minusp[32];
|
||||||
unsigned c = (x[31] >> 7) * 19;
|
unsigned c = (x[31] >> 7) * 19;
|
||||||
x[31] &= 127;
|
x[31] &= 127;
|
||||||
#pragma unroll 32
|
#pragma unroll
|
||||||
for (int i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
c += x[i];
|
c += x[i];
|
||||||
x[i] = (unsigned char)c;
|
x[i] = (unsigned char)c;
|
||||||
@ -36,7 +36,7 @@ __device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
|||||||
}
|
}
|
||||||
c = 19;
|
c = 19;
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i + 1 < 32; i++) {
|
for (int i = 0; i < 31; i++) {
|
||||||
c += x[i];
|
c += x[i];
|
||||||
minusp[i] = (unsigned char)c;
|
minusp[i] = (unsigned char)c;
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
@ -47,89 +47,81 @@ __device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
|||||||
}
|
}
|
||||||
__device__ void f25519_add(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
__device__ void f25519_add(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||||
unsigned c = 0;
|
unsigned c = 0;
|
||||||
#pragma unroll 32
|
#pragma unroll
|
||||||
for (int i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
c = (c >> 8) + ((unsigned)a[i]) + ((unsigned)b[i]);
|
c = (c >> 8) + ((unsigned)a[i]) + ((unsigned)b[i]);
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
}
|
}
|
||||||
r[31] &= 127;
|
r[31] &= 127;
|
||||||
c = (c >> 7) * 19;
|
c = (c >> 7) * 19;
|
||||||
#pragma unroll 32
|
#pragma unroll
|
||||||
for (int i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
c += r[i];
|
c += r[i];
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ void f25519_sub(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
__device__ void f25519_sub(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||||
unsigned c = 218;
|
unsigned c = 218;
|
||||||
int i = 0;
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (i = 0; i + 1 < 32; i++) {
|
for (int i = 0; i < 31; i++) {
|
||||||
c += 65280 + ((unsigned)a[i]) - ((unsigned)b[i]);
|
c += 65280 + ((unsigned)a[i]) - ((unsigned)b[i]);
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
}
|
}
|
||||||
c += ((unsigned)a[i]) - ((unsigned)b[i]);
|
c += ((unsigned)a[31]) - ((unsigned)b[31]);
|
||||||
r[i] = (unsigned char)(c & 127);
|
r[31] = (unsigned char)(c & 127);
|
||||||
c = (c >> 7) * 19;
|
c = (c >> 7) * 19;
|
||||||
#pragma unroll 32
|
#pragma unroll
|
||||||
for (i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
c += r[i];
|
c += r[i];
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ void f25519_neg(unsigned char* __restrict__ r, const unsigned char* __restrict__ a) {
|
__device__ void f25519_neg(unsigned char* __restrict__ r, const unsigned char* __restrict__ a) {
|
||||||
unsigned c = 218;
|
unsigned c = 218;
|
||||||
int i = 0;
|
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (i = 0; i + 1 < 32; i++) {
|
for (int i = 0; i < 31; i++) {
|
||||||
c += 65280 - ((unsigned)a[i]);
|
c += 65280 - ((unsigned)a[i]);
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
}
|
}
|
||||||
c -= ((unsigned)a[i]);
|
c -= ((unsigned)a[31]);
|
||||||
r[i] = (unsigned char)(c & 127);
|
r[31] = (unsigned char)(c & 127);
|
||||||
c = (c >> 7) * 19;
|
c = (c >> 7) * 19;
|
||||||
#pragma unroll 32
|
#pragma unroll
|
||||||
for (i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
c += r[i];
|
c += r[i];
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ void f25519_mul__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
__device__ void f25519_mul__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||||
unsigned c = 0;
|
unsigned c = 0;
|
||||||
#pragma unroll 32
|
#pragma unroll
|
||||||
for (int i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
for (int j = 0; j <= i; j++) {
|
for (int j = 0; j <= i; j++)
|
||||||
c += ((unsigned)a[j]) * ((unsigned)b[i - j]);
|
c += ((unsigned)a[j]) * ((unsigned)b[i - j]);
|
||||||
}
|
for (int j = i + 1; j < 32; j++)
|
||||||
for (int j = i + 1; j < 32; j++) {
|
|
||||||
c += ((unsigned)a[j]) * ((unsigned)b[32 + i - j]) * 38;
|
c += ((unsigned)a[j]) * ((unsigned)b[32 + i - j]) * 38;
|
||||||
}
|
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
}
|
}
|
||||||
r[31] &= 127;
|
r[31] &= 127;
|
||||||
c = (c >> 7) * 19;
|
c = (c >> 7) * 19;
|
||||||
#pragma unroll 32
|
#pragma unroll
|
||||||
for (int i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
c += r[i];
|
c += r[i];
|
||||||
r[i] = (unsigned char)c;
|
r[i] = (unsigned char)c;
|
||||||
c >>= 8;
|
c >>= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ void f25519_inv__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ x) {
|
__device__ void f25519_inv__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ x) {
|
||||||
__align__(32) unsigned char s[32];
|
__align__(32) unsigned char s[32];
|
||||||
f25519_mul__distinct(s, x, x);
|
f25519_mul__distinct(s, x, x);
|
||||||
f25519_mul__distinct(r, s, x);
|
f25519_mul__distinct(r, s, x);
|
||||||
#pragma unroll 248
|
#pragma unroll
|
||||||
for (int i = 0; i < 248; i++) {
|
for (int i = 0; i < 248; i++) {
|
||||||
f25519_mul__distinct(s, r, r);
|
f25519_mul__distinct(s, r, r);
|
||||||
f25519_mul__distinct(r, s, x);
|
f25519_mul__distinct(r, s, x);
|
||||||
@ -142,4 +134,4 @@ __device__ void f25519_inv__distinct(unsigned char* __restrict__ r, const unsign
|
|||||||
f25519_mul__distinct(r, s, x);
|
f25519_mul__distinct(r, s, x);
|
||||||
f25519_mul__distinct(s, r, r);
|
f25519_mul__distinct(s, r, r);
|
||||||
f25519_mul__distinct(r, s, x);
|
f25519_mul__distinct(r, s, x);
|
||||||
}
|
}
|
@ -28,14 +28,10 @@ __device__ __constant__ unsigned long round_k[80] = {
|
|||||||
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
|
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
|
||||||
};
|
};
|
||||||
__device__ __forceinline__ unsigned long load64(const unsigned char* __restrict__ x) {
|
__device__ __forceinline__ unsigned long load64(const unsigned char* __restrict__ x) {
|
||||||
return (static_cast<unsigned long>(x[0]) << 56) |
|
return (static_cast<unsigned long>(x[0]) << 56) | (static_cast<unsigned long>(x[1]) << 48) |
|
||||||
(static_cast<unsigned long>(x[1]) << 48) |
|
(static_cast<unsigned long>(x[2]) << 40) | (static_cast<unsigned long>(x[3]) << 32) |
|
||||||
(static_cast<unsigned long>(x[2]) << 40) |
|
(static_cast<unsigned long>(x[4]) << 24) | (static_cast<unsigned long>(x[5]) << 16) |
|
||||||
(static_cast<unsigned long>(x[3]) << 32) |
|
(static_cast<unsigned long>(x[6]) << 8) | (static_cast<unsigned long>(x[7]));
|
||||||
(static_cast<unsigned long>(x[4]) << 24) |
|
|
||||||
(static_cast<unsigned long>(x[5]) << 16) |
|
|
||||||
(static_cast<unsigned long>(x[6]) << 8) |
|
|
||||||
(static_cast<unsigned long>(x[7]));
|
|
||||||
}
|
}
|
||||||
__device__ __forceinline__ void store64(unsigned char* __restrict__ x, unsigned long v) {
|
__device__ __forceinline__ void store64(unsigned char* __restrict__ x, unsigned long v) {
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
|
@ -18,8 +18,7 @@ __device__ int parameters(const char* arg) noexcept {
|
|||||||
extract_substring(arg, substr_start, sub_arg, 256);
|
extract_substring(arg, substr_start, sub_arg, 256);
|
||||||
if (cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1) {
|
if (cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1) {
|
||||||
unsigned tmp_high;
|
unsigned tmp_high;
|
||||||
if (cstring_to_ull(sub_arg, &tmp_high) != 0)
|
if (cstring_to_ull(sub_arg, &tmp_high) != 0) return 1;
|
||||||
return 1;
|
|
||||||
d_high = tmp_high;
|
d_high = tmp_high;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
@ -91,7 +90,7 @@ __global__ void KeyGen(curandState* randStates) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
const int thPerBlock = 128;
|
const int thPerBlock = 256;
|
||||||
int* d_result, mBpSM, h_high;
|
int* d_result, mBpSM, h_high;
|
||||||
char** d_argv;
|
char** d_argv;
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user