optimized
This commit is contained in:
parent
68472fc649
commit
3634311467
@ -1,30 +1,30 @@
|
||||
#include <edsign.cuh>
|
||||
#include <ed25519.cuh>
|
||||
#include <sha512.cuh>
|
||||
__device__ void expand_key(unsigned char* expanded, const unsigned char* secret) {
|
||||
__device__ __forceinline__ void expand_key(unsigned char* expanded, const unsigned char* secret) {
|
||||
struct sha512_state s;
|
||||
memcpy(&s, &sha512_initial_state, sizeof(s));
|
||||
sha512_final(&s, secret);
|
||||
sha512_get(&s, expanded);
|
||||
ed25519_prepare(expanded);
|
||||
}
|
||||
__device__ void pp(unsigned char* packed, const struct ed25519_pt* p) {
|
||||
__device__ __forceinline__ void pp(unsigned char* packed, const struct ed25519_pt* p) {
|
||||
unsigned char x[32], y[32];
|
||||
ed25519_unproject(x, y, p);
|
||||
ed25519_pack(packed, x, y);
|
||||
}
|
||||
__device__ void sm_pack(unsigned char* r, const unsigned char* k) {
|
||||
__device__ __forceinline__ void sm_pack(unsigned char* r, const unsigned char* k) {
|
||||
struct ed25519_pt p;
|
||||
ed25519_smult(&p, &ed25519_base, k);
|
||||
pp(r, &p);
|
||||
}
|
||||
__device__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret) {
|
||||
__device__ __forceinline__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret) {
|
||||
unsigned char expanded[64];
|
||||
expand_key(expanded, secret);
|
||||
sm_pack(pub, expanded);
|
||||
}
|
||||
__device__ void compact_wipe(void* __restrict__ data) {
|
||||
volatile unsigned char* p = (volatile unsigned char*)data;
|
||||
__device__ __forceinline__ void compact_wipe(void* __restrict__ data) {
|
||||
unsigned char* p = (unsigned char*)data;
|
||||
unsigned long i = 0;
|
||||
#pragma unroll
|
||||
for (; i + 3 < 32; i += 4) {
|
||||
|
@ -1,9 +1,9 @@
|
||||
#ifndef __EDSIGN_CUH
|
||||
#define __EDSIGN_CUH
|
||||
__device__ void expand_key(unsigned char* expanded, const unsigned char* secret);
|
||||
__device__ void pp(unsigned char* packed, const struct ed25519_pt* p);
|
||||
__device__ void sm_pack(unsigned char* r, const unsigned char* k);
|
||||
__device__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret);
|
||||
__device__ void compact_wipe(void* __restrict__ data);
|
||||
__device__ __forceinline__ void expand_key(unsigned char* expanded, const unsigned char* secret);
|
||||
__device__ __forceinline__ void pp(unsigned char* packed, const struct ed25519_pt* p);
|
||||
__device__ __forceinline__ void sm_pack(unsigned char* r, const unsigned char* k);
|
||||
__device__ __forceinline__ void edsign_sec_to_pub(unsigned char* pub, const unsigned char* secret);
|
||||
__device__ __forceinline__ void compact_wipe(void* __restrict__ data);
|
||||
__device__ void ed25519_keygen(unsigned char private_key[64], unsigned char public_key[32], unsigned char random_seed[32]);
|
||||
#endif
|
@ -1,6 +1,5 @@
|
||||
#include <f25519.cuh>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdint.h>
|
||||
__device__ void f25519_copy(unsigned char* __restrict__ x, const unsigned char* __restrict__ a) {
|
||||
const uint4* src = reinterpret_cast<const uint4*>(a);
|
||||
uint4* dst = reinterpret_cast<uint4*>(x);
|
||||
@ -24,11 +23,12 @@ __device__ void f25519_select(unsigned char* __restrict__ dst, const unsigned ch
|
||||
d[0] = res0;
|
||||
d[1] = res1;
|
||||
}
|
||||
|
||||
__device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
||||
__align__(32) unsigned char minusp[32];
|
||||
unsigned c = (x[31] >> 7) * 19;
|
||||
x[31] &= 127;
|
||||
#pragma unroll 32
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c += x[i];
|
||||
x[i] = (unsigned char)c;
|
||||
@ -36,7 +36,7 @@ __device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
||||
}
|
||||
c = 19;
|
||||
#pragma unroll
|
||||
for (int i = 0; i + 1 < 32; i++) {
|
||||
for (int i = 0; i < 31; i++) {
|
||||
c += x[i];
|
||||
minusp[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
@ -47,89 +47,81 @@ __device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
||||
}
|
||||
__device__ void f25519_add(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||
unsigned c = 0;
|
||||
#pragma unroll 32
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c = (c >> 8) + ((unsigned)a[i]) + ((unsigned)b[i]);
|
||||
r[i] = (unsigned char)c;
|
||||
}
|
||||
r[31] &= 127;
|
||||
c = (c >> 7) * 19;
|
||||
#pragma unroll 32
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c += r[i];
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void f25519_sub(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||
unsigned c = 218;
|
||||
int i = 0;
|
||||
#pragma unroll
|
||||
for (i = 0; i + 1 < 32; i++) {
|
||||
for (int i = 0; i < 31; i++) {
|
||||
c += 65280 + ((unsigned)a[i]) - ((unsigned)b[i]);
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
c += ((unsigned)a[i]) - ((unsigned)b[i]);
|
||||
r[i] = (unsigned char)(c & 127);
|
||||
c += ((unsigned)a[31]) - ((unsigned)b[31]);
|
||||
r[31] = (unsigned char)(c & 127);
|
||||
c = (c >> 7) * 19;
|
||||
#pragma unroll 32
|
||||
for (i = 0; i < 32; i++) {
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c += r[i];
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void f25519_neg(unsigned char* __restrict__ r, const unsigned char* __restrict__ a) {
|
||||
unsigned c = 218;
|
||||
int i = 0;
|
||||
#pragma unroll
|
||||
for (i = 0; i + 1 < 32; i++) {
|
||||
for (int i = 0; i < 31; i++) {
|
||||
c += 65280 - ((unsigned)a[i]);
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
c -= ((unsigned)a[i]);
|
||||
r[i] = (unsigned char)(c & 127);
|
||||
c -= ((unsigned)a[31]);
|
||||
r[31] = (unsigned char)(c & 127);
|
||||
c = (c >> 7) * 19;
|
||||
#pragma unroll 32
|
||||
for (i = 0; i < 32; i++) {
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c += r[i];
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void f25519_mul__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
||||
unsigned c = 0;
|
||||
#pragma unroll 32
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c >>= 8;
|
||||
for (int j = 0; j <= i; j++) {
|
||||
for (int j = 0; j <= i; j++)
|
||||
c += ((unsigned)a[j]) * ((unsigned)b[i - j]);
|
||||
}
|
||||
for (int j = i + 1; j < 32; j++) {
|
||||
for (int j = i + 1; j < 32; j++)
|
||||
c += ((unsigned)a[j]) * ((unsigned)b[32 + i - j]) * 38;
|
||||
}
|
||||
r[i] = (unsigned char)c;
|
||||
}
|
||||
r[31] &= 127;
|
||||
c = (c >> 7) * 19;
|
||||
#pragma unroll 32
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 32; i++) {
|
||||
c += r[i];
|
||||
r[i] = (unsigned char)c;
|
||||
c >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void f25519_inv__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ x) {
|
||||
__align__(32) unsigned char s[32];
|
||||
f25519_mul__distinct(s, x, x);
|
||||
f25519_mul__distinct(r, s, x);
|
||||
#pragma unroll 248
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 248; i++) {
|
||||
f25519_mul__distinct(s, r, r);
|
||||
f25519_mul__distinct(r, s, x);
|
||||
|
@ -28,14 +28,10 @@ __device__ __constant__ unsigned long round_k[80] = {
|
||||
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
|
||||
};
|
||||
__device__ __forceinline__ unsigned long load64(const unsigned char* __restrict__ x) {
|
||||
return (static_cast<unsigned long>(x[0]) << 56) |
|
||||
(static_cast<unsigned long>(x[1]) << 48) |
|
||||
(static_cast<unsigned long>(x[2]) << 40) |
|
||||
(static_cast<unsigned long>(x[3]) << 32) |
|
||||
(static_cast<unsigned long>(x[4]) << 24) |
|
||||
(static_cast<unsigned long>(x[5]) << 16) |
|
||||
(static_cast<unsigned long>(x[6]) << 8) |
|
||||
(static_cast<unsigned long>(x[7]));
|
||||
return (static_cast<unsigned long>(x[0]) << 56) | (static_cast<unsigned long>(x[1]) << 48) |
|
||||
(static_cast<unsigned long>(x[2]) << 40) | (static_cast<unsigned long>(x[3]) << 32) |
|
||||
(static_cast<unsigned long>(x[4]) << 24) | (static_cast<unsigned long>(x[5]) << 16) |
|
||||
(static_cast<unsigned long>(x[6]) << 8) | (static_cast<unsigned long>(x[7]));
|
||||
}
|
||||
__device__ __forceinline__ void store64(unsigned char* __restrict__ x, unsigned long v) {
|
||||
#pragma unroll 8
|
||||
|
@ -18,8 +18,7 @@ __device__ int parameters(const char* arg) noexcept {
|
||||
extract_substring(arg, substr_start, sub_arg, 256);
|
||||
if (cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1) {
|
||||
unsigned tmp_high;
|
||||
if (cstring_to_ull(sub_arg, &tmp_high) != 0)
|
||||
return 1;
|
||||
if (cstring_to_ull(sub_arg, &tmp_high) != 0) return 1;
|
||||
d_high = tmp_high;
|
||||
}
|
||||
return 0;
|
||||
@ -91,7 +90,7 @@ __global__ void KeyGen(curandState* randStates) {
|
||||
}
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
const int thPerBlock = 128;
|
||||
const int thPerBlock = 256;
|
||||
int* d_result, mBpSM, h_high;
|
||||
char** d_argv;
|
||||
cudaDeviceProp prop;
|
||||
|
Loading…
x
Reference in New Issue
Block a user