|
|
|
|
@@ -24,36 +24,9 @@ void __device__ __host__ fe_1(fe& __restrict__ h) {
|
|
|
|
|
h[9] = 0;
|
|
|
|
|
}
|
|
|
|
|
void __device__ __host__ fe_add(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
|
|
|
|
|
signed int f0 = f[0];
|
|
|
|
|
signed int f1 = f[1];
|
|
|
|
|
signed int f2 = f[2];
|
|
|
|
|
signed int f3 = f[3];
|
|
|
|
|
signed int f4 = f[4];
|
|
|
|
|
signed int f5 = f[5];
|
|
|
|
|
signed int f6 = f[6];
|
|
|
|
|
signed int f7 = f[7];
|
|
|
|
|
signed int f8 = f[8];
|
|
|
|
|
signed int f9 = f[9];
|
|
|
|
|
signed int g0 = g[0];
|
|
|
|
|
signed int g1 = g[1];
|
|
|
|
|
signed int g2 = g[2];
|
|
|
|
|
signed int g3 = g[3];
|
|
|
|
|
signed int g4 = g[4];
|
|
|
|
|
signed int g5 = g[5];
|
|
|
|
|
signed int g6 = g[6];
|
|
|
|
|
signed int g7 = g[7];
|
|
|
|
|
signed int g8 = g[8];
|
|
|
|
|
signed int g9 = g[9];
|
|
|
|
|
signed int h0 = f0 + g0;
|
|
|
|
|
signed int h1 = f1 + g1;
|
|
|
|
|
signed int h2 = f2 + g2;
|
|
|
|
|
signed int h3 = f3 + g3;
|
|
|
|
|
signed int h4 = f4 + g4;
|
|
|
|
|
signed int h5 = f5 + g5;
|
|
|
|
|
signed int h6 = f6 + g6;
|
|
|
|
|
signed int h7 = f7 + g7;
|
|
|
|
|
signed int h8 = f8 + g8;
|
|
|
|
|
signed int h9 = f9 + g9;
|
|
|
|
|
const signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
|
|
|
|
const signed int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
|
|
|
|
const signed int h0 = f0 + g0, h1 = f1 + g1, h2 = f2 + g2, h3 = f3 + g3, h4 = f4 + g4, h5 = f5 + g5, h6 = f6 + g6, h7 = f7 + g7, h8 = f8 + g8, h9 = f9 + g9;
|
|
|
|
|
h[0] = h0;
|
|
|
|
|
h[1] = h1;
|
|
|
|
|
h[2] = h2;
|
|
|
|
|
@@ -66,47 +39,10 @@ void __device__ __host__ fe_add(fe& __restrict__ h, const fe& __restrict__ f, co
|
|
|
|
|
h[9] = h9;
|
|
|
|
|
}
|
|
|
|
|
void __device__ __host__ fe_cmov(fe& __restrict__ f, const fe& __restrict__ g, unsigned b) {
|
|
|
|
|
unsigned bb = ~b + 1;
|
|
|
|
|
signed int f0 = f[0];
|
|
|
|
|
signed int f1 = f[1];
|
|
|
|
|
signed int f2 = f[2];
|
|
|
|
|
signed int f3 = f[3];
|
|
|
|
|
signed int f4 = f[4];
|
|
|
|
|
signed int f5 = f[5];
|
|
|
|
|
signed int f6 = f[6];
|
|
|
|
|
signed int f7 = f[7];
|
|
|
|
|
signed int f8 = f[8];
|
|
|
|
|
signed int f9 = f[9];
|
|
|
|
|
signed int g0 = g[0];
|
|
|
|
|
signed int g1 = g[1];
|
|
|
|
|
signed int g2 = g[2];
|
|
|
|
|
signed int g3 = g[3];
|
|
|
|
|
signed int g4 = g[4];
|
|
|
|
|
signed int g5 = g[5];
|
|
|
|
|
signed int g6 = g[6];
|
|
|
|
|
signed int g7 = g[7];
|
|
|
|
|
signed int g8 = g[8];
|
|
|
|
|
signed int g9 = g[9];
|
|
|
|
|
signed int x0 = f0 ^ g0;
|
|
|
|
|
signed int x1 = f1 ^ g1;
|
|
|
|
|
signed int x2 = f2 ^ g2;
|
|
|
|
|
signed int x3 = f3 ^ g3;
|
|
|
|
|
signed int x4 = f4 ^ g4;
|
|
|
|
|
signed int x5 = f5 ^ g5;
|
|
|
|
|
signed int x6 = f6 ^ g6;
|
|
|
|
|
signed int x7 = f7 ^ g7;
|
|
|
|
|
signed int x8 = f8 ^ g8;
|
|
|
|
|
signed int x9 = f9 ^ g9;
|
|
|
|
|
x0 &= bb;
|
|
|
|
|
x1 &= bb;
|
|
|
|
|
x2 &= bb;
|
|
|
|
|
x3 &= bb;
|
|
|
|
|
x4 &= bb;
|
|
|
|
|
x5 &= bb;
|
|
|
|
|
x6 &= bb;
|
|
|
|
|
x7 &= bb;
|
|
|
|
|
x8 &= bb;
|
|
|
|
|
x9 &= bb;
|
|
|
|
|
const signed int x = -b;
|
|
|
|
|
const signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
|
|
|
|
const signed int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
|
|
|
|
const signed int x0 = (f0 ^ g0) & x, x1 = (f1 ^ g1) & x, x2 = (f2 ^ g2) & x, x3 = (f3 ^ g3) & x, x4 = (f4 ^ g4) & x, x5 = (f5 ^ g5) & x, x6 = (f6 ^ g6) & x, x7 = (f7 ^ g7) & x, x8 = (f8 ^ g8) & x, x9 = (f9 ^ g9) & x;
|
|
|
|
|
f[0] = f0 ^ x0;
|
|
|
|
|
f[1] = f1 ^ x1;
|
|
|
|
|
f[2] = f2 ^ x2;
|
|
|
|
|
@@ -119,16 +55,7 @@ void __device__ __host__ fe_cmov(fe& __restrict__ f, const fe& __restrict__ g, u
|
|
|
|
|
f[9] = f9 ^ x9;
|
|
|
|
|
}
|
|
|
|
|
void __device__ __host__ fe_copy(fe& __restrict__ h, const fe& __restrict__ f) {
|
|
|
|
|
signed int f0 = f[0];
|
|
|
|
|
signed int f1 = f[1];
|
|
|
|
|
signed int f2 = f[2];
|
|
|
|
|
signed int f3 = f[3];
|
|
|
|
|
signed int f4 = f[4];
|
|
|
|
|
signed int f5 = f[5];
|
|
|
|
|
signed int f6 = f[6];
|
|
|
|
|
signed int f7 = f[7];
|
|
|
|
|
signed int f8 = f[8];
|
|
|
|
|
signed int f9 = f[9];
|
|
|
|
|
const signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
|
|
|
|
h[0] = f0;
|
|
|
|
|
h[1] = f1;
|
|
|
|
|
h[2] = f2;
|
|
|
|
|
@@ -153,10 +80,10 @@ void __device__ __host__ fe_invert(fe& __restrict__ out, const fe& __restrict__
|
|
|
|
|
fe_sq(t2, t0);
|
|
|
|
|
fe_mul(t1, t1, t2);
|
|
|
|
|
fe_sq(t2, t1);
|
|
|
|
|
#pragma unroll 4
|
|
|
|
|
for (unsigned char i = 1; i < 5; ++i) {
|
|
|
|
|
fe_sq(t2, t2);
|
|
|
|
|
}
|
|
|
|
|
fe_sq(t2, t2);
|
|
|
|
|
fe_sq(t2, t2);
|
|
|
|
|
fe_sq(t2, t2);
|
|
|
|
|
fe_sq(t2, t2);
|
|
|
|
|
fe_mul(t1, t2, t1);
|
|
|
|
|
fe_sq(t2, t1);
|
|
|
|
|
#pragma unroll 9
|
|
|
|
|
@@ -253,26 +180,8 @@ void __device__ __host__ fe_mul(fe& __restrict__ h, const fe& __restrict__ f, co
|
|
|
|
|
h[9] = static_cast<int>(h9);
|
|
|
|
|
}
|
|
|
|
|
void __host__ __device__ fe_neg(fe& __restrict__ h, const fe& __restrict__ f) {
|
|
|
|
|
signed int f0 = f[0];
|
|
|
|
|
signed int f1 = f[1];
|
|
|
|
|
signed int f2 = f[2];
|
|
|
|
|
signed int f3 = f[3];
|
|
|
|
|
signed int f4 = f[4];
|
|
|
|
|
signed int f5 = f[5];
|
|
|
|
|
signed int f6 = f[6];
|
|
|
|
|
signed int f7 = f[7];
|
|
|
|
|
signed int f8 = f[8];
|
|
|
|
|
signed int f9 = f[9];
|
|
|
|
|
signed int h0 = -f0;
|
|
|
|
|
signed int h1 = -f1;
|
|
|
|
|
signed int h2 = -f2;
|
|
|
|
|
signed int h3 = -f3;
|
|
|
|
|
signed int h4 = -f4;
|
|
|
|
|
signed int h5 = -f5;
|
|
|
|
|
signed int h6 = -f6;
|
|
|
|
|
signed int h7 = -f7;
|
|
|
|
|
signed int h8 = -f8;
|
|
|
|
|
signed int h9 = -f9;
|
|
|
|
|
const signed long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
|
|
|
|
const signed int h0 = -f0, h1 = -f1, h2 = -f2, h3 = -f3, h4 = -f4, h5 = -f5, h6 = -f6, h7 = -f7, h8 = -f8, h9 = -f9;
|
|
|
|
|
h[0] = h0;
|
|
|
|
|
h[1] = h1;
|
|
|
|
|
h[2] = h2;
|
|
|
|
|
@@ -285,84 +194,19 @@ void __host__ __device__ fe_neg(fe& __restrict__ h, const fe& __restrict__ f) {
|
|
|
|
|
h[9] = h9;
|
|
|
|
|
}
|
|
|
|
|
void __host__ __device__ fe_sq2(fe& __restrict__ h, const fe& __restrict__ f) {
|
|
|
|
|
int f0 = f[0];
|
|
|
|
|
int f1 = f[1];
|
|
|
|
|
int f2 = f[2];
|
|
|
|
|
int f3 = f[3];
|
|
|
|
|
int f4 = f[4];
|
|
|
|
|
int f5 = f[5];
|
|
|
|
|
int f6 = f[6];
|
|
|
|
|
int f7 = f[7];
|
|
|
|
|
int f8 = f[8];
|
|
|
|
|
int f9 = f[9];
|
|
|
|
|
int f0_2 = f0 << 1;
|
|
|
|
|
int f1_2 = f1 << 1;
|
|
|
|
|
int f2_2 = f2 << 1;
|
|
|
|
|
int f3_2 = f3 << 1;
|
|
|
|
|
int f4_2 = f4 << 1;
|
|
|
|
|
int f5_2 = f5 << 1;
|
|
|
|
|
int f6_2 = f6 << 1;
|
|
|
|
|
int f7_2 = f7 << 1;
|
|
|
|
|
int f5_38 = 38 * f5;
|
|
|
|
|
int f6_19 = 19 * f6;
|
|
|
|
|
int f7_38 = 38 * f7;
|
|
|
|
|
int f8_19 = 19 * f8;
|
|
|
|
|
int f9_38 = 38 * f9;
|
|
|
|
|
long f0f0 = f0 * static_cast<long>(f0);
|
|
|
|
|
long f0f1_2 = f0_2 * static_cast<long>(f1);
|
|
|
|
|
long f0f2_2 = f0_2 * static_cast<long>(f2);
|
|
|
|
|
long f0f3_2 = f0_2 * static_cast<long>(f3);
|
|
|
|
|
long f0f4_2 = f0_2 * static_cast<long>(f4);
|
|
|
|
|
long f0f5_2 = f0_2 * static_cast<long>(f5);
|
|
|
|
|
long f0f6_2 = f0_2 * static_cast<long>(f6);
|
|
|
|
|
long f0f7_2 = f0_2 * static_cast<long>(f7);
|
|
|
|
|
long f0f8_2 = f0_2 * static_cast<long>(f8);
|
|
|
|
|
long f0f9_2 = f0_2 * static_cast<long>(f9);
|
|
|
|
|
long f1f1_2 = f1_2 * static_cast<long>(f1);
|
|
|
|
|
long f1f2_2 = f1_2 * static_cast<long>(f2);
|
|
|
|
|
long f1f3_4 = f1_2 * static_cast<long>(f3_2);
|
|
|
|
|
long f1f4_2 = f1_2 * static_cast<long>(f4);
|
|
|
|
|
long f1f5_4 = f1_2 * static_cast<long>(f5_2);
|
|
|
|
|
long f1f6_2 = f1_2 * static_cast<long>(f6);
|
|
|
|
|
long f1f7_4 = f1_2 * static_cast<long>(f7_2);
|
|
|
|
|
long f1f8_2 = f1_2 * static_cast<long>(f8);
|
|
|
|
|
long f1f9_76 = f1_2 * static_cast<long>(f9_38);
|
|
|
|
|
long f2f2 = f2 * static_cast<long>(f2);
|
|
|
|
|
long f2f3_2 = f2_2 * static_cast<long>(f3);
|
|
|
|
|
long f2f4_2 = f2_2 * static_cast<long>(f4);
|
|
|
|
|
long f2f5_2 = f2_2 * static_cast<long>(f5);
|
|
|
|
|
long f2f6_2 = f2_2 * static_cast<long>(f6);
|
|
|
|
|
long f2f7_2 = f2_2 * static_cast<long>(f7);
|
|
|
|
|
long f2f8_38 = f2_2 * static_cast<long>(f8_19);
|
|
|
|
|
long f2f9_38 = f2 * static_cast<long>(f9_38);
|
|
|
|
|
long f3f3_2 = f3_2 * static_cast<long>(f3);
|
|
|
|
|
long f3f4_2 = f3_2 * static_cast<long>(f4);
|
|
|
|
|
long f3f5_4 = f3_2 * static_cast<long>(f5_2);
|
|
|
|
|
long f3f6_2 = f3_2 * static_cast<long>(f6);
|
|
|
|
|
long f3f7_76 = f3_2 * static_cast<long>(f7_38);
|
|
|
|
|
long f3f8_38 = f3_2 * static_cast<long>(f8_19);
|
|
|
|
|
long f3f9_76 = f3_2 * static_cast<long>(f9_38);
|
|
|
|
|
long f4f4 = f4 * static_cast<long>(f4);
|
|
|
|
|
long f4f5_2 = f4_2 * static_cast<long>(f5);
|
|
|
|
|
long f4f6_38 = f4_2 * static_cast<long>(f6_19);
|
|
|
|
|
long f4f7_38 = f4 * static_cast<long>(f7_38);
|
|
|
|
|
long f4f8_38 = f4_2 * static_cast<long>(f8_19);
|
|
|
|
|
long f4f9_38 = f4 * static_cast<long>(f9_38);
|
|
|
|
|
long f5f5_38 = f5 * static_cast<long>(f5_38);
|
|
|
|
|
long f5f6_38 = f5_2 * static_cast<long>(f6_19);
|
|
|
|
|
long f5f7_76 = f5_2 * static_cast<long>(f7_38);
|
|
|
|
|
long f5f8_38 = f5_2 * static_cast<long>(f8_19);
|
|
|
|
|
long f5f9_76 = f5_2 * static_cast<long>(f9_38);
|
|
|
|
|
long f6f6_19 = f6 * static_cast<long>(f6_19);
|
|
|
|
|
long f6f7_38 = f6 * static_cast<long>(f7_38);
|
|
|
|
|
long f6f8_38 = f6_2 * static_cast<long>(f8_19);
|
|
|
|
|
long f6f9_38 = f6 * static_cast<long>(f9_38);
|
|
|
|
|
long f7f7_38 = f7 * static_cast<long>(f7_38);
|
|
|
|
|
long f7f8_38 = f7_2 * static_cast<long>(f8_19);
|
|
|
|
|
long f7f9_76 = f7_2 * static_cast<long>(f9_38);
|
|
|
|
|
long f8f8_19 = f8 * static_cast<long>(f8_19);
|
|
|
|
|
long f8f9_38 = f8 * static_cast<long>(f9_38);
|
|
|
|
|
long f9f9_38 = f9 * static_cast<long>(f9_38);
|
|
|
|
|
const signed long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
|
|
|
|
const signed long f0_2 = f0 << 1, f1_2 = f1 << 1, f2_2 = f2 << 1, f3_2 = f3 << 1, f4_2 = f4 << 1, f5_2 = f5 << 1, f6_2 = f6 << 1, f7_2 = f7 << 1;
|
|
|
|
|
const signed int f5_38 = 38 * f5, f6_19 = 19 * f6, f7_38 = 38 * f7, f8_19 = 19 * f8, f9_38 = 38 * f9;
|
|
|
|
|
const long f0f0 = f0 * f0, f0f1_2 = f0_2 * f1, f0f2_2 = f0_2 * f2, f0f3_2 = f0_2 * f3, f0f4_2 = f0_2 * f4, f0f5_2 = f0_2 * f5, f0f6_2 = f0_2 * f6, f0f7_2 = f0_2 * f7, f0f8_2 = f0_2 * f8, f0f9_2 = f0_2 * f9;
|
|
|
|
|
const long f1f1_2 = f1_2 * f1, f1f2_2 = f1_2 * f2, f1f3_4 = f1_2 * f3_2, f1f4_2 = f1_2 * f4, f1f5_4 = f1_2 * f5_2, f1f6_2 = f1_2 * f6, f1f7_4 = f1_2 * f7_2, f1f8_2 = f1_2 * f8, f1f9_76 = f1_2 * f9_38;
|
|
|
|
|
const long f2f2 = f2 * f2, f2f3_2 = f2_2 * f3, f2f4_2 = f2_2 * f4, f2f5_2 = f2_2 * f5, f2f6_2 = f2_2 * f6, f2f7_2 = f2_2 * f7, f2f8_38 = f2_2 * f8_19, f2f9_38 = f2 * f9_38;
|
|
|
|
|
const long f3f3_2 = f3_2 * f3, f3f4_2 = f3_2 * f4, f3f5_4 = f3_2 * f5_2, f3f6_2 = f3_2 * f6, f3f7_76 = f3_2 * f7_38, f3f8_38 = f3_2 * f8_19, f3f9_76 = f3_2 * f9_38;
|
|
|
|
|
const long f4f4 = f4 * f4, f4f5_2 = f4_2 * f5, f4f6_38 = f4_2 * f6_19, f4f7_38 = f4 * f7_38, f4f8_38 = f4_2 * f8_19, f4f9_38 = f4 * f9_38;
|
|
|
|
|
const long f5f5_38 = f5 * f5_38, f5f6_38 = f5_2 * f6_19, f5f7_76 = f5_2 * f7_38, f5f8_38 = f5_2 * f8_19, f5f9_76 = f5_2 * f9_38;
|
|
|
|
|
const long f6f6_19 = f6 * f6_19, f6f7_38 = f6 * f7_38, f6f8_38 = f6_2 * f8_19, f6f9_38 = f6 * f9_38;
|
|
|
|
|
const long f7f7_38 = f7 * f7_38, f7f8_38 = f7_2 * f8_19, f7f9_76 = f7_2 * f9_38;
|
|
|
|
|
const long f8f8_19 = f8 * f8_19, f8f9_38 = f8 * f9_38;
|
|
|
|
|
const long f9f9_38 = f9 * f9_38;
|
|
|
|
|
long h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
|
|
|
|
|
long h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
|
|
|
|
|
long h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
|
|
|
|
|
@@ -373,7 +217,6 @@ void __host__ __device__ fe_sq2(fe& __restrict__ h, const fe& __restrict__ f) {
|
|
|
|
|
long h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
|
|
|
|
|
long h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
|
|
|
|
|
long h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
|
|
|
|
|
long carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
|
|
|
|
|
h0 <<= 1;
|
|
|
|
|
h1 <<= 1;
|
|
|
|
|
h2 <<= 1;
|
|
|
|
|
@@ -384,84 +227,33 @@ void __host__ __device__ fe_sq2(fe& __restrict__ h, const fe& __restrict__ f) {
|
|
|
|
|
h7 <<= 1;
|
|
|
|
|
h8 <<= 1;
|
|
|
|
|
h9 <<= 1;
|
|
|
|
|
carry0 = (h0 + static_cast<long>(1 << 25)) >> 26;
|
|
|
|
|
h1 += carry0;
|
|
|
|
|
h0 -= carry0 << 26;
|
|
|
|
|
carry4 = (h4 + static_cast<long>(1 << 25)) >> 26;
|
|
|
|
|
h5 += carry4;
|
|
|
|
|
h4 -= carry4 << 26;
|
|
|
|
|
carry1 = (h1 + static_cast<long>(1 << 24)) >> 25;
|
|
|
|
|
h2 += carry1;
|
|
|
|
|
h1 -= carry1 << 25;
|
|
|
|
|
carry5 = (h5 + static_cast<long>(1 << 24)) >> 25;
|
|
|
|
|
h6 += carry5;
|
|
|
|
|
h5 -= carry5 << 25;
|
|
|
|
|
carry2 = (h2 + static_cast<long>(1 << 25)) >> 26;
|
|
|
|
|
h3 += carry2;
|
|
|
|
|
h2 -= carry2 << 26;
|
|
|
|
|
carry6 = (h6 + static_cast<long>(1 << 25)) >> 26;
|
|
|
|
|
h7 += carry6;
|
|
|
|
|
h6 -= carry6 << 26;
|
|
|
|
|
carry3 = (h3 + static_cast<long>(1 << 24)) >> 25;
|
|
|
|
|
h4 += carry3;
|
|
|
|
|
h3 -= carry3 << 25;
|
|
|
|
|
carry7 = (h7 + static_cast<long>(1 << 24)) >> 25;
|
|
|
|
|
h8 += carry7;
|
|
|
|
|
h7 -= carry7 << 25;
|
|
|
|
|
carry4 = (h4 + static_cast<long>(1 << 25)) >> 26;
|
|
|
|
|
h5 += carry4;
|
|
|
|
|
h4 -= carry4 << 26;
|
|
|
|
|
carry8 = (h8 + static_cast<long>(1 << 25)) >> 26;
|
|
|
|
|
h9 += carry8;
|
|
|
|
|
h8 -= carry8 << 26;
|
|
|
|
|
carry9 = (h9 + static_cast<long>(1 << 24)) >> 25;
|
|
|
|
|
h0 += carry9 * 19;
|
|
|
|
|
h9 -= carry9 << 25;
|
|
|
|
|
carry0 = (h0 + static_cast<long>(1 << 25)) >> 26;
|
|
|
|
|
h1 += carry0;
|
|
|
|
|
h0 -= carry0 << 26;
|
|
|
|
|
h[0] = (int)h0;
|
|
|
|
|
h[1] = (int)h1;
|
|
|
|
|
h[2] = (int)h2;
|
|
|
|
|
h[3] = (int)h3;
|
|
|
|
|
h[4] = (int)h4;
|
|
|
|
|
h[5] = (int)h5;
|
|
|
|
|
h[6] = (int)h6;
|
|
|
|
|
h[7] = (int)h7;
|
|
|
|
|
h[8] = (int)h8;
|
|
|
|
|
h[9] = (int)h9;
|
|
|
|
|
signed long carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
|
|
|
|
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
|
|
|
|
carry = (h1 + 16777216L) >> 25L; h2 += carry; h1 -= carry << 25L;
|
|
|
|
|
carry = (h5 + 16777216L) >> 25L; h6 += carry; h5 -= carry << 25L;
|
|
|
|
|
carry = (h2 + 33554432L) >> 26L; h3 += carry; h2 -= carry << 26L;
|
|
|
|
|
carry = (h6 + 33554432L) >> 26L; h7 += carry; h6 -= carry << 26L;
|
|
|
|
|
carry = (h3 + 16777216L) >> 25L; h4 += carry; h3 -= carry << 25L;
|
|
|
|
|
carry = (h7 + 16777216L) >> 25L; h8 += carry; h7 -= carry << 25L;
|
|
|
|
|
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
|
|
|
|
carry = (h8 + 33554432L) >> 26L; h9 += carry; h8 -= carry << 26L;
|
|
|
|
|
carry = (h9 + 16777216L) >> 25L; h0 += carry * 19L; h9 -= carry << 25L;
|
|
|
|
|
carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
|
|
|
|
h[0] = static_cast<int>(h0);
|
|
|
|
|
h[1] = static_cast<int>(h1);
|
|
|
|
|
h[2] = static_cast<int>(h2);
|
|
|
|
|
h[3] = static_cast<int>(h3);
|
|
|
|
|
h[4] = static_cast<int>(h4);
|
|
|
|
|
h[5] = static_cast<int>(h5);
|
|
|
|
|
h[6] = static_cast<int>(h6);
|
|
|
|
|
h[7] = static_cast<int>(h7);
|
|
|
|
|
h[8] = static_cast<int>(h8);
|
|
|
|
|
h[9] = static_cast<int>(h9);
|
|
|
|
|
}
|
|
|
|
|
void __device__ __host__ fe_sub(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
|
|
|
|
|
signed int f0 = f[0];
|
|
|
|
|
signed int f1 = f[1];
|
|
|
|
|
signed int f2 = f[2];
|
|
|
|
|
signed int f3 = f[3];
|
|
|
|
|
signed int f4 = f[4];
|
|
|
|
|
signed int f5 = f[5];
|
|
|
|
|
signed int f6 = f[6];
|
|
|
|
|
signed int f7 = f[7];
|
|
|
|
|
signed int f8 = f[8];
|
|
|
|
|
signed int f9 = f[9];
|
|
|
|
|
signed int g0 = g[0];
|
|
|
|
|
signed int g1 = g[1];
|
|
|
|
|
signed int g2 = g[2];
|
|
|
|
|
signed int g3 = g[3];
|
|
|
|
|
signed int g4 = g[4];
|
|
|
|
|
signed int g5 = g[5];
|
|
|
|
|
signed int g6 = g[6];
|
|
|
|
|
signed int g7 = g[7];
|
|
|
|
|
signed int g8 = g[8];
|
|
|
|
|
signed int g9 = g[9];
|
|
|
|
|
signed int h0 = f0 - g0;
|
|
|
|
|
signed int h1 = f1 - g1;
|
|
|
|
|
signed int h2 = f2 - g2;
|
|
|
|
|
signed int h3 = f3 - g3;
|
|
|
|
|
signed int h4 = f4 - g4;
|
|
|
|
|
signed int h5 = f5 - g5;
|
|
|
|
|
signed int h6 = f6 - g6;
|
|
|
|
|
signed int h7 = f7 - g7;
|
|
|
|
|
signed int h8 = f8 - g8;
|
|
|
|
|
signed int h9 = f9 - g9;
|
|
|
|
|
const signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
|
|
|
|
const signed int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
|
|
|
|
const signed int h0 = f0 - g0, h1 = f1 - g1, h2 = f2 - g2, h3 = f3 - g3, h4 = f4 - g4, h5 = f5 - g5, h6 = f6 - g6, h7 = f7 - g7, h8 = f8 - g8, h9 = f9 - g9;
|
|
|
|
|
h[0] = h0;
|
|
|
|
|
h[1] = h1;
|
|
|
|
|
h[2] = h2;
|
|
|
|
|
@@ -474,8 +266,7 @@ void __device__ __host__ fe_sub(fe& __restrict__ h, const fe& __restrict__ f, co
|
|
|
|
|
h[9] = h9;
|
|
|
|
|
}
|
|
|
|
|
void __device__ __host__ fe_tobytes(unsigned char* __restrict__ s, const fe& __restrict__ h) {
|
|
|
|
|
int h0 = h[0], h1 = h[1], h2 = h[2], h3 = h[3], h4 = h[4], h5 = h[5], h6 = h[6], h7 = h[7], h8 = h[8], h9 = h[9], q;
|
|
|
|
|
int carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
|
|
|
|
|
signed int h0 = h[0], h1 = h[1], h2 = h[2], h3 = h[3], h4 = h[4], h5 = h[5], h6 = h[6], h7 = h[7], h8 = h[8], h9 = h[9], carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9, q;
|
|
|
|
|
q = (19 * h9 + (1 << 24)) >> 25;
|
|
|
|
|
q = (h0 + q) >> 26;
|
|
|
|
|
q = (h1 + q) >> 25;
|
|
|
|
|
@@ -517,7 +308,7 @@ void __device__ __host__ fe_tobytes(unsigned char* __restrict__ s, const fe& __r
|
|
|
|
|
h8 -= carry8 << 26;
|
|
|
|
|
carry9 = h9 >> 25;
|
|
|
|
|
h9 -= carry9 << 25;
|
|
|
|
|
s[0] = static_cast<unsigned char>(h0 >> 0);
|
|
|
|
|
s[0] = static_cast<unsigned char>(h0);
|
|
|
|
|
s[1] = static_cast<unsigned char>(h0 >> 8);
|
|
|
|
|
s[2] = static_cast<unsigned char>(h0 >> 16);
|
|
|
|
|
s[3] = static_cast<unsigned char>((h0 >> 24) | (h1 << 2));
|
|
|
|
|
@@ -533,7 +324,7 @@ void __device__ __host__ fe_tobytes(unsigned char* __restrict__ s, const fe& __r
|
|
|
|
|
s[13] = static_cast<unsigned char>(h4 >> 2);
|
|
|
|
|
s[14] = static_cast<unsigned char>(h4 >> 10);
|
|
|
|
|
s[15] = static_cast<unsigned char>(h4 >> 18);
|
|
|
|
|
s[16] = static_cast<unsigned char>(h5 >> 0);
|
|
|
|
|
s[16] = static_cast<unsigned char>(h5);
|
|
|
|
|
s[17] = static_cast<unsigned char>(h5 >> 8);
|
|
|
|
|
s[18] = static_cast<unsigned char>(h5 >> 16);
|
|
|
|
|
s[19] = static_cast<unsigned char>((h5 >> 24) | (h6 << 1));
|
|
|
|
|
|