// Files: yggm/libs/f25519.cu
// rcxpony ab6061f5bf owo
// 2025-08-21 13:05:05 +05:00
//
// 409 lines
// 17 KiB
// Plaintext

#include <f25519.cuh>
// h = 0: clear every one of the ten limbs.
void __device__ __host__ fe_0(fe& __restrict__ h) {
#pragma unroll
    for (int limb = 0; limb < 10; ++limb) {
        h[limb] = 0;
    }
}
// h = 1: limb 0 is set to 1, limbs 1..9 are cleared (ten limbs in total).
void __device__ __host__ fe_1(fe& __restrict__ h) {
    h[0] = 1;
#pragma unroll
    for (int limb = 1; limb < 10; ++limb) {
        h[limb] = 0;
    }
}
// h = f + g, computed limb by limb.
// No carry propagation or modular reduction happens here; each output limb is
// simply the sum of the two matching input limbs.
void __device__ __host__ fe_add(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
#pragma unroll
    for (int i = 0; i < 10; ++i) {
        // The sum is formed in long and narrowed only when stored into h.
        const long sum = static_cast<long>(f[i]) + static_cast<long>(g[i]);
        h[i] = sum;
    }
}
// Conditional move without branching on b:
//   b == 0  ->  f is left unchanged
//   b == 1  ->  f becomes a copy of g
// Callers are expected to pass only 0 or 1; any other value yields a mask that
// is neither all-zeros nor all-ones and mixes bits of f and g.
void __device__ __host__ fe_cmov(fe& __restrict__ f, const fe& __restrict__ g, const unsigned b) {
    // -b is 0 when b == 0 and has every bit set when b == 1. It is kept as an
    // int so that it sign-extends when combined with the long values below.
    const int mask = -b;
#pragma unroll
    for (int i = 0; i < 10; ++i) {
        const long mine = f[i];
        const long theirs = g[i];
        // Bits in which f and g differ, kept only when the mask is set.
        const long flip = (mine ^ theirs) & mask;
        f[i] = mine ^ flip;
    }
}
// h = f: copy all ten limbs from f into h.
void __device__ __host__ fe_copy(fe& __restrict__ h, const fe& __restrict__ f) {
#pragma unroll
    for (int i = 0; i < 10; ++i) {
        const long limb = f[i];
        h[i] = limb;
    }
}
// out = z^(2^255 - 21), built from a fixed sequence of fe_mul calls
// (squaring is done as fe_mul(x, x, x)). With p = 2^255 - 19 the exponent is
// p - 2, so for non-zero z this is the multiplicative inverse of z.
//
// The trailing comments give the exponent of z held by the destination after
// each step.
//
// NOTE(review): many calls below pass the same array as both output and input
// even though fe_mul declares all three parameters __restrict__. This relies
// on fe_mul loading every limb of f and g before it stores anything to h.
void __device__ __host__ fe_invert(fe& __restrict__ out, const fe& __restrict__ z) {
    fe small;  // z^2, later z^11
    fe acc;    // running z^(2^k - 1) values
    fe run;    // scratch for squaring runs
    fe tmp;    // second scratch for the 20- and 100-squaring runs

    fe_mul(small, z, z);          // 2
    fe_mul(acc, small, small);    // 4
    fe_mul(acc, acc, acc);        // 8
    fe_mul(acc, z, acc);          // 9
    fe_mul(small, small, acc);    // 11
    fe_mul(run, small, small);    // 22
    fe_mul(acc, acc, run);        // 31 = 2^5 - 1

    // 5 squarings, then fold in 2^5 - 1.
    fe_mul(run, acc, acc);
#pragma unroll 4
    for (int i = 0; i < 4; ++i) {
        fe_mul(run, run, run);
    }
    fe_mul(acc, run, acc);        // 2^10 - 1

    // 10 squarings, then fold in 2^10 - 1.
    fe_mul(run, acc, acc);
#pragma unroll 9
    for (int i = 0; i < 9; ++i) {
        fe_mul(run, run, run);
    }
    fe_mul(run, run, acc);        // 2^20 - 1

    // 20 squarings, then fold in 2^20 - 1.
    fe_mul(tmp, run, run);
#pragma unroll 19
    for (int i = 0; i < 19; ++i) {
        fe_mul(tmp, tmp, tmp);
    }
    fe_mul(run, tmp, run);        // 2^40 - 1

    // 10 squarings, then fold in 2^10 - 1.
#pragma unroll 10
    for (int i = 0; i < 10; ++i) {
        fe_mul(run, run, run);
    }
    fe_mul(acc, run, acc);        // 2^50 - 1

    // 50 squarings, then fold in 2^50 - 1.
    fe_mul(run, acc, acc);
#pragma unroll 49
    for (int i = 0; i < 49; ++i) {
        fe_mul(run, run, run);
    }
    fe_mul(run, run, acc);        // 2^100 - 1

    // 100 squarings, then fold in 2^100 - 1.
    fe_mul(tmp, run, run);
#pragma unroll 99
    for (int i = 0; i < 99; ++i) {
        fe_mul(tmp, tmp, tmp);
    }
    fe_mul(run, tmp, run);        // 2^200 - 1

    // 50 squarings, then fold in 2^50 - 1.
#pragma unroll 50
    for (int i = 0; i < 50; ++i) {
        fe_mul(run, run, run);
    }
    fe_mul(acc, run, acc);        // 2^250 - 1

    // 5 squarings, then multiply by z^11.
#pragma unroll 5
    for (int i = 0; i < 5; ++i) {
        fe_mul(acc, acc, acc);
    }
    fe_mul(out, acc, small);      // 2^255 - 32 + 11 = 2^255 - 21
}
// Returns 1 if f is "negative" and 0 otherwise, where the sign is defined as
// the least-significant bit of the 32-byte encoding produced by fe_tobytes.
int __device__ __host__ fe_isnegative(const fe& __restrict__ f) {
    unsigned char encoded[32];
    fe_tobytes(encoded, f);
    return encoded[0] & 1;
}
// h = f * g  (mod 2^255 - 19)
//
// Schoolbook product of two ten-limb elements. The product f_i * g_j is added
// to output limb (i + j) mod 10 with two corrections:
//   * a factor 2 when both i and j are odd (limb widths alternate 26/25 bits,
//     as the carry widths below show, so two odd limbs land one bit short);
//   * a factor 19 when i + j >= 10 (wrap-around, since 2^255 = 19 mod p).
// A carry pass then brings every limb back to roughly 26/25 bits before the
// result is stored.
//
// Precondition (assumed, not checked): input limbs are small enough that each
// ten-term sum fits in a signed 64-bit integer. TODO confirm the exact bounds
// callers guarantee.
//
// h may be the same array as f and/or g: every input limb is loaded before
// any output limb is written.
void __device__ __host__ fe_mul(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
    // long long is at least 64 bits on every ABI. Plain long is only 32 bits
    // on LLP64 targets (e.g. Windows), where the products would overflow.
    typedef long long wide;

    const wide f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
    const wide g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];

    // Wrap-around factor, applied directly as x19 (no x38-then-halve detour).
    const wide g1_19 = 19 * g1, g2_19 = 19 * g2, g3_19 = 19 * g3, g4_19 = 19 * g4, g5_19 = 19 * g5;
    const wide g6_19 = 19 * g6, g7_19 = 19 * g7, g8_19 = 19 * g8, g9_19 = 19 * g9;
    // Doubling factor for odd-indexed f limbs meeting odd-indexed g limbs.
    const wide f1_2 = 2 * f1, f3_2 = 2 * f3, f5_2 = 2 * f5, f7_2 = 2 * f7, f9_2 = 2 * f9;

    wide h0 = f0 * g0 + f1_2 * g9_19 + f2 * g8_19 + f3_2 * g7_19 + f4 * g6_19 + f5_2 * g5_19 + f6 * g4_19 + f7_2 * g3_19 + f8 * g2_19 + f9_2 * g1_19;
    wide h1 = f0 * g1 + f1 * g0 + f2 * g9_19 + f3 * g8_19 + f4 * g7_19 + f5 * g6_19 + f6 * g5_19 + f7 * g4_19 + f8 * g3_19 + f9 * g2_19;
    wide h2 = f0 * g2 + f1_2 * g1 + f2 * g0 + f3_2 * g9_19 + f4 * g8_19 + f5_2 * g7_19 + f6 * g6_19 + f7_2 * g5_19 + f8 * g4_19 + f9_2 * g3_19;
    wide h3 = f0 * g3 + f1 * g2 + f2 * g1 + f3 * g0 + f4 * g9_19 + f5 * g8_19 + f6 * g7_19 + f7 * g6_19 + f8 * g5_19 + f9 * g4_19;
    wide h4 = f0 * g4 + f1_2 * g3 + f2 * g2 + f3_2 * g1 + f4 * g0 + f5_2 * g9_19 + f6 * g8_19 + f7_2 * g7_19 + f8 * g6_19 + f9_2 * g5_19;
    wide h5 = f0 * g5 + f1 * g4 + f2 * g3 + f3 * g2 + f4 * g1 + f5 * g0 + f6 * g9_19 + f7 * g8_19 + f8 * g7_19 + f9 * g6_19;
    wide h6 = f0 * g6 + f1_2 * g5 + f2 * g4 + f3_2 * g3 + f4 * g2 + f5_2 * g1 + f6 * g0 + f7_2 * g9_19 + f8 * g8_19 + f9_2 * g7_19;
    wide h7 = f0 * g7 + f1 * g6 + f2 * g5 + f3 * g4 + f4 * g3 + f5 * g2 + f6 * g1 + f7 * g0 + f8 * g9_19 + f9 * g8_19;
    wide h8 = f0 * g8 + f1_2 * g7 + f2 * g6 + f3_2 * g5 + f4 * g4 + f5_2 * g3 + f6 * g2 + f7_2 * g1 + f8 * g0 + f9_2 * g9_19;
    wide h9 = f0 * g9 + f1 * g8 + f2 * g7 + f3 * g6 + f4 * g5 + f5 * g4 + f6 * g3 + f7 * g2 + f8 * g1 + f9 * g0;

    // Adding half the limb modulus before shifting rounds the carry to the
    // nearest integer, which leaves a signed limb centred on zero.
    const wide half26 = 1LL << 25;  // 33554432
    const wide half25 = 1LL << 24;  // 16777216
    wide carry;

    // Two interleaved carry chains (starting at h0 and at h4).
    carry = (h0 + half26) >> 26;  h1 += carry;  h0 -= carry << 26;
    carry = (h4 + half26) >> 26;  h5 += carry;  h4 -= carry << 26;

    carry = (h1 + half25) >> 25;  h2 += carry;  h1 -= carry << 25;
    carry = (h5 + half25) >> 25;  h6 += carry;  h5 -= carry << 25;

    carry = (h2 + half26) >> 26;  h3 += carry;  h2 -= carry << 26;
    carry = (h6 + half26) >> 26;  h7 += carry;  h6 -= carry << 26;

    carry = (h3 + half25) >> 25;  h4 += carry;  h3 -= carry << 25;
    carry = (h7 + half25) >> 25;  h8 += carry;  h7 -= carry << 25;

    carry = (h4 + half26) >> 26;  h5 += carry;  h4 -= carry << 26;
    carry = (h8 + half26) >> 26;  h9 += carry;  h8 -= carry << 26;

    // The carry out of the top limb wraps to limb 0 with the factor 19.
    carry = (h9 + half25) >> 25;  h0 += carry * 19;  h9 -= carry << 25;

    // One more step so h0 is back in range after absorbing the wrap.
    carry = (h0 + half26) >> 26;  h1 += carry;  h0 -= carry << 26;

    h[0] = static_cast<int>(h0);
    h[1] = static_cast<int>(h1);
    h[2] = static_cast<int>(h2);
    h[3] = static_cast<int>(h3);
    h[4] = static_cast<int>(h4);
    h[5] = static_cast<int>(h5);
    h[6] = static_cast<int>(h6);
    h[7] = static_cast<int>(h7);
    h[8] = static_cast<int>(h8);
    h[9] = static_cast<int>(h9);
}
// h = -f: negate each limb individually. No carry or reduction is applied.
void __host__ __device__ fe_neg(fe& __restrict__ h, const fe& __restrict__ f) {
#pragma unroll
    for (int i = 0; i < 10; ++i) {
        // Negate in long, then narrow on store.
        const long negated = -static_cast<long>(f[i]);
        h[i] = negated;
    }
}
// h = 2 * f * f  (mod 2^255 - 19)
//
// Despite the name, this is a doubled squaring, not a multiplication by two:
// each output limb is the corresponding limb of f^2, doubled, followed by the
// same carry pass that fe_mul uses.
//
// The squaring exploits symmetry: each cross product f_i * f_j (i != j) is
// formed once with a factor 2. On top of that come the usual corrections: an
// extra factor 2 when both indices are odd, and a factor 19 when i + j >= 10
// (wrap-around, since 2^255 = 19 mod p). The numeric suffixes on the
// precomputed operands give the constant already folded in.
//
// Precondition (assumed, not checked): input limbs are small enough that each
// doubled sum fits in a signed 64-bit integer. TODO confirm the bounds callers
// guarantee.
//
// h may be the same array as f: every input limb is loaded before any output
// limb is written.
void __host__ __device__ fe_mul2(fe& __restrict__ h, const fe& __restrict__ f) {
    // long long is at least 64 bits on every ABI. Plain long is only 32 bits
    // on LLP64 targets (e.g. Windows), where the products would overflow.
    typedef long long wide;

    const wide f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];

    const wide f0_2 = 2 * f0, f1_2 = 2 * f1, f2_2 = 2 * f2, f3_2 = 2 * f3;
    const wide f4_2 = 2 * f4, f5_2 = 2 * f5, f6_2 = 2 * f6, f7_2 = 2 * f7;
    const wide f5_38 = 38 * f5, f6_19 = 19 * f6, f7_38 = 38 * f7, f8_19 = 19 * f8, f9_38 = 38 * f9;

    // Limbs of f^2, each doubled to give 2 * f^2.
    wide h0 = 2 * (f0 * f0 + f1_2 * f9_38 + f2_2 * f8_19 + f3_2 * f7_38 + f4_2 * f6_19 + f5 * f5_38);
    wide h1 = 2 * (f0_2 * f1 + f2 * f9_38 + f3_2 * f8_19 + f4 * f7_38 + f5_2 * f6_19);
    wide h2 = 2 * (f0_2 * f2 + f1_2 * f1 + f3_2 * f9_38 + f4_2 * f8_19 + f5_2 * f7_38 + f6 * f6_19);
    wide h3 = 2 * (f0_2 * f3 + f1_2 * f2 + f4 * f9_38 + f5_2 * f8_19 + f6 * f7_38);
    wide h4 = 2 * (f0_2 * f4 + f1_2 * f3_2 + f2 * f2 + f5_2 * f9_38 + f6_2 * f8_19 + f7 * f7_38);
    wide h5 = 2 * (f0_2 * f5 + f1_2 * f4 + f2_2 * f3 + f6 * f9_38 + f7_2 * f8_19);
    wide h6 = 2 * (f0_2 * f6 + f1_2 * f5_2 + f2_2 * f4 + f3_2 * f3 + f7_2 * f9_38 + f8 * f8_19);
    wide h7 = 2 * (f0_2 * f7 + f1_2 * f6 + f2_2 * f5 + f3_2 * f4 + f8 * f9_38);
    wide h8 = 2 * (f0_2 * f8 + f1_2 * f7_2 + f2_2 * f6 + f3_2 * f5_2 + f4 * f4 + f9 * f9_38);
    wide h9 = 2 * (f0_2 * f9 + f1_2 * f8 + f2_2 * f7 + f3_2 * f6 + f4_2 * f5);

    // Adding half the limb modulus before shifting rounds the carry to the
    // nearest integer, which leaves a signed limb centred on zero.
    const wide half26 = 1LL << 25;  // 33554432
    const wide half25 = 1LL << 24;  // 16777216
    wide carry;

    // Two interleaved carry chains (starting at h0 and at h4).
    carry = (h0 + half26) >> 26;  h1 += carry;  h0 -= carry << 26;
    carry = (h4 + half26) >> 26;  h5 += carry;  h4 -= carry << 26;

    carry = (h1 + half25) >> 25;  h2 += carry;  h1 -= carry << 25;
    carry = (h5 + half25) >> 25;  h6 += carry;  h5 -= carry << 25;

    carry = (h2 + half26) >> 26;  h3 += carry;  h2 -= carry << 26;
    carry = (h6 + half26) >> 26;  h7 += carry;  h6 -= carry << 26;

    carry = (h3 + half25) >> 25;  h4 += carry;  h3 -= carry << 25;
    carry = (h7 + half25) >> 25;  h8 += carry;  h7 -= carry << 25;

    carry = (h4 + half26) >> 26;  h5 += carry;  h4 -= carry << 26;
    carry = (h8 + half26) >> 26;  h9 += carry;  h8 -= carry << 26;

    // The carry out of the top limb wraps to limb 0 with the factor 19.
    carry = (h9 + half25) >> 25;  h0 += carry * 19;  h9 -= carry << 25;

    // One more step so h0 is back in range after absorbing the wrap.
    carry = (h0 + half26) >> 26;  h1 += carry;  h0 -= carry << 26;

    h[0] = static_cast<int>(h0);
    h[1] = static_cast<int>(h1);
    h[2] = static_cast<int>(h2);
    h[3] = static_cast<int>(h3);
    h[4] = static_cast<int>(h4);
    h[5] = static_cast<int>(h5);
    h[6] = static_cast<int>(h6);
    h[7] = static_cast<int>(h7);
    h[8] = static_cast<int>(h8);
    h[9] = static_cast<int>(h9);
}
// h = f - g, computed limb by limb.
// No carry propagation or modular reduction happens here; limbs of the result
// may be negative.
void __device__ __host__ fe_sub(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
#pragma unroll
    for (int i = 0; i < 10; ++i) {
        // The difference is formed in long and narrowed only when stored.
        const long diff = static_cast<long>(f[i]) - static_cast<long>(g[i]);
        h[i] = diff;
    }
}
// Serialise h into 32 bytes at s, least-significant byte first.
//
// Steps:
//   1. Run a carry estimate through all ten limbs to obtain q.
//      Presumably q is 0 or 1 for inputs such as those produced by fe_mul,
//      i.e. whether p = 2^255 - 19 must be subtracted once. TODO confirm
//      against the limb bounds callers guarantee.
//   2. Add 19 * q to limb 0 and propagate plain (floor) carries upward. The
//      carry out of limb 9 is dropped, which removes q * 2^255.
//   3. Pack the limbs (26, 25, 26, 25, ... bits) into s[0..31].
void __device__ __host__ fe_tobytes(unsigned char* __restrict__ s, const fe& __restrict__ h) {
    long t0 = h[0], t1 = h[1], t2 = h[2], t3 = h[3], t4 = h[4];
    long t5 = h[5], t6 = h[6], t7 = h[7], t8 = h[8], t9 = h[9];

    // Step 1: quotient estimate, carried through the limbs from low to high.
    long q = (19 * t9 + (1 << 24)) >> 25;
    q = (t0 + q) >> 26;
    q = (t1 + q) >> 25;
    q = (t2 + q) >> 26;
    q = (t3 + q) >> 25;
    q = (t4 + q) >> 26;
    q = (t5 + q) >> 25;
    q = (t6 + q) >> 26;
    q = (t7 + q) >> 25;
    q = (t8 + q) >> 26;
    q = (t9 + q) >> 25;

    // Step 2: apply the correction and normalise each limb to its bit width.
    t0 += 19 * q;

    long c;
    c = t0 >> 26;  t1 += c;  t0 -= c << 26;
    c = t1 >> 25;  t2 += c;  t1 -= c << 25;
    c = t2 >> 26;  t3 += c;  t2 -= c << 26;
    c = t3 >> 25;  t4 += c;  t3 -= c << 25;
    c = t4 >> 26;  t5 += c;  t4 -= c << 26;
    c = t5 >> 25;  t6 += c;  t5 -= c << 25;
    c = t6 >> 26;  t7 += c;  t6 -= c << 26;
    c = t7 >> 25;  t8 += c;  t7 -= c << 25;
    c = t8 >> 26;  t9 += c;  t8 -= c << 26;
    // The top carry is discarded rather than wrapped.
    c = t9 >> 25;            t9 -= c << 25;

    // Step 3: pack the limbs. Where a byte straddles two limbs, the high bits
    // of the lower limb are OR-ed with the low bits of the next one.
    typedef unsigned char byte;
    s[0]  = static_cast<byte>(t0);
    s[1]  = static_cast<byte>(t0 >> 8);
    s[2]  = static_cast<byte>(t0 >> 16);
    s[3]  = static_cast<byte>((t0 >> 24) | (t1 << 2));
    s[4]  = static_cast<byte>(t1 >> 6);
    s[5]  = static_cast<byte>(t1 >> 14);
    s[6]  = static_cast<byte>((t1 >> 22) | (t2 << 3));
    s[7]  = static_cast<byte>(t2 >> 5);
    s[8]  = static_cast<byte>(t2 >> 13);
    s[9]  = static_cast<byte>((t2 >> 21) | (t3 << 5));
    s[10] = static_cast<byte>(t3 >> 3);
    s[11] = static_cast<byte>(t3 >> 11);
    s[12] = static_cast<byte>((t3 >> 19) | (t4 << 6));
    s[13] = static_cast<byte>(t4 >> 2);
    s[14] = static_cast<byte>(t4 >> 10);
    s[15] = static_cast<byte>(t4 >> 18);
    s[16] = static_cast<byte>(t5);
    s[17] = static_cast<byte>(t5 >> 8);
    s[18] = static_cast<byte>(t5 >> 16);
    s[19] = static_cast<byte>((t5 >> 24) | (t6 << 1));
    s[20] = static_cast<byte>(t6 >> 7);
    s[21] = static_cast<byte>(t6 >> 15);
    s[22] = static_cast<byte>((t6 >> 23) | (t7 << 3));
    s[23] = static_cast<byte>(t7 >> 5);
    s[24] = static_cast<byte>(t7 >> 13);
    s[25] = static_cast<byte>((t7 >> 21) | (t8 << 4));
    s[26] = static_cast<byte>(t8 >> 4);
    s[27] = static_cast<byte>(t8 >> 12);
    s[28] = static_cast<byte>((t8 >> 20) | (t9 << 6));
    s[29] = static_cast<byte>(t9 >> 2);
    s[30] = static_cast<byte>(t9 >> 10);
    s[31] = static_cast<byte>(t9 >> 18);
}