owo
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
git clone https://rcxpony.name/rcxpony/yggm.git && cd yggm
|
||||
meson setup build -Dbuildtype=release && cd build
|
||||
meson compile
|
||||
./yggm -t 10 // for CPU
|
||||
./yggmc -t 10 // for CPU
|
||||
./yggmcu -t 10 // for GPU
|
||||
```
|
||||
# ToDo
|
||||
|
||||
@@ -11,7 +11,4 @@
|
||||
#define THDIVTHPB (tTh / THREADSPB)
|
||||
#define WHCOND true
|
||||
#endif
|
||||
#ifndef USE_AVX2
|
||||
#define USE_AVX2 0
|
||||
#endif
|
||||
#endif
|
||||
@@ -1,6 +1,6 @@
|
||||
#include <ed25519.cuh>
|
||||
#include <precomp_data.h>
|
||||
#include <f25519.cuh>
|
||||
#include <precomp_data.h>
|
||||
void __host__ __device__ ge_madd(ge_p1p1& __restrict__ r, const ge_p3& __restrict__ p, const precomp_data& __restrict__ q) {
|
||||
fe t0;
|
||||
fe_add(r.X, p.Y, p.X);
|
||||
@@ -14,7 +14,7 @@ void __host__ __device__ ge_madd(ge_p1p1& __restrict__ r, const ge_p3& __restric
|
||||
fe_add(r.Z, t0, r.T);
|
||||
fe_sub(r.T, t0, r.T);
|
||||
}
|
||||
//r.XYZ = p.XYZ * p.TZT
|
||||
// r.XYZ = p.XYZ * p.TZT
|
||||
void __host__ __device__ ge_p1p1_to_p2(ge_p2& __restrict__ r, const ge_p1p1& __restrict__ p) {
|
||||
fe_mul(r.X, p.X, p.T);
|
||||
fe_mul(r.Y, p.Y, p.Z);
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#include <ed25519.cuh>
|
||||
#include <edsign.cuh>
|
||||
#include <ed25519.cuh>
|
||||
#include <sha512.cuh>
|
||||
#include <ed25519.cuh>
|
||||
__device__ __forceinline__ void expand_key(unsigned char* __restrict__ expanded, const unsigned char* __restrict__ secret) {
|
||||
struct sha512_state s;
|
||||
memcpy(&s, &sha512_initial_state, sizeof(s));
|
||||
|
||||
112
libs/f25519.cu
112
libs/f25519.cu
@@ -1,5 +1,5 @@
|
||||
#include <f25519.cuh>
|
||||
//h = {0};
|
||||
// h = {0};
|
||||
void __device__ __host__ fe_0(fe& __restrict__ h) {
|
||||
h[0] = 0;
|
||||
h[1] = 0;
|
||||
@@ -12,7 +12,7 @@ void __device__ __host__ fe_0(fe& __restrict__ h) {
|
||||
h[8] = 0;
|
||||
h[9] = 0;
|
||||
}
|
||||
//h = {1,0,0,0,0,0,0,0,0,0,0};
|
||||
// h = {1,0,0,0,0,0,0,0,0,0,0};
|
||||
void __device__ __host__ fe_1(fe& __restrict__ h) {
|
||||
h[0] = 1;
|
||||
h[1] = 0;
|
||||
@@ -25,7 +25,7 @@ void __device__ __host__ fe_1(fe& __restrict__ h) {
|
||||
h[8] = 0;
|
||||
h[9] = 0;
|
||||
}
|
||||
//h = f + g
|
||||
// h = f + g
|
||||
void __device__ __host__ fe_add(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
|
||||
long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
long g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
@@ -57,7 +57,7 @@ void __device__ __host__ fe_cmov(fe& __restrict__ f, const fe& __restrict__ g, c
|
||||
f[8] = f8 ^ x8;
|
||||
f[9] = f9 ^ x9;
|
||||
}
|
||||
//h[0..9] = f[0..9]
|
||||
// h[0..9] = f[0..9]
|
||||
void __device__ __host__ fe_copy(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
h[0] = f0;
|
||||
@@ -80,11 +80,11 @@ void __device__ __host__ fe_invert(fe& __restrict__ out, const fe& __restrict__
|
||||
fe_mul(t0, t0, t1);
|
||||
fe_mul(t2, t0, t0);
|
||||
fe_mul(t1, t1, t2);
|
||||
fe_mul(t2, t1,t1);
|
||||
fe_mul(t2, t1, t1);
|
||||
fe_mul(t2, t2, t2);
|
||||
fe_mul(t2, t2, t2);
|
||||
fe_mul(t2, t2, t2);
|
||||
fe_mul(t2, t2, t2);
|
||||
fe_mul(t2, t2,t2);
|
||||
fe_mul(t1, t2, t1);
|
||||
fe_mul(t2, t1, t1);
|
||||
fe_mul(t2, t2, t2);
|
||||
@@ -159,7 +159,7 @@ int __device__ __host__ fe_isnegative(const fe& __restrict__ f) {
|
||||
fe_tobytes(s, f);
|
||||
return s[0] & 1;
|
||||
}
|
||||
//h = f * g
|
||||
// h = f * g
|
||||
void __device__ __host__ fe_mul(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
|
||||
long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
long g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
@@ -183,18 +183,42 @@ void __device__ __host__ fe_mul(fe& __restrict__ h, const fe& __restrict__ f, co
|
||||
long h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
|
||||
long h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
|
||||
long h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
|
||||
long carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
||||
carry = (h1 + 16777216L) >> 25L; h2 += carry; h1 -= carry << 25L;
|
||||
carry = (h5 + 16777216L) >> 25L; h6 += carry; h5 -= carry << 25L;
|
||||
carry = (h2 + 33554432L) >> 26L; h3 += carry; h2 -= carry << 26L;
|
||||
carry = (h6 + 33554432L) >> 26L; h7 += carry; h6 -= carry << 26L;
|
||||
carry = (h3 + 16777216L) >> 25L; h4 += carry; h3 -= carry << 25L;
|
||||
carry = (h7 + 16777216L) >> 25L; h8 += carry; h7 -= carry << 25L;
|
||||
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
||||
carry = (h8 + 33554432L) >> 26L; h9 += carry; h8 -= carry << 26L;
|
||||
carry = (h9 + 16777216L) >> 25L; h0 += carry * 19L; h9 -= carry << 25L;
|
||||
carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
long carry = (h0 + 33554432L) >> 26L;
|
||||
h1 += carry;
|
||||
h0 -= carry << 26L;
|
||||
carry = (h4 + 33554432L) >> 26L;
|
||||
h5 += carry;
|
||||
h4 -= carry << 26L;
|
||||
carry = (h1 + 16777216L) >> 25L;
|
||||
h2 += carry;
|
||||
h1 -= carry << 25L;
|
||||
carry = (h5 + 16777216L) >> 25L;
|
||||
h6 += carry;
|
||||
h5 -= carry << 25L;
|
||||
carry = (h2 + 33554432L) >> 26L;
|
||||
h3 += carry;
|
||||
h2 -= carry << 26L;
|
||||
carry = (h6 + 33554432L) >> 26L;
|
||||
h7 += carry;
|
||||
h6 -= carry << 26L;
|
||||
carry = (h3 + 16777216L) >> 25L;
|
||||
h4 += carry;
|
||||
h3 -= carry << 25L;
|
||||
carry = (h7 + 16777216L) >> 25L;
|
||||
h8 += carry;
|
||||
h7 -= carry << 25L;
|
||||
carry = (h4 + 33554432L) >> 26L;
|
||||
h5 += carry;
|
||||
h4 -= carry << 26L;
|
||||
carry = (h8 + 33554432L) >> 26L;
|
||||
h9 += carry;
|
||||
h8 -= carry << 26L;
|
||||
carry = (h9 + 16777216L) >> 25L;
|
||||
h0 += carry * 19L;
|
||||
h9 -= carry << 25L;
|
||||
carry = (h0 + 33554432L) >> 26L;
|
||||
h1 += carry;
|
||||
h0 -= carry << 26L;
|
||||
h[0] = static_cast<int>(h0);
|
||||
h[1] = static_cast<int>(h1);
|
||||
h[2] = static_cast<int>(h2);
|
||||
@@ -206,7 +230,7 @@ void __device__ __host__ fe_mul(fe& __restrict__ h, const fe& __restrict__ f, co
|
||||
h[8] = static_cast<int>(h8);
|
||||
h[9] = static_cast<int>(h9);
|
||||
}
|
||||
//h = -f
|
||||
// h = -f
|
||||
void __host__ __device__ fe_neg(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
long h0 = -f0, h1 = -f1, h2 = -f2, h3 = -f3, h4 = -f4, h5 = -f5, h6 = -f6, h7 = -f7, h8 = -f8, h9 = -f9;
|
||||
@@ -245,18 +269,42 @@ void __host__ __device__ fe_mul2(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
long h7 = (f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38) << 1;
|
||||
long h8 = (f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38) << 1;
|
||||
long h9 = (f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2) << 1;
|
||||
long carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
||||
carry = (h1 + 16777216L) >> 25L; h2 += carry; h1 -= carry << 25L;
|
||||
carry = (h5 + 16777216L) >> 25L; h6 += carry; h5 -= carry << 25L;
|
||||
carry = (h2 + 33554432L) >> 26L; h3 += carry; h2 -= carry << 26L;
|
||||
carry = (h6 + 33554432L) >> 26L; h7 += carry; h6 -= carry << 26L;
|
||||
carry = (h3 + 16777216L) >> 25L; h4 += carry; h3 -= carry << 25L;
|
||||
carry = (h7 + 16777216L) >> 25L; h8 += carry; h7 -= carry << 25L;
|
||||
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
||||
carry = (h8 + 33554432L) >> 26L; h9 += carry; h8 -= carry << 26L;
|
||||
carry = (h9 + 16777216L) >> 25L; h0 += carry * 19L; h9 -= carry << 25L;
|
||||
carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
long carry = (h0 + 33554432L) >> 26L;
|
||||
h1 += carry;
|
||||
h0 -= carry << 26L;
|
||||
carry = (h4 + 33554432L) >> 26L;
|
||||
h5 += carry;
|
||||
h4 -= carry << 26L;
|
||||
carry = (h1 + 16777216L) >> 25L;
|
||||
h2 += carry;
|
||||
h1 -= carry << 25L;
|
||||
carry = (h5 + 16777216L) >> 25L;
|
||||
h6 += carry;
|
||||
h5 -= carry << 25L;
|
||||
carry = (h2 + 33554432L) >> 26L;
|
||||
h3 += carry;
|
||||
h2 -= carry << 26L;
|
||||
carry = (h6 + 33554432L) >> 26L;
|
||||
h7 += carry;
|
||||
h6 -= carry << 26L;
|
||||
carry = (h3 + 16777216L) >> 25L;
|
||||
h4 += carry;
|
||||
h3 -= carry << 25L;
|
||||
carry = (h7 + 16777216L) >> 25L;
|
||||
h8 += carry;
|
||||
h7 -= carry << 25L;
|
||||
carry = (h4 + 33554432L) >> 26L;
|
||||
h5 += carry;
|
||||
h4 -= carry << 26L;
|
||||
carry = (h8 + 33554432L) >> 26L;
|
||||
h9 += carry;
|
||||
h8 -= carry << 26L;
|
||||
carry = (h9 + 16777216L) >> 25L;
|
||||
h0 += carry * 19L;
|
||||
h9 -= carry << 25L;
|
||||
carry = (h0 + 33554432L) >> 26L;
|
||||
h1 += carry;
|
||||
h0 -= carry << 26L;
|
||||
h[0] = static_cast<int>(h0);
|
||||
h[1] = static_cast<int>(h1);
|
||||
h[2] = static_cast<int>(h2);
|
||||
|
||||
@@ -20,7 +20,9 @@ __device__ ds46 getAddr(const Addr16 rawAddr) noexcept {
|
||||
addrStr.data[pos++] = hexDigits[rawAddr[idx] & 0x0F];
|
||||
addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] >> 4];
|
||||
addrStr.data[pos++] = hexDigits[rawAddr[idx + 1] & 0x0F];
|
||||
if (group < 7) { addrStr.data[pos++] = ':'; }
|
||||
if (group < 7) {
|
||||
addrStr.data[pos++] = ':';
|
||||
}
|
||||
}
|
||||
addrStr.data[pos] = '\0';
|
||||
return addrStr;
|
||||
@@ -30,7 +32,7 @@ __device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Addr16& rawA
|
||||
const int bitsToShift = lErase & 7;
|
||||
const int start = lErase >> 3;
|
||||
if (bitsToShift) {
|
||||
#pragma unroll
|
||||
#pragma unroll
|
||||
for (int i = start; i < start + 15; i++) {
|
||||
InvertedPublicKey[i] = static_cast<unsigned char>((InvertedPublicKey[i] << bitsToShift) | (InvertedPublicKey[i + 1] >> (8 - bitsToShift)));
|
||||
}
|
||||
|
||||
@@ -10,7 +10,8 @@ using Addr16 = unsigned char[16];
|
||||
using Key32 = unsigned char[32];
|
||||
struct KeysBox32 {
|
||||
Key32 PublicKey, PrivateKey;
|
||||
};__device__ ds64 ktos(const unsigned char* key) noexcept;
|
||||
};
|
||||
__device__ ds64 ktos(const unsigned char* key) noexcept;
|
||||
__device__ ds46 getAddr(const Addr16 rawAddr) noexcept;
|
||||
__device__ void getRawAddress(int lErase, Key32& InvertedPublicKey, Addr16& rawAddr) noexcept;
|
||||
__device__ void invertKey(const unsigned char* key, unsigned char* inverted);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <ed25519.cuh>
|
||||
__device__ __constant__ const precomp_data Bi[8] = { {
|
||||
__device__ __constant__ const precomp_data Bi[8] = {
|
||||
{
|
||||
{ 25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, -11754271, -6079156, 2047605 },
|
||||
{ -12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, 19500929, -15469378 },
|
||||
{ -8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, 11864899, -24514362, -4438546 },
|
||||
|
||||
118
libs/sha512.cu
118
libs/sha512.cu
@@ -1,38 +1,103 @@
|
||||
#include <sha512.cuh>
|
||||
__device__ __constant__ sha512_state sha512_initial_state = { {
|
||||
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
||||
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
||||
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
||||
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL,
|
||||
0x6a09e667f3bcc908ULL,
|
||||
0xbb67ae8584caa73bULL,
|
||||
0x3c6ef372fe94f82bULL,
|
||||
0xa54ff53a5f1d36f1ULL,
|
||||
0x510e527fade682d1ULL,
|
||||
0x9b05688c2b3e6c1fULL,
|
||||
0x1f83d9abfb41bd6bULL,
|
||||
0x5be0cd19137e2179ULL,
|
||||
} };
|
||||
__device__ __constant__ unsigned long round_k[80] = {
|
||||
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
|
||||
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
|
||||
0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
|
||||
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
|
||||
0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
|
||||
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
|
||||
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
|
||||
0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
|
||||
0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
|
||||
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
|
||||
0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
|
||||
0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
|
||||
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
|
||||
0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
|
||||
0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
|
||||
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
|
||||
0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
|
||||
0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
|
||||
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
|
||||
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
|
||||
0x428a2f98d728ae22ULL,
|
||||
0x7137449123ef65cdULL,
|
||||
0xb5c0fbcfec4d3b2fULL,
|
||||
0xe9b5dba58189dbbcULL,
|
||||
0x3956c25bf348b538ULL,
|
||||
0x59f111f1b605d019ULL,
|
||||
0x923f82a4af194f9bULL,
|
||||
0xab1c5ed5da6d8118ULL,
|
||||
0xd807aa98a3030242ULL,
|
||||
0x12835b0145706fbeULL,
|
||||
0x243185be4ee4b28cULL,
|
||||
0x550c7dc3d5ffb4e2ULL,
|
||||
0x72be5d74f27b896fULL,
|
||||
0x80deb1fe3b1696b1ULL,
|
||||
0x9bdc06a725c71235ULL,
|
||||
0xc19bf174cf692694ULL,
|
||||
0xe49b69c19ef14ad2ULL,
|
||||
0xefbe4786384f25e3ULL,
|
||||
0x0fc19dc68b8cd5b5ULL,
|
||||
0x240ca1cc77ac9c65ULL,
|
||||
0x2de92c6f592b0275ULL,
|
||||
0x4a7484aa6ea6e483ULL,
|
||||
0x5cb0a9dcbd41fbd4ULL,
|
||||
0x76f988da831153b5ULL,
|
||||
0x983e5152ee66dfabULL,
|
||||
0xa831c66d2db43210ULL,
|
||||
0xb00327c898fb213fULL,
|
||||
0xbf597fc7beef0ee4ULL,
|
||||
0xc6e00bf33da88fc2ULL,
|
||||
0xd5a79147930aa725ULL,
|
||||
0x06ca6351e003826fULL,
|
||||
0x142929670a0e6e70ULL,
|
||||
0x27b70a8546d22ffcULL,
|
||||
0x2e1b21385c26c926ULL,
|
||||
0x4d2c6dfc5ac42aedULL,
|
||||
0x53380d139d95b3dfULL,
|
||||
0x650a73548baf63deULL,
|
||||
0x766a0abb3c77b2a8ULL,
|
||||
0x81c2c92e47edaee6ULL,
|
||||
0x92722c851482353bULL,
|
||||
0xa2bfe8a14cf10364ULL,
|
||||
0xa81a664bbc423001ULL,
|
||||
0xc24b8b70d0f89791ULL,
|
||||
0xc76c51a30654be30ULL,
|
||||
0xd192e819d6ef5218ULL,
|
||||
0xd69906245565a910ULL,
|
||||
0xf40e35855771202aULL,
|
||||
0x106aa07032bbd1b8ULL,
|
||||
0x19a4c116b8d2d0c8ULL,
|
||||
0x1e376c085141ab53ULL,
|
||||
0x2748774cdf8eeb99ULL,
|
||||
0x34b0bcb5e19b48a8ULL,
|
||||
0x391c0cb3c5c95a63ULL,
|
||||
0x4ed8aa4ae3418acbULL,
|
||||
0x5b9cca4f7763e373ULL,
|
||||
0x682e6ff3d6b2b8a3ULL,
|
||||
0x748f82ee5defb2fcULL,
|
||||
0x78a5636f43172f60ULL,
|
||||
0x84c87814a1f0ab72ULL,
|
||||
0x8cc702081a6439ecULL,
|
||||
0x90befffa23631e28ULL,
|
||||
0xa4506cebde82bde9ULL,
|
||||
0xbef9a3f7b2c67915ULL,
|
||||
0xc67178f2e372532bULL,
|
||||
0xca273eceea26619cULL,
|
||||
0xd186b8c721c0c207ULL,
|
||||
0xeada7dd6cde0eb1eULL,
|
||||
0xf57d4f7fee6ed178ULL,
|
||||
0x06f067aa72176fbaULL,
|
||||
0x0a637dc5a2c898a6ULL,
|
||||
0x113f9804bef90daeULL,
|
||||
0x1b710b35131c471bULL,
|
||||
0x28db77f523047d84ULL,
|
||||
0x32caab7b40c72493ULL,
|
||||
0x3c9ebe0a15c9bebcULL,
|
||||
0x431d67c49c100d4cULL,
|
||||
0x4cc5d4becb3e42b6ULL,
|
||||
0x597f299cfc657e2aULL,
|
||||
0x5fcb6fab3ad6faecULL,
|
||||
0x6c44198c4a475817ULL,
|
||||
};
|
||||
__device__ __forceinline__ unsigned long load64(const unsigned char* __restrict__ x) {
|
||||
return (static_cast<unsigned long>(x[0]) << 56) | (static_cast<unsigned long>(x[1]) << 48) | (static_cast<unsigned long>(x[2]) << 40) | (static_cast<unsigned long>(x[3]) << 32) | (static_cast<unsigned long>(x[4]) << 24) | (static_cast<unsigned long>(x[5]) << 16) | (static_cast<unsigned long>(x[6]) << 8) | (static_cast<unsigned long>(x[7]));
|
||||
}
|
||||
__device__ inline void store64(unsigned char* __restrict__ x, unsigned long v) {
|
||||
#pragma unroll 8
|
||||
for (unsigned char i = 0; i < 8; i++) x[i] = static_cast<unsigned char>(v >> (56 - i * 8));
|
||||
for (unsigned char i = 0; i < 8; i++)
|
||||
x[i] = static_cast<unsigned char>(v >> (56 - i * 8));
|
||||
}
|
||||
#define rot64(x, bits) ((x >> bits) | (x << (64 - bits)))
|
||||
__device__ void sha512_block(sha512_state* __restrict__ s, const unsigned char* __restrict__ blk) {
|
||||
@@ -94,7 +159,8 @@ __device__ void sha512_final(sha512_state* __restrict__ s, const unsigned char*
|
||||
}
|
||||
__device__ void sha512_get(const sha512_state* __restrict__ s, unsigned char* __restrict__ hash) {
|
||||
unsigned len = 64;
|
||||
if (len > 128) len = 128;
|
||||
if (len > 128)
|
||||
len = 128;
|
||||
unsigned i = 0, c = (len < 8) ? len : 8;
|
||||
store64(hash, s->h[i]);
|
||||
hash += c;
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
__device__ int cstring_length(const char* s) {
|
||||
int len = 0;
|
||||
while (s[len]) len++;
|
||||
while (s[len])
|
||||
len++;
|
||||
return len;
|
||||
}
|
||||
__device__ int cstring_find(const char* s, const char* sub) {
|
||||
int i, j;
|
||||
int n = cstring_length(s);
|
||||
int m = cstring_length(sub);
|
||||
if (m == 0) return 0;
|
||||
if (m == 0)
|
||||
return 0;
|
||||
for (i = 0; i <= n - m; i++) {
|
||||
for (j = 0; j < m; j++) {
|
||||
if (s[i + j] != sub[j]) break;
|
||||
if (s[i + j] != sub[j])
|
||||
break;
|
||||
}
|
||||
if (j == m) return i;
|
||||
if (j == m)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@@ -22,7 +26,8 @@ __device__ int cstring_to_ull(const char* s, unsigned* val) {
|
||||
if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
|
||||
i = 2;
|
||||
}
|
||||
if (s[i] == '\0') return 1;
|
||||
if (s[i] == '\0')
|
||||
return 1;
|
||||
for (; s[i]; i++) {
|
||||
char c = s[i];
|
||||
int digit;
|
||||
@@ -60,7 +65,8 @@ __device__ void concat(const char* s1, const char* s2, char* out, int outSize) {
|
||||
}
|
||||
while (s2[j] && i < outSize - 1) {
|
||||
out[i] = s2[j];
|
||||
i++; j++;
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
out[i] = '\0';
|
||||
}
|
||||
|
||||
37
meson.build
37
meson.build
@@ -1,54 +1,59 @@
|
||||
|
||||
project('yggm', ['cpp', 'cuda'],
|
||||
project(
|
||||
'yggm',
|
||||
['cpp', 'cuda'],
|
||||
version: '20.08.2025',
|
||||
default_options: ['cpp_std=c++20']
|
||||
default_options: ['cpp_std=c++20'],
|
||||
)
|
||||
|
||||
cpp = meson.get_compiler('cpp')
|
||||
cuda = meson.get_compiler('cuda')
|
||||
|
||||
common_cpp_flags = [
|
||||
'-march=native',
|
||||
'-ffast-math',
|
||||
'-pipe',
|
||||
'-funroll-loops',
|
||||
'-Wpedantic',
|
||||
'-Wconversion',
|
||||
'-Wuninitialized',
|
||||
'-Wsign-conversion',
|
||||
'-masm=intel'
|
||||
'-masm=intel',
|
||||
'-I../libs/',
|
||||
]
|
||||
|
||||
common_cuda_flags = [
|
||||
'-rdc=true',
|
||||
'-Xptxas', '-O3',
|
||||
'-Xptxas',
|
||||
'-O3',
|
||||
'-use_fast_math',
|
||||
'-ftz=true',
|
||||
'-prec-div=false',
|
||||
'-prec-sqrt=false',
|
||||
'-gencode', 'arch=compute_75,code=sm_75',
|
||||
'-Wno-deprecated-gpu-targets',
|
||||
'--default-stream', 'per-thread',
|
||||
'--expt-relaxed-constexpr',
|
||||
'-I../libs/',
|
||||
'-I/opt/cuda/include',
|
||||
]
|
||||
|
||||
add_project_arguments(common_cpp_flags, language: 'cpp')
|
||||
add_project_arguments(common_cuda_flags, language: 'cuda')
|
||||
|
||||
if get_option('buildtype') == 'release'
|
||||
add_project_arguments(['-DRELEASE'], language: ['cpp', 'cuda'])
|
||||
add_project_arguments(['-DRELEASE', '-DAVX2'], language: ['cpp', 'cuda'])
|
||||
elif get_option('buildtype') == 'debug'
|
||||
add_project_arguments(['-DDEBUG', '-g', '-O0'], language: ['cpp', 'cuda'])
|
||||
add_project_arguments(['-DDEBUG', '-g', '-O0', '-DAVX2'], language: ['cpp', 'cuda'])
|
||||
endif
|
||||
|
||||
cpp_sources = ['sources/main.cpp']
|
||||
cpp_sources = ['sources/main.cpp'] + files(run_command('find', 'libs', '-name','*.h').stdout().split())
|
||||
cuda_sources = ['sources/main.cu'] + files(run_command('find', 'libs', '-name', '*.cu').stdout().split())
|
||||
|
||||
executable('yggmcu',
|
||||
executable(
|
||||
'yggmcu',
|
||||
sources: cuda_sources,
|
||||
install: true,
|
||||
dependencies: dependency('cuda', modules: ['cudart'], required: false)
|
||||
)
|
||||
|
||||
executable('yggm',
|
||||
executable(
|
||||
'yggmc',
|
||||
sources: cpp_sources,
|
||||
install: true,
|
||||
dependencies: dependency('libsodium')
|
||||
dependencies: dependency('libsodium'),
|
||||
)
|
||||
4
native.ini
Normal file
4
native.ini
Normal file
@@ -0,0 +1,4 @@
|
||||
[binaries]
|
||||
cpp = 'clang++'
|
||||
cuda = 'nvcc'
|
||||
cuda_host_compiler = 'gcc-14'
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "defines.h"
|
||||
#include <arpa/inet.h>
|
||||
#include <atomic>
|
||||
#include "../libs/defines.h"
|
||||
#include <immintrin.h>
|
||||
#include <iostream>
|
||||
#include <memory.h>
|
||||
@@ -80,7 +80,7 @@ void getRawAddress(unsigned lErase, Key& InvertedPublicKey, Address& rawAddr) {
|
||||
memcpy(&rawAddr[2], &InvertedPublicKey[start], 14);
|
||||
}
|
||||
inline void invertKey(const unsigned char* __restrict key, Key& inverted) {
|
||||
#if USE_AVX2
|
||||
#ifdef __AVX2__
|
||||
_mm256_storeu_si256(reinterpret_cast<__m256i*>(inverted), _mm256_xor_si256(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(key)), _mm256_set1_epi8(0xFF)));
|
||||
#else
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
@@ -102,17 +102,16 @@ inline unsigned getZeros(const Key& v) {
|
||||
return leadZeros;
|
||||
}
|
||||
void miner_thread() {
|
||||
alignas(32) Key inv;
|
||||
KeysBox keys;
|
||||
Address rawAddr;
|
||||
alignas(32) thread_local Key inv;
|
||||
thread_local KeysBox keys;
|
||||
thread_local Address rawAddr;
|
||||
while (true) {
|
||||
crypto_sign_ed25519_keypair(keys.PublicKey, keys.PrivateKey);
|
||||
unsigned ones = getZeros(keys.PublicKey);
|
||||
unsigned current_high = conf.high.load(std::memory_order_relaxed);
|
||||
while (ones > current_high && !conf.high.compare_exchange_strong(current_high, ones, std::memory_order_relaxed)) {
|
||||
current_high = conf.high.load(std::memory_order_relaxed);
|
||||
unsigned ones = getZeros(keys.PublicKey), high = conf.high.load(std::memory_order_relaxed);
|
||||
while (ones > high && !conf.high.compare_exchange_strong(high, ones, std::memory_order_relaxed)) {
|
||||
high = conf.high.load(std::memory_order_relaxed);
|
||||
}
|
||||
if (ones > current_high) {
|
||||
if (ones > high) {
|
||||
invertKey(keys.PublicKey, inv);
|
||||
getRawAddress(ones, inv, rawAddr);
|
||||
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddress(rawAddr).c_str(), KeyToString(keys.PublicKey).c_str(), KeyToString(keys.PrivateKey).c_str());
|
||||
@@ -129,7 +128,7 @@ void startThreads() {
|
||||
}
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
printf("BuildType: %s\nAVX2: %d\n", __BUILDTYPE__, USE_AVX2);
|
||||
std::cout << "BuildType: " << __BUILDTYPE__ << std::endl << "AVX2: " << __AVX2__ << std::endl;
|
||||
if (argc < 1)
|
||||
return 0;
|
||||
for (int x = 0; x < argc; x++) {
|
||||
|
||||
Reference in New Issue
Block a user