// yggm/sources/main.cu
// 2025-03-14 19:55:09 +05:00
//
// 140 lines
// 4.8 KiB
// Plaintext

#include <stdio.h>
#include <cuda_runtime.h>
#include <curand_kernel.h>
#include <sha512.cuh>
#include <ed25519.cuh>
#include <edsign.cuh>
// Raw 16-byte address buffer, produced by getRawAddress and formatted by getAddress.
using Address = unsigned char[16];
// Raw 32-byte buffer; used in this file for public keys, private keys and key seeds.
using Key = unsigned char[32];
// Holds the two 32-byte buffers that minerKernel passes to ed25519_keygen
// and afterwards prints as the "PK" / "SK" hex strings.
struct KeysBox {
Key PublicKey;
Key PrivateKey;
};
// Largest leading-zero-bit count reported so far, shared by all device threads.
// minerKernel compares against it and raises it with atomicMax; the initial 0x10
// means only keys with more than 16 leading zero bits are ever printed.
__device__ static unsigned high = 0x10;
// Fixed-size, by-value string buffer returned by KeyToString:
// 64 hex characters plus the terminating NUL.
struct ds64 {
char data[65];
};
// Fixed-size, by-value string buffer returned by getAddress.
// getAddress writes 39 characters plus the terminating NUL, so the 46-byte
// capacity leaves slack (presumably sized after INET6_ADDRSTRLEN — confirm).
struct ds46 {
char data[46];
};
// Renders a 32-byte key as 64 lowercase hexadecimal characters followed by a
// terminating NUL.
//
// key     : pointer to at least 32 readable bytes.
// returns : the text by value inside a ds64, so the result can be passed
//           straight to a device-side printf("%s").
__device__ ds64 KeyToString(const unsigned char* key) noexcept {
    const char* const digits = "0123456789abcdef";
    ds64 out;
    char* cursor = out.data;
    for (int byteIdx = 0; byteIdx < 32; ++byteIdx) {
        const unsigned char value = key[byteIdx];
        *cursor++ = digits[value >> 4];    // high nibble first
        *cursor++ = digits[value & 0x0F];  // then low nibble
    }
    *cursor = '\0';  // cursor now points at data[64]
    return out;
}
// Formats a 16-byte raw address as eight colon-separated groups of four
// lowercase hex digits (no zero compression), e.g. "0211:aabb:...:ccdd".
//
// rawAddr : 16 readable bytes.
// returns : the 39-character, NUL-terminated text by value inside a ds46.
__device__ ds46 getAddress(const unsigned char rawAddr[16]) noexcept {
    const char* const digits = "0123456789abcdef";
    ds46 text;
    char* out = text.data;
    for (int byteIdx = 0; byteIdx < 16; ++byteIdx) {
        // A separator goes in front of every 2-byte group except the first one.
        if (byteIdx != 0 && (byteIdx & 1) == 0) {
            *out++ = ':';
        }
        const unsigned char value = rawAddr[byteIdx];
        *out++ = digits[value >> 4];
        *out++ = digits[value & 0x0F];
    }
    *out = '\0';
    return text;
}
// Builds a 16-byte raw address from an inverted public key.
//
// The first (lErase + 1) bits of the key are dropped, and the following
// 14 bytes' worth of bits become the address payload:
//   rawAddr[0]     = 0x02 (fixed prefix byte)
//   rawAddr[1]     = lErase, truncated to one byte
//   rawAddr[2..15] = key bits starting at bit offset (lErase + 1)
//
// lErase            : number of leading bits to erase before the one extra
//                     skipped bit; expected to be non-negative.
//                     NOTE(review): presumably the leading-ones count of the
//                     inverted key — confirm against the caller.
// InvertedPublicKey : 32-byte key; MODIFIED IN PLACE when the bit offset is
//                     not byte-aligned (the affected bytes are left-shifted).
// rawAddr           : receives the 16-byte result.
//
// Bits that would lie beyond the end of the 32-byte key are treated as zero,
// so large lErase values no longer read or write past the key buffer.
__device__ void getRawAddress(int lErase, Key& InvertedPublicKey, Address& rawAddr) noexcept {
    constexpr int keyLen = static_cast<int>(sizeof(Key));  // 32
    constexpr int payloadLen = 14;                         // bytes after prefix + count

    lErase++;  // also skip the single bit that follows the erased run
    const int bitsToShift = lErase % 8;
    const int start = lErase / 8;

    if (bitsToShift != 0) {
        // Shift the window left so the payload becomes byte-aligned at `start`.
        // The loop stops at the end of the key; the byte after the last key
        // byte contributes zero bits.
        for (int i = start; i < start + 15 && i < keyLen; i++) {
            const unsigned next = (i + 1 < keyLen) ? InvertedPublicKey[i + 1] : 0u;
            InvertedPublicKey[i] = static_cast<unsigned char>(
                (InvertedPublicKey[i] << bitsToShift) | (next >> (8 - bitsToShift)));
        }
    }

    rawAddr[0] = 0x02;
    rawAddr[1] = static_cast<unsigned char>(lErase - 1);
    for (int j = 0; j < payloadLen; j++) {
        const int src = start + j;
        rawAddr[2 + j] = (src < keyLen) ? InvertedPublicKey[src]
                                        : static_cast<unsigned char>(0);
    }
}
// Counts the leading (most-significant) zero bits of a 32-bit value.
//
// x       : value to inspect.
// returns : 0..32; 32 when x == 0.
//
// Uses the CUDA integer intrinsic __clz instead of the host-compiler builtin
// __builtin_clz: __clz is the supported device-side primitive and is defined
// to return 32 for a zero argument, so no special case is needed.
__device__ unsigned char zeroCounter(unsigned int x) {
    return static_cast<unsigned char>(__clz(static_cast<int>(x)));
}
// Counts the leading zero bits of a 32-byte big-endian value.
//
// v       : pointer to at least 32 readable bytes; v[0] is the most
//           significant byte.
// returns : number of leading zero bits, saturated at 255. An all-zero input
//           has 256 leading zeros, which does not fit in the unsigned char
//           return type; it is reported as 255 rather than wrapping to 0.
__device__ unsigned char getZeros(const unsigned char* v) {
    unsigned leadZeros = 0;  // wide accumulator so 8 * 32 cannot wrap
    for (int i = 0; i < 32; i += 4) {
        // Assemble four bytes into one big-endian 32-bit word.
        const unsigned word = (static_cast<unsigned>(v[i]) << 24) |
                              (static_cast<unsigned>(v[i + 1]) << 16) |
                              (static_cast<unsigned>(v[i + 2]) << 8) |
                              (static_cast<unsigned>(v[i + 3]));
        if (word != 0) {
            // First non-zero word: add its own leading zeros and stop.
            leadZeros += zeroCounter(word);
            break;
        }
        leadZeros += 32;
    }
    return static_cast<unsigned char>(leadZeros > 255u ? 255u : leadZeros);
}
// Initialises one cuRAND generator state per launched thread.
//
// Launch layout : 1-D grid of 1-D blocks; thread t writes randStates[t].
// randStates    : device array with at least gridDim.x * blockDim.x elements.
//                 There is no bounds check, so the array must cover the
//                 whole launch.
//
// Each thread seeds with the current clock64() value plus its global index and
// uses its global index as the cuRAND sequence number.
__global__ void initRand(curandState* randStates) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    curandState* const slot = &randStates[tid];
    const unsigned long long seed = static_cast<unsigned long long>(clock64()) + tid;
    curand_init(seed, tid, 0, slot);
}
// One step of the xorshift128+ generator with shift constants (23, 17, 26).
//
// state   : two 64-bit words, updated in place.
// returns : the sum of the two new state words.
__device__ unsigned long long xorshift128plus(unsigned long long* state) noexcept {
    const unsigned long long s0 = state[1];
    unsigned long long s1 = state[0];
    s1 = s1 ^ (s1 << 23);
    s1 = s1 ^ (s1 >> 17) ^ s0 ^ (s0 >> 26);
    state[0] = s0;  // old second word becomes the new first word
    state[1] = s1;
    return s0 + s1;
}
// Fills a buffer with pseudo-random bytes.
//
// buf   : destination, at least `size` writable bytes.
// size  : number of bytes to produce.
// state : xorshift128plus state (two 64-bit words); advanced once per byte.
//
// Each output byte is the low 8 bits of one xorshift128plus result.
__device__ void rmbytes(unsigned char* buf, unsigned long size, unsigned long long* state) {
    unsigned char* const end = buf + size;
    for (unsigned char* cursor = buf; cursor != end; ++cursor) {
        *cursor = static_cast<unsigned char>(xorshift128plus(state) & 0xFF);
    }
}
// Writes the bitwise complement of a 32-byte key.
//
// key      : 32 readable bytes.
// inverted : 32 writable bytes; receives ~key[i] for every byte.
__device__ void invertKey(const unsigned char* key, unsigned char* inverted) {
    const unsigned char* src = key;
    unsigned char* dst = inverted;
    for (int remaining = 32; remaining > 0; --remaining) {
        const unsigned char value = *src++;
        *dst++ = static_cast<unsigned char>(~value);
    }
}
// Endless key-mining loop: every thread repeatedly derives a key pair from a
// pseudo-random 32-byte seed and reports public keys with a record number of
// leading zero bits.
//
// Launch layout : 1-D grid of 1-D blocks; thread t reads randStates[t].
// randStates    : device array of initialised cuRAND states with at least
//                 gridDim.x * blockDim.x elements (no bounds check is done).
//
// The kernel never returns. Results are emitted with device-side printf only.
__global__ void minerKernel(curandState* randStates) {
    const int thid = blockIdx.x * blockDim.x + threadIdx.x;
    curandState localState = randStates[thid];

    // curand() yields 32 bits per call, so every 64-bit xorshift word is built
    // from two draws; a single draw would leave the upper half of each word zero.
    unsigned long long xorshiftState[2];
    for (int w = 0; w < 2; ++w) {
        const unsigned long long hi = curand(&localState);
        const unsigned long long lo = curand(&localState);
        xorshiftState[w] = (hi << 32) | lo;
    }
    // An all-zero xorshift128+ state only ever produces zeros; nudge it off zero.
    if ((xorshiftState[0] | xorshiftState[1]) == 0) {
        xorshiftState[1] = 0x9E3779B97F4A7C15ULL;
    }

    Key seed;
    rmbytes(seed, sizeof(seed), xorshiftState);
    if (thid == 0) printf("Seed: %s\n", KeyToString(seed).data);

    while (true) {
        KeysBox keys;
        // NOTE(review): argument roles are taken from the member names;
        // ed25519_keygen is declared in a header not visible here — confirm.
        ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);

        // atomicMax returns the previous maximum, so the branch is taken only
        // by a thread that strictly improved the global record.
        const unsigned zeros = getZeros(keys.PublicKey);
        if (zeros > atomicMax(&high, zeros)) {
            // NOTE(review): the "IPv6" field prints the zero-bit count in hex,
            // not a formatted address.
            printf("\nIPv6:\t%x\nPK:\t%s\nSK:\t%s\n", zeros, KeyToString(keys.PublicKey).data, KeyToString(keys.PrivateKey).data);
        }

        rmbytes(seed, sizeof(seed), xorshiftState);
    }
}
// Host entry point: sizes the launch from the occupancy of minerKernel on
// device 0, initialises one cuRAND state per thread, then starts the miner.
//
// The grid used for BOTH kernels is exactly the grid the state buffer was
// allocated for (maxBlocks blocks), so no thread indexes past the buffer.
// Every CUDA call and kernel launch is checked.
//
// returns : 0 on success, 1 if any CUDA call fails or the kernel cannot be
//           launched with the chosen block size.
int main() {
    const int threadsPerBlock = 256;

    // Reports a failed CUDA call on stderr; yields true when the call succeeded.
    auto ok = [](cudaError_t err, const char* what) -> bool {
        if (err == cudaSuccess) return true;
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
        return false;
    };

    cudaDeviceProp prop;
    if (!ok(cudaGetDeviceProperties(&prop, 0), "cudaGetDeviceProperties")) return 1;

    int mBpSM = 0;
    if (!ok(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, minerKernel, threadsPerBlock, 0),
            "cudaOccupancyMaxActiveBlocksPerMultiprocessor")) {
        return 1;
    }

    const int SMs = prop.multiProcessorCount;
    const int maxBlocks = mBpSM * SMs;
    const int totalThreads = maxBlocks * threadsPerBlock;
    printf("SMs: %d\n", SMs);
    printf("maxBlocks: %d\n", maxBlocks);
    printf("totalThreads: %d\n", totalThreads);
    printf("MaxBlocksPerSM: %d\n", mBpSM);

    if (maxBlocks <= 0) {
        fprintf(stderr, "minerKernel cannot run with %d threads per block on this device\n", threadsPerBlock);
        return 1;
    }

    curandState* rst = nullptr;
    if (!ok(cudaMalloc(&rst, static_cast<size_t>(totalThreads) * sizeof(curandState)), "cudaMalloc")) {
        return 1;
    }

    // Launch errors surface through cudaGetLastError, execution errors at the
    // following synchronisation.
    initRand<<<maxBlocks, threadsPerBlock>>>(rst);
    bool good = ok(cudaGetLastError(), "initRand launch") &&
                ok(cudaDeviceSynchronize(), "initRand execution");

    if (good) {
        minerKernel<<<maxBlocks, threadsPerBlock>>>(rst);
        good = ok(cudaGetLastError(), "minerKernel launch") &&
               ok(cudaDeviceSynchronize(), "minerKernel execution");
    }

    cudaFree(rst);
    return good ? 0 : 1;
}