diff --git a/ed25519/ed25519.h b/ed25519/ed25519.h index 43007c6..6f4c37a 100644 --- a/ed25519/ed25519.h +++ b/ed25519/ed25519.h @@ -3,7 +3,22 @@ #define ED25519_PUBLICKEYBYTES 32 +#ifndef ED25519_donna +# if defined(_MSC_VER) +# define ALIGN(x) __declspec(align(x)) +# elif defined(__GNUC__) +# undef ALIGN +# define ALIGN(x) __attribute__((aligned(x))) +# else +# ifndef ALIGN +# define ALIGN(x) +# endif +# endif +#endif + + #ifdef ED25519_ref10 + #include "ref10/ed25519.h" #define ed25519_seckey ed25519_ref10_seckey #define ed25519_seckey_expand ed25519_ref10_seckey_expand @@ -35,49 +50,47 @@ static const ge_cached ge_eightpoint = { } }; inline static void ge_initeightpoint(void) {} + #endif #ifdef ED25519_amd64_51_30k -#define ED25519_amd64_common -#else -#ifdef ED25519_amd64_64_24k -#define ED25519_amd64_common -#endif -#endif - -#ifdef ED25519_amd64_51_30k #include "amd64-51-30k/ed25519.h" #include "amd64-51-30k/ge25519.h" #define ed25519_seckey ed25519_amd64_51_30k_seckey #define ed25519_seckey_expand ed25519_amd64_51_30k_seckey_expand #define ed25519_pubkey ed25519_amd64_51_30k_pubkey #define ed25519_keygen ed25519_amd64_51_30k_keygen + #endif + #ifdef ED25519_amd64_64_24k + #include "amd64-64-24k/ed25519.h" #include "amd64-64-24k/ge25519.h" #define ed25519_seckey ed25519_amd64_64_seckey #define ed25519_seckey_expand ed25519_amd64_64_seckey_expand #define ed25519_pubkey ed25519_amd64_64_pubkey #define ed25519_keygen ed25519_amd64_64_keygen + #endif // common -#ifdef ED25519_amd64_common +#if defined(ED25519_amd64_51_30k) || defined(ED25519_amd64_64_24k) + #define fe fe25519 #define ge_p1p1 ge25519_p1p1 #define ge_p3 ge25519_p3 -#define ge_cached ge25519_pniels #define ge_p1p1_to_p3 ge25519_p1p1_to_p3 #define ge_p3_tobytes ge25519_pack #define ge_add ge25519_pnielsadd_p1p1 #define ge_p3_batchtobytes_destructive_1 ge25519_batchpack_destructive_1 #define ge_p3_batchtobytes_destructive_finish ge25519_batchpack_destructive_finish + #endif @@ -130,6 +143,7 @@ inline static void ge_initeightpoint(void) {} #ifdef ED25519_donna + #define ED25519_CUSTOMRANDOM #define ED25519_CUSTOMHASH #include @@ -174,11 +188,10 @@ static int ed25519_keypair(unsigned char *pk,unsigned char *sk) return 0; } -// hacky, but works for current stuff in main.c -#define fe bignum25519 ALIGN(16) -#define ge_p1p1 ge25519_p1p1 ALIGN(16) -#define ge_p3 ge25519 ALIGN(16) -#define ge_cached ge25519_pniels ALIGN(16) + +#define fe bignum25519 +#define ge_p1p1 ge25519_p1p1 +#define ge_p3 ge25519 #define ge_p1p1_to_p3 ge25519_p1p1_to_full #define ge_p3_tobytes ge25519_pack diff --git a/main.c b/main.c index f48c1a2..22f9cfe 100644 --- a/main.c +++ b/main.c @@ -186,8 +186,6 @@ enum worker_type { WT_BATCH, }; -#define TATTR 0 - int main(int argc,char **argv) { const char *outfile = 0; @@ -520,7 +518,6 @@ int main(int argc,char **argv) VEC_ZERO(tstats); #endif -#if TATTR pthread_attr_t tattr,*tattrp = &tattr; tret = pthread_attr_init(tattrp); if (tret) { @@ -528,11 +525,17 @@ int main(int argc,char **argv) tattrp = 0; } else { - tret = pthread_attr_setstacksize(tattrp,2<<20); + // 256KiB plus whatever batch stuff uses if in batch mode + size_t ss = 256 << 10; + if (wt == WT_BATCH) + ss += worker_batch_memuse(); + // align to 64KiB + ss = (ss + (64 << 10) - 1) & ~((64 << 10) - 1); + //printf("stack size: " FSZ "\n",ss); + tret = pthread_attr_setstacksize(tattrp,ss); if (tret) perror("pthread_attr_setstacksize"); } -#endif for (size_t i = 0;i < VEC_LENGTH(threads);++i) { void *tp = 0; @@ -562,13 +565,11 @@ int main(int argc,char **argv) } } -#if TATTR if (tattrp) { tret = pthread_attr_destroy(tattrp); if (tret) perror("pthread_attr_destroy"); } -#endif #ifdef STATISTICS struct timespec nowtime; diff --git a/worker.c b/worker.c index d4f48de..41d540b 100644 --- a/worker.c +++ b/worker.c @@ -94,9 +94,9 @@ static void onionready(char *sname,const u8 *secret,const u8 *pubonion) } // disabled as this was never ever triggered as far as I'm aware -#if 0 +#if 1 // Sanity check that the public key matches the private one. - ge_p3 point; + ge_p3 ALIGN(16) point; u8 testpk[PUBLIC_LEN]; ge_scalarmult_base(&point, secret); ge_p3_tobytes(testpk, &point); @@ -220,6 +220,11 @@ static void reseedright(u8 sk[SECRET_LEN]) #define BATCHNUM 2048 #endif +size_t worker_batch_memuse(void) +{ + return (sizeof(ge_p3) + sizeof(fe) + sizeof(bytes32)) * BATCHNUM; +} + #include "worker_batch.inc.h" #include "worker_batch_pass.inc.h" diff --git a/worker.h b/worker.h index 209d7e5..7f50f6f 100644 --- a/worker.h +++ b/worker.h @@ -38,6 +38,7 @@ extern u8 determseed[SEED_LEN]; extern void worker_init(void); extern char *makesname(void); +extern size_t worker_batch_memuse(void); extern void *worker_slow(void *task); extern void *worker_fast(void *task); diff --git a/worker_batch.inc.h b/worker_batch.inc.h index 1636c0b..8de5330 100644 --- a/worker_batch.inc.h +++ b/worker_batch.inc.h @@ -8,13 +8,13 @@ void *worker_batch(void *task) u8 seed[SEED_LEN]; u8 hashsrc[checksumstrlen + PUBLIC_LEN + 1]; u8 wpk[PUBLIC_LEN + 1]; - ge_p3 ge_public; + ge_p3 ALIGN(16) ge_public; char *sname; // state to keep batch data - ge_p3 ge_batch[BATCHNUM]; - fe tmp_batch[BATCHNUM]; - bytes32 pk_batch[BATCHNUM]; + ge_p3 ALIGN(16) ge_batch [BATCHNUM]; + fe ALIGN(16) tmp_batch[BATCHNUM]; + bytes32 ALIGN(16) pk_batch [BATCHNUM]; size_t counter; size_t i; @@ -47,7 +47,7 @@ initseed: ge_scalarmult_base(&ge_public,sk); for (counter = 0;counter < SIZE_MAX-(8*BATCHNUM);counter += 8*BATCHNUM) { - ge_p1p1 sum; + ge_p1p1 ALIGN(16) sum; if (unlikely(endwork)) goto end; diff --git a/worker_batch_pass.inc.h b/worker_batch_pass.inc.h index eaf914b..3676eec 100644 --- a/worker_batch_pass.inc.h +++ b/worker_batch_pass.inc.h @@ -9,13 +9,13 @@ void *worker_batch_pass(void *task) u8 seed[SEED_LEN]; u8 hashsrc[checksumstrlen + PUBLIC_LEN + 1]; u8 wpk[PUBLIC_LEN + 1]; - ge_p3 ge_public; + ge_p3 ALIGN(16) ge_public; char *sname; // state to keep batch data - ge_p3 ge_batch[BATCHNUM]; - fe tmp_batch[BATCHNUM]; - bytes32 pk_batch[BATCHNUM]; + ge_p3 ALIGN(16) ge_batch [BATCHNUM]; + fe ALIGN(16) tmp_batch[BATCHNUM]; + bytes32 ALIGN(16) pk_batch [BATCHNUM]; size_t counter,oldcounter; size_t i; @@ -53,7 +53,7 @@ initseed: ge_scalarmult_base(&ge_public,sk); for (counter = oldcounter = 0;counter < DETERMINISTIC_LOOP_COUNT - (BATCHNUM - 1) * 8;counter += BATCHNUM * 8) { - ge_p1p1 sum; + ge_p1p1 ALIGN(16) sum; if (unlikely(endwork)) goto end; @@ -120,7 +120,7 @@ initseed: // can't have leftovers in theory if BATCHNUM was power of 2 and smaller than DETERMINISTIC_LOOP_COUNT bound #if (BATCHNUM & (BATCHNUM - 1)) || (BATCHNUM * 8) > DETERMINISTIC_LOOP_COUNT if (counter < DETERMINISTIC_LOOP_COUNT) { - ge_p1p1 sum; + ge_p1p1 ALIGN(16) sum; if (unlikely(endwork)) goto end; diff --git a/worker_fast.inc.h b/worker_fast.inc.h index db57b0b..141a34b 100644 --- a/worker_fast.inc.h +++ b/worker_fast.inc.h @@ -8,7 +8,7 @@ void *worker_fast(void *task) u8 seed[SEED_LEN]; u8 hashsrc[checksumstrlen + PUBLIC_LEN + 1]; u8 wpk[PUBLIC_LEN + 1]; - ge_p3 ge_public; + ge_p3 ALIGN(16) ge_public; char *sname; size_t counter; @@ -45,7 +45,7 @@ initseed: ge_p3_tobytes(pk,&ge_public); for (counter = 0;counter < SIZE_MAX-8;counter += 8) { - ge_p1p1 sum; + ge_p1p1 ALIGN(16) sum; if (unlikely(endwork)) goto end; diff --git a/worker_fast_pass.inc.h b/worker_fast_pass.inc.h index 8b698ef..2d482b3 100644 --- a/worker_fast_pass.inc.h +++ b/worker_fast_pass.inc.h @@ -9,7 +9,7 @@ void *worker_fast_pass(void *task) u8 seed[SEED_LEN]; u8 hashsrc[checksumstrlen + PUBLIC_LEN + 1]; u8 wpk[PUBLIC_LEN + 1]; - ge_p3 ge_public; + ge_p3 ALIGN(16) ge_public; char *sname; size_t counter,oldcounter; @@ -51,7 +51,7 @@ initseed: ge_p3_tobytes(pk,&ge_public); for (counter = oldcounter = 0;counter < DETERMINISTIC_LOOP_COUNT;counter += 8) { - ge_p1p1 sum; + ge_p1p1 ALIGN(16) sum; if (unlikely(endwork)) goto end;