Skip to content
Snippets Groups Projects
Commit d224f00e authored by Julia Sauvage's avatar Julia Sauvage
Browse files

Merge branch 'master' of gitlab.cristal.univ-lille.fr:bouillag/pcg (peut etre caca)

parents 2a829ec8 aea1a534
No related branches found
No related tags found
No related merge requests found
......@@ -5,3 +5,7 @@
/Cunknown/test_falsenegative
/Cunknown/benchmark
/Cunknown/main
/Cunknown/benchmark_omp
/Cunknown/checkpoint.bin
/challenges/pcg64-challenge
/challenges/pcg64s-challenge
......@@ -14,7 +14,7 @@ unsigned long long Greduite[9] = {
-728312298332, 5479732607037, 6319848582548
};
float invG[9] = {
double invG[9] = {
-9.25221813226351e-14, 2.32272588749499e-13, 5.30001389997814e-15,
-7.81560146462246e-14, -4.52719459143047e-15, 1.09411828555506e-13,
5.71040610630735e-14, 3.06929503514353e-14, 6.39750297008745e-14
......@@ -84,28 +84,48 @@ void getSumPol(unsigned long long* sumPol,unsigned long long* sumPolY, pcg128_t*
}
/* cf. https://stackoverflow.com/questions/17035464/a-fast-method-to-round-a-double-to-a-32-bit-int-explained#comment61972557_17035583 */
/*
 * Round x to the nearest integer without calling libm (ties go to even under
 * the default floating-point rounding mode).
 *
 * Adding 1.5 * 2^52 moves the integer part of x into the low mantissa bits of
 * an IEEE-754 binary64 value; the low 51 bits of the representation are then
 * the result in two's complement. Only meaningful for -2^50 <= x < 2^50.
 *
 * The bits are handled through an unsigned member: the former signed
 * "<<= 13; >>= 13" sequence shifted into the sign bit (undefined behaviour)
 * and depended on an arithmetic right shift (implementation-defined).
 */
static inline long long crazy_round(double x)
{
    union { double d; unsigned long long u; } magic;
    const unsigned long long sign = 1ull << 50;       /* sign bit of the 51-bit field */
    const unsigned long long mask = (sign << 1) - 1;  /* low 51 bits */

    magic.d = x + 6755399441055744.0;                 /* 1.5 * 2^52 */
    unsigned long long low = magic.u & mask;

    /* portable sign extension from 51 bits to 64 bits */
    return (long long) (low ^ sign) - (long long) sign;
}
/*
 * Second half of the "magic constant" rounding trick: x must ALREADY contain
 * the 1.5 * 2^52 (6755399441055744.0) bias added by the caller. The function
 * only extracts the low 51 bits of the binary64 representation and
 * sign-extends them.
 *
 * The extraction is done on an unsigned member: the former signed
 * "<<= 13; >>= 13" sequence shifted into the sign bit (undefined behaviour)
 * and depended on an arithmetic right shift (implementation-defined).
 */
static inline long long light_crazy_round(double x)
{
    union { double d; unsigned long long u; } magic;
    const unsigned long long sign = 1ull << 50;       /* sign bit of the 51-bit field */
    const unsigned long long mask = (sign << 1) - 1;  /* low 51 bits */

    magic.d = x;
    unsigned long long low = magic.u & mask;

    /* portable sign extension from 51 bits to 64 bits */
    return (long long) (low ^ sign) - (long long) sign;
}
/* sumPol/sumPolY are constant over many iterations. X (unrotated) and rot vary each time. */
int solve(pcg128_t* S, const unsigned long long* X, const int* rot, const unsigned long long* sumPol, const unsigned long long* sumPolY)
{
unsigned long long Y[nbiter];
unsigned long long Yprim[nbiter];
double Yprim[nbiter];
unsigned long long tmp3[nbiter];
float tmp2[nbiter];
for (int i = 0 ; i < nbiter ; i++) {
Y[i] = (((sumPol[i] ^ X[i]) % (1 << known_low)) << known_up ) + (rot[i] ^ (X[i] >> (k - known_up)));
Yprim[i] = (Y[i] - sumPolY[i]) % (1 << (known_low + known_up));
}
double tmp2[nbiter];
for (int i=0 ; i<nbiter ; i++) {
tmp2[i] = 0;
for(int j=0 ; j<nbiter ; j++)
tmp2[i] += invG[i * nbiter + j] * Yprim[j];
tmp2[i] += 6755399441055744.0;
}
for(int i = 0 ; i < nbiter ; i++)
tmp3[i] = (unsigned long long) roundf(tmp2[i]);
tmp3[i] = light_crazy_round(tmp2[i]);
unsigned long long Sprim0 = 0;
for(int j=0 ; j<nbiter ; j++)
......
......@@ -14,9 +14,8 @@ pcg128_t a;
pcg128_t c;
pcg128_t polC[nbiter];
extern unsigned long long Greduite[9];
extern float invG[9];
// extern unsigned long long Greduite[9];
// extern double invG[9];
/***** Fonctions *****/
void init_var_globales();
......
CFLAGS = -O3 -Wall -Wextra -Werror -march=native -mtune=native -fopenmp
# icc (avec "source /usr/intel/compilers_and_libraries/linux/bin/compilervars.sh intel64" avant)
#CC = icc
#CFLAGS = -O3 -xHost -qopt-zmm-usage=high -qopenmp
#LDFLAGS = -fopenmp -qopenmp
# gcc
CFLAGS = -O3 -Wall -Wextra -Werror -march=native -mtune=native -fopenmp -g
LDFLAGS = -fopenmp
LDLIBS = -lgmp -lm
LDLIBS = -lm
all: main benchmark test test_falsenegative benchmark_omp
all: main benchmark test
main.o: CC=mpicc
main: CC=mpicc
fonctions.o: fonctions.h
fonctions_bonus.o: fonctions.h
main.o: fonctions.h pcg_setseq.h
test.o: fonctions.h pcg_setseq.h
test_falsenegative.o: fonctions.h pcg_setseq.h
benchmark.o: fonctions.h pcg_setseq.h
benchmark_omp.o: fonctions.h pcg_setseq.h
main: fonctions.o main.o
benchmark: fonctions.o benchmark.o
benchmark_omp: fonctions.o benchmark_omp.o
test: fonctions.o test.o fonctions_bonus.o
test_falsenegative.o: fonctions.h pcg_setseq.h
test_falsenegative: fonctions.o test_falsenegative.o
.PHONY: clean check
......@@ -20,3 +34,7 @@ clean:
check: test test_falsenegative
prove -v ./test ./test_falsenegative
bench: benchmark benchmark_omp
./benchmark
./benchmark_omp
\ No newline at end of file
......@@ -6,71 +6,41 @@
#define WORK_FACTOR (1ull << 27)
int main() {
int main()
{
/* INITIALISATION DES PARAMETRES */
init_var_globales();
/********** Calculs/Tests plus ou moins à la con ***********/
/********** prepare test input ***********/
u64 X[nboutput];
pcg128_t S0 = (((pcg128_t) 5995207026785010249u) << k) + ((pcg128_t) 179350442155841024u);
pcg128_t c = ((((pcg128_t) 6364136223846793005u) << k) + 1442695040888963407u) >> 1;
pcg128_t vraiS[nboutput];
unsigned long long X[nboutput];
//printVal(S0, c);
pcg(vraiS, X, S0, &c, nboutput);
unsigned long long tabX[k * nbtest];
for (int i = 0; i < nbtest; i++)
for (int j = 0; j < k; j++)
tabX[i * k + j] = unrotate(X[i + nbiter], j);
//unsigned long long done = 0;
unsigned long long W0 = 5018, WC = 335;
u64 W0 = 5018, WC = 335;
/**** Polynômes en WC et W0 utilisés dans la résolution ****/
unsigned long long lowSumPol[nbiter + nbtest];
unsigned long long sumPolY[nbiter];
unsigned long long sumPolTest[nbtest];
for(int i = 0 ; i < nbiter ; i++){
lowSumPol[i] = (W0 * ((unsigned long long) powA[i]) + WC * ((unsigned long long) polA[i]));
sumPolY[i] = (polA[i] * WC + powA[i] * W0) >> (k - known_up);
}
for(int i = 0 ; i < nbtest ; i++){
lowSumPol[nbiter + i] = (W0 * ((unsigned long long) powA[i + nbiter]) + WC * ((unsigned long long) polA[i + nbiter]));
sumPolTest[i] = W0 * ((unsigned long long) (powA[i + nbiter] >> known_low) - 1) + WC * ((unsigned long long) (polA[i + nbiter] >> known_low) - 1);
}
char* goodY = setupGoodY();
getGoodY(goodY, tabX, lowSumPol, 1);
struct task_t task;
init_task(&task);
prepare_task(X, W0, WC, &task);
unsigned long long tabTmp[k * nbiter];
getTabTmp(tabTmp, X, lowSumPol, sumPolY);
int rot[nbiter];
for(int i = 0 ; i < nbiter ; i++)
rot[i] = 0;
printf("Taille de GoodY = %d Ko\n", nbtest * (1 << (known_low + known_up)) / 1024 / 8);
printf("known_low = %d\n", known_low);
printf("Taille de GoodY = %d Ko\n", (1 << (known_low + known_up)) / 1024 / 8);
printf("Début du benchmark (%llu iterations)\n", WORK_FACTOR);
double t1 = wtime();
for (unsigned long long r = 0 ;r < WORK_FACTOR; r++) {
for (u64 r = 0 ;r < WORK_FACTOR; r++) {
/***** Modification de rot et unrotX *****/
rot[0] = (rot[0] + 1) % k;
task.rot[0] = (task.rot[0] + 1) % k;
int i = 0;
while (rot[i] == 0 && i < nbiter) {
while (task.rot[i] == 0 && i < nbiter) {
i++;
rot[i] = (rot[i] + 1) % k;
task.rot[i] = (task.rot[i] + 1) % k;
}
if (solve_isgood(goodY, rot, tabTmp, sumPolY, sumPolTest)) {
if (solve_isgood(&task)) {
printf("candidat DS64 trouvé !!\n");
printf("temps pour trouver la solution = %f\n", wtime() - t1);
}
......@@ -78,8 +48,10 @@ int main() {
double t = wtime() - t1;
printf("Durée benchmark = %.2fs\n", t);
printf("Attaque complète = %.2fMh\n", t / WORK_FACTOR * (1ull << (nbiter * known_up + 2*known_low - 1)) / 3600 / 1e6);
printf("Itérations/s = %.1fM/s\n", WORK_FACTOR / t / 1e6);
printf("Concrètement = %d tasks of size %.1f h-CPU\n", 1 << known_low,
t / WORK_FACTOR * (1ull << (nbiter * known_up + known_low - 1)) / 3600);
printf("Attaque complète = %.0fK h-CPU\n", t / WORK_FACTOR * (1ull << (nbiter * known_up + 2*known_low - 1)) / 3600 / 1e3);
exit(0);
}
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include "fonctions.h"
static const u64 WORK_FACTOR = 1 << (nbiter * known_up);
/*
 * OpenMP benchmark driver.
 *
 * Generates nboutput PCG outputs from a fixed seed, then lets every OpenMP
 * thread run WORK_FACTOR calls to solve_isgood() on its own task (same W0,
 * WC offset by the thread id), stepping the rotation guesses like an odometer
 * in base k. Timing statistics are printed at the end.
 */
int main()
{
    /* global parameter initialisation */
    init_var_globales();

    /********** prepare test input ***********/
    u64 X[nboutput];
    pcg128_t S0 = (((pcg128_t) 5995207026785010249u) << k) + ((pcg128_t) 179350442155841024u);
    pcg128_t c = ((((pcg128_t) 6364136223846793005u) << k) + 1442695040888963407u) >> 1;
    pcg128_t vraiS[nboutput];
    pcg(vraiS, X, S0, &c, nboutput);

    int T = omp_get_max_threads();
    u64 W0 = 5018, WC = 335 - T/2;

    printf("known_low = %d\n", known_low);
    printf("# threads = %d\n", T);
    printf("Début du benchmark\n");
    double t1 = wtime();

    #pragma omp parallel
    {
        int tid = omp_get_thread_num();
        struct task_t task;
        init_task(&task);
        prepare_task(X, W0, WC + tid, &task);

        for (u64 r = 0; r < WORK_FACTOR; r++) {
            /***** advance the rotation guesses (odometer in base k) *****/
            task.rot[0] = (task.rot[0] + 1) % k;
            int i = 0;
            /*
             * Propagate the carry. The bound is tested BEFORE the array
             * access and stops at the last digit: the previous form
             * "rot[i] == 0 && i < nbiter" wrote rot[nbiter] when every
             * digit wrapped around (which happens on the last iteration).
             */
            while (i + 1 < nbiter && task.rot[i] == 0) {
                i++;
                task.rot[i] = (task.rot[i] + 1) % k;
            }

            if (solve_isgood(&task)) {
                printf("thread %d, candidat DS64 trouvé !!\n", tid);
                printf("temps pour trouver la solution = %f\n", wtime() - t1);
            }
        }

        /* init_task() allocated the bitmap with malloc; release it per thread */
        free(task.goodY);
    }

    double t = wtime() - t1;
    printf("Durée benchmark = %.2fs\n", t);
    printf("Itérations/s (%d threads) = %.1fM/s\n", T, WORK_FACTOR / t / 1e6 * T);
    printf("Itérations/s (1 threads) = %.1fM/s\n", WORK_FACTOR / t / 1e6);
    printf("Attaque complète = %.0fK h-CPU\n", t / WORK_FACTOR * (1ull << (nbiter * known_up + 2*known_low - 1)) / 3600 / 1e3);
    exit(0);
}
......@@ -4,13 +4,18 @@
#include "fonctions.h"
unsigned long long Greduite[16] =
pcg128_t a;
pcg128_t powA[nboutput];
pcg128_t polA[nboutput];
u64 Greduite[16] =
{-186304953996472, -216211368070119, 110964501361298, 131252974561432,
-126056243766680, 99587582169277, -5646098666150, -233919070109448,
7937589136904, -214303762177807, -268280113597118, -98716819647784,
93078431381544, -1707551230219, 149382085707466, -134620659538888};
float invG[16] =
double invG[16] =
{-2.04279952328856e-15, -2.93791683689260e-15, 6.74861642263602e-16, 2.61840245666112e-15,
-1.98886046520741e-15, 1.10621147658696e-15, -2.12731308263381e-15, -2.30132753131773e-15,
1.44762674070265e-15, -1.54769860122306e-16, -1.55490854567009e-15, 2.82055195172104e-15,
......@@ -22,9 +27,6 @@ void init_var_globales()
//multiplier a OK !
a = (((pcg128_t) 2549297995355413924) << k) + ((pcg128_t) 4865540595714422341);
//nombre de threads
nb_thread = omp_get_max_threads();
//increment polynome polC OK !
polA[0] = 0;
powA[0] = 1;
......@@ -36,7 +38,6 @@ void init_var_globales()
invG[i] *= 1ull << (k - known_up - known_low);
}
//////////////////// chrono //////////////////
double wtime()
{
......@@ -45,19 +46,22 @@ double wtime()
return (double) ts.tv_sec + ts.tv_usec / 1e6;
}
/*
 * Rotate Xi left by i bits within a k-bit word (undoes a right rotation by i).
 * Expects 0 <= i < k, with k a power of two.
 */
static inline u64 unrotate(u64 Xi, int i)
{
    /*
     * The right-shift count is reduced modulo k: the plain "k - i" equals the
     * full word width when i == 0, and shifting by the width of the type is
     * undefined behaviour. For 1 <= i < k the masked count is still k - i.
     */
    return (Xi >> ((-i) & (k - 1))) | (Xi << i);
}
char * setupGoodY()
{
char* goodY = malloc((1<<(known_low + known_up)) * sizeof(char) * nbtest / 8);
for (unsigned long long y = 0 ; y < nbtest * (1<< (known_low + known_up)) / 8 ; y++)
for (u64 y = 0 ; y < nbtest * (1<< (known_low + known_up)) / 8 ; y++)
goodY[y] = 0;
return goodY;
}
static inline void setbit(char *goodY, int i, unsigned long long Y, int v)
static inline void setbit(char *goodY, int i, u64 Y, int v)
{
// Y &= (1 << (known_up + known_low - 1));
int idx = Y + i * (1 << (known_up + known_low));
// idx = idx / 4;
int j = idx / 8;
int l = idx % 8;
if (v == 1)
......@@ -66,14 +70,14 @@ static inline void setbit(char *goodY, int i, unsigned long long Y, int v)
goodY[j] &= ~(1 << l);
}
void getGoodY(char* goodY, unsigned long long* tabX, unsigned long long* lowSumPol, int v)
void getGoodY(char* goodY, const u64* X, const u64* lowSumPol, int v)
{
for (int i = 0 ; i < nbtest ; i++){
unsigned long long Wi = lowSumPol[nbiter + i] % (1 << known_low);
u64 Wi = lowSumPol[nbiter + i] % (1 << known_low);
for (int j = 0 ; j < k ; j++){
unsigned long long Xij = tabX[i*k + j]; //unrotate(X[i], j);
unsigned long long goodYi1 = (((Xij % (1 << known_low)) ^ Wi) << known_up) ^ (j ^ (Xij >> (k - known_up)));
unsigned long long goodYi2 = (goodYi1 - 1) % (1 << (known_low + known_up));
u64 Xij = unrotate(X[i + nbiter], j);
u64 goodYi1 = (((Xij % (1 << known_low)) ^ Wi) << known_up) ^ (j ^ (Xij >> (k - known_up)));
u64 goodYi2 = (goodYi1 - 1) % (1 << (known_low + known_up));
setbit(goodY, i, goodYi1, v);
setbit(goodY, i, goodYi2, v);
}
......@@ -81,19 +85,7 @@ void getGoodY(char* goodY, unsigned long long* tabX, unsigned long long* lowSumP
}
void getTabTmp(unsigned long long* tabTmp, unsigned long long* X, unsigned long long* lowSumPol, unsigned long long* sumPolY)
{
for(int i = 0 ; i < k ; i++){
for(int j = 0 ; j < nbiter ; j++){
unsigned long long uX = unrotate(X[j], i);
tabTmp[i * nbiter + j] = (((lowSumPol[j] % (1 << known_low)) ^ (uX % (1 << known_low))) << known_up)
+ (i ^ (uX >> (k - known_up))) - sumPolY[j];
}
}
}
static inline int checkY(const char* goodY, int i, unsigned long long Y)
static inline int checkY(const char* goodY, int i, u64 Y)
{
// Y = Y(1 << (known_up + known_low - 1));
int idx = Y + i * (1 << (known_up + known_low));
......@@ -103,38 +95,48 @@ static inline int checkY(const char* goodY, int i, unsigned long long Y)
return (goodY[j] >> l) & 1;
}
static inline int confirm(unsigned long long Y0, unsigned long long DS640, const unsigned long long* sumPolTest, const char* goodY)
static inline bool confirm(u64 Y0, u64 DS640, const struct task_t *task)
{
/**** Confirmation du DS640 ****/
unsigned long long tmp2 = ((unsigned long long) polA[nbiter]) * DS640 + sumPolTest[0]; //ATTENTION cast pcg128_t
unsigned long long Yi1 = (Y0 + (tmp2 >> (k - known_low - known_up))) % (1 << (known_low + known_up)); //avec ou sans retenue OK!
if (!(checkY(goodY, 0, Yi1)))
return 0;
for (int i = 1 ; i < nbtest ; i++) {
unsigned long long tmp2 = ((unsigned long long) polA[i + nbiter]) * DS640 + sumPolTest[i]; //ATTENTION cast pcg128_t
unsigned long long Yi1 = (Y0 + (tmp2 >> (k - known_low - known_up))) % (1 << (known_low + known_up)); //avec ou sans retenue OK!
if (!(checkY(goodY, i, Yi1))) {
for (int i = 0 ; i < nbtest ; i++) {
u64 tmp2 = ((u64) polA[i + nbiter]) * DS640 + task->sumPolTest[i]; //ATTENTION cast pcg128_t
u64 Yi1 = (Y0 + (tmp2 >> (k - known_low - known_up))) % (1 << (known_low + known_up)); //avec ou sans retenue OK!
if (!(checkY(task->goodY, i, Yi1)))
return 0;
}
}
return 1;
}
/* cf. https://stackoverflow.com/questions/17035464/a-fast-method-to-round-a-double-to-a-32-bit-int-explained#comment61972557_17035583 */
/*
 * Round x to the nearest integer without calling libm (ties go to even under
 * the default floating-point rounding mode).
 *
 * Adding 1.5 * 2^52 moves the integer part of x into the low mantissa bits of
 * an IEEE-754 binary64 value; the low 51 bits of the representation are then
 * the result in two's complement. Only meaningful for -2^50 <= x < 2^50.
 *
 * The bits are handled through an unsigned member: the former signed
 * "<<= 13; >>= 13" sequence shifted into the sign bit (undefined behaviour)
 * and depended on an arithmetic right shift (implementation-defined).
 */
static inline long long crazy_round(double x)
{
    union { double d; unsigned long long u; } magic;
    const unsigned long long sign = 1ull << 50;       /* sign bit of the 51-bit field */
    const unsigned long long mask = (sign << 1) - 1;  /* low 51 bits */

    magic.d = x + 6755399441055744.0;                 /* 1.5 * 2^52 */
    unsigned long long low = magic.u & mask;

    /* portable sign extension from 51 bits to 64 bits */
    return (long long) (low ^ sign) - (long long) sign;
}
int solve_isgood(const char* goodY, const int* rot, const unsigned long long* tabTmp, const unsigned long long* sumPolY, const unsigned long long* sumPolTest)
static inline long long light_crazy_round(double x)
{
unsigned long long tmp[nbiter];
union { double d; long long l; } magic;
magic.d = x;
magic.l <<= 13;
magic.l >>= 13;
return magic.l;
}
/**** Recherche du DS640 ****/
bool solve_isgood(const struct task_t *task)
{
/**** Recherche du DS640 ****/
u64 tmp[nbiter];
for (int i = 0; i < nbiter; i++) //Y
tmp[i] = tabTmp[i + nbiter * rot[i]];
tmp[i] = task->tabTmp[i + nbiter * task->rot[i]];
unsigned long long Y0 = tmp[0] + sumPolY[0];
u64 Y0 = tmp[0] + task->sumPolY[0];
unsigned long long tmp3[nbiter - 1];
double tmp3[nbiter - 1];
for (int i = 0; i < nbiter - 1; i++) //DY
tmp3[i] = (tmp[i+1] - tmp[i]) % (1 << (known_low + known_up));
......@@ -143,29 +145,30 @@ int solve_isgood(const char* goodY, const int* rot, const unsigned long long* ta
u[i] = 0.0;
for (int j = 0; j<nbiter - 1; j++)
u[i] += invG[i * (nbiter-1) + j] * tmp3[j];
u[i] += 6755399441055744.0;
}
unsigned long long DS640 = 0;
for(int i = 0 ; i < nbiter-1 ; i++)
DS640 += Greduite[i] * llround(u[i]);
u64 DS640 = 0;
for (int i = 0; i < nbiter-1; i++) {
DS640 += Greduite[i] * light_crazy_round(u[i]);
}
return confirm(Y0, DS640, sumPolTest, goodY);
return confirm(Y0, DS640, task);
}
void solve(unsigned long long* DS640, unsigned long long* Y0, char* goodY, int* rot, unsigned long long* tabTmp, unsigned long long* sumPolY, unsigned long long* sumPolTest)
void solve(const struct task_t *task, u64* DS640, u64* Y0)
{
unsigned long long tmp[nbiter];
u64 tmp[nbiter];
/**** Recherche du DS640 ****/
for (int i = 0; i < nbiter; i++) //Y
tmp[i] = tabTmp[i + nbiter * rot[i]];
tmp[i] = task->tabTmp[i + nbiter * task->rot[i]];
*Y0 = (tmp[0] + sumPolY[0]) % (1 << (known_low + known_up));
*Y0 = (tmp[0] + task->sumPolY[0]) % (1 << (known_low + known_up));
unsigned long long tmp3[nbiter - 1];
u64 tmp3[nbiter - 1];
for(int i = 0; i < nbiter - 1; i++) //DY
tmp3[i] = (tmp[i+1] - tmp[i]) % (1 << (known_low + known_up));
......@@ -177,14 +180,15 @@ void solve(unsigned long long* DS640, unsigned long long* Y0, char* goodY, int*
}
*DS640 = 0;
for(int i = 0 ; i < nbiter-1 ; i++)
(*DS640) += Greduite[i] * llround(u[i]);
for(int i = 0 ; i < nbiter-1 ; i++) {
(*DS640) += Greduite[i] * crazy_round(u[i]);
}
assert(confirm(*Y0, *DS640, sumPolTest, goodY));
assert(confirm(*Y0, *DS640, task));
}
void pcg(pcg128_t *S, unsigned long long* X, pcg128_t S0, pcg128_t* c, int n)
void pcg(pcg128_t *S, u64* X, pcg128_t S0, pcg128_t* c, int n)
{
struct pcg_state_setseq_128 rng;
pcg_setseq_128_srandom_r(&rng, S0, *c);
......@@ -196,3 +200,39 @@ void pcg(pcg128_t *S, unsigned long long* X, pcg128_t S0, pcg128_t* c, int n)
*c = rng.inc;
}
/*
 * Allocate the goodY bitmap of a task and clear it.
 *
 * The bitmap holds nbtest * 2^(known_low + known_up) bits, i.e. one eighth of
 * that many bytes. The caller owns t->goodY afterwards (release with free()).
 * Terminates the program if the allocation fails, instead of letting the
 * first write dereference a NULL pointer.
 */
void init_task(struct task_t *t)
{
    size_t bytes = (size_t) nbtest * ((size_t) 1 << (known_low + known_up)) / 8;

    /* calloc returns zero-filled memory, replacing the manual clearing loop */
    t->goodY = calloc(bytes, sizeof(char));
    if (t->goodY == NULL) {
        perror("init_task: cannot allocate goodY");
        exit(EXIT_FAILURE);
    }
}
/*
 * Fill a task for the guess (W0, WC) on the outputs X.
 *
 * Writes t->lowSumPol, t->sumPolY, t->sumPolTest and t->tabTmp, marks the
 * acceptable values in t->goodY (getGoodY with v = 1) and resets every
 * rotation guess t->rot[i] to 0. t->goodY must already have been allocated.
 * X is read at indices 0 .. nbiter-1 here, and is also handed to getGoodY.
 */
void prepare_task(const u64 *X, u64 W0, u64 WC, struct task_t *t)
{
/* contribution of W0 / WC for the nbiter outputs used by the solver */
for (int i = 0 ; i < nbiter ; i++) {
/* the (u64) casts truncate powA / polA before the multiplication */
t->lowSumPol[i] = (W0 * ((u64) powA[i]) + WC * ((u64) polA[i]));
/* here the product is formed in pcg128_t, shifted, then truncated on assignment */
t->sumPolY[i] = (polA[i] * WC + powA[i] * W0) >> (k - known_up);
}
/* same quantities for the nbtest outputs stored after the first nbiter ones */
for (int i = 0 ; i < nbtest ; i++) {
t->lowSumPol[nbiter + i] = (W0 * ((u64) powA[i + nbiter]) + WC * ((u64) polA[i + nbiter]));
/* note: the cast binds tighter than "- 1", so 1 is subtracted from the truncated value */
t->sumPolTest[i] = W0 * ((u64) (powA[i + nbiter] >> known_low) - 1) + WC * ((u64) (polA[i + nbiter] >> known_low) - 1);
}
/* set (v = 1) the bits of goodY that correspond to this guess */
getGoodY(t->goodY, X, t->lowSumPol, 1);
/* the table below used to be built by a separate getTabTmp() helper */
/* tabTmp[i * nbiter + j]: value for output j under rotation guess i */
for (int i = 0 ; i < k ; i++) {
for (int j = 0 ; j < nbiter ; j++) {
u64 uX = unrotate(X[j], i);
t->tabTmp[i * nbiter + j] = (((t->lowSumPol[j] % (1 << known_low)) ^ (uX % (1 << known_low))) << known_up) + (i ^ (uX >> (k - known_up))) - t->sumPolY[j];
}
}
/* start the enumeration of rotations from all zeros */
for (int i = 0 ; i < nbiter ; i++)
t->rot[i] = 0;
}
/*
 * Undo the goodY marking of a task: calls getGoodY with v = 0 on the same X
 * and on the lowSumPol values currently stored in t, so t must not have been
 * re-prepared with another guess in between. The bitmap itself is not freed.
 */
void finish_task(const u64 *X, struct task_t *t)
{
getGoodY(t->goodY, X, t->lowSumPol, 0);
}
\ No newline at end of file
#include <stdint.h>
//#include "pcg_oneseq.h"
#include "pcg_setseq.h" //inclus dans pcg_oneseq
#include <omp.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>
#include "pcg_setseq.h"
typedef long long i64;
typedef unsigned long long u64;
/***** Macro et Variables globales *****/
#define k 64
#define known_up 6
#define known_low 11 //à relancer 3 fois, sur des X differents (X0 - X4 ,X5 - X9,...) puis lancé avec 12 et enfin 13
#define nbiter 5
#define nboutput 31
#define nbtest 3
#define nbtest 4
#define nboutput (nbiter + nbtest)
extern pcg128_t a;
extern pcg128_t powA[nboutput];
extern pcg128_t polA[nboutput];
int nb_thread;
pcg128_t a;
pcg128_t powA[nboutput];
pcg128_t polA[nboutput];
extern u64 Greduite[16];
extern double invG[16];
extern unsigned long long Greduite[16];
extern float invG[16];
/*
 * Working state for one (W0, WC) guess. goodY is allocated by init_task();
 * every other field is (re)computed by prepare_task().
 */
struct task_t {
u64 lowSumPol[nbiter + nbtest];  /* W0 / WC contribution, first nbiter entries for solving, next nbtest for testing */
u64 sumPolY[nbiter];             /* shifted W0 / WC contribution for the nbiter solving outputs */
u64 sumPolTest[nbtest];          /* W0 / WC term read by the confirmation step */
u64 tabTmp[k * nbiter];          /* indexed [rotation * nbiter + output] */
int rot[nbiter];                 /* current rotation guess per output, advanced by the caller */
char *goodY;                     /* bitmap, nbtest * 2^(known_low + known_up) bits */
};
/***** Fonctions *****/
void init_var_globales();
void init_task(struct task_t *t);
void prepare_task(const u64 *X, u64 W0, u64 WC, struct task_t *t);
void finish_task(const u64 *X, struct task_t *t);
double wtime();
/*
 * Matrix-vector product res = M * v.
 * M is an n x n matrix stored row by row, v has n integer entries, res
 * receives n floats. Accumulation is done directly in res, in single precision.
 */
static inline void prodMatVecFFU(float* res, float* M, unsigned long long* v, int n){
    for (int row = 0; row < n; row++) {
        const float *line = M + row * n;   /* start of the current row */
        res[row] = 0;
        int col = 0;
        while (col < n) {
            res[row] += line[col] * v[col];
            col++;
        }
    }
}
////////////////Fonctions pour la récupération de S//////////////
/*
 * Rotate each of the first nbiter words of X right by rot[i] bits and store
 * the result in rX. (Original author's note: not verified, taken from
 * pcg_random.)
 */
static inline void rotateX(unsigned long long* rX, const unsigned long long* X, const int* rot){
    int idx = 0;
    while (idx < nbiter) {
        /* the left-shift count is reduced modulo k so it never reaches the word width */
        rX[idx] = (X[idx] >> rot[idx]) | (X[idx] << ((- rot[idx]) & (k - 1)));
        idx++;
    }
}
static inline void unrotateX(unsigned long long* urX, const unsigned long long* X, const int* rot){//pas verifié, repris de pcg_random
for(int i = 0 ; i < nbiter ; i++)
urX[i]= (X[i] >> ((- rot[i]) & (k-1))) | (X[i] << rot[i]);
}
static inline unsigned long long unrotate1(unsigned long long Xi)
{
return (Xi >> (k-1)) | (Xi << 1);
}
/*
 * Rotate Xi left by i bits within a k-bit word (undoes a right rotation by i).
 * Expects 0 <= i < k, with k a power of two.
 */
static inline unsigned long long unrotate(unsigned long long Xi, int i)
{
    /*
     * The right-shift count is reduced modulo k: the plain "k - i" equals the
     * full word width when i == 0, and shifting by the width of the type is
     * undefined behaviour. For 1 <= i < k the masked count is still k - i.
     */
    return (Xi >> ((-i) & (k - 1))) | (Xi << i);
}
char* setupGoodY();
void getGoodY(char* goodY, unsigned long long* tabX, unsigned long long* lowSumPol, int v);
void getTabTmp(unsigned long long* tabTmp, unsigned long long* X, unsigned long long* lowSumPol, unsigned long long* sumPolY);
void getY(unsigned long long *Y, unsigned long long W0, unsigned long long WC, int* rot, unsigned long long* uX);
void getYprim(unsigned long long *Yprim, unsigned long long *Y, unsigned long long W0, unsigned long long WC);
void getDY(unsigned long long *DY, unsigned long long* Yprim);
void FindDS64(unsigned long long* DS64, unsigned long long *Y0, unsigned long long* uX,int* rot, unsigned long long* lowSumPol, unsigned long long* sumPolY);
unsigned long long FindDS640(unsigned long long* Y, unsigned long long* uX, int* rot,unsigned long long *lowSumPol,unsigned long long* sumPolY);
int testDS640(unsigned long long DS640, unsigned long long* X, unsigned long long Y0, unsigned long long* sumPolTest, unsigned long long* lowSumPol);
void solve(unsigned long long* DS640, unsigned long long* Y0, char* goodY, int* rot, unsigned long long* tabTmp, unsigned long long* sumPolY, unsigned long long* sumPolTest);
int solve_isgood(const char* goodY, const int* rot, const unsigned long long* tabTmp, const unsigned long long* sumPolY, const unsigned long long* sumPolTest);
int testValid(FILE* f, int n);
//void pcgone(pcg128_t *S, unsigned long long* X, pcg128_t S0, int n);
void pcg(pcg128_t *S, unsigned long long* X, pcg128_t S0, pcg128_t* c, int n);
/***** Tests *****/
int testFonctions();
void printVal(pcg128_t S0, pcg128_t c);
bool solve_isgood(const struct task_t *task);
void solve(const struct task_t *task, u64* DS640, u64* Y0);
void pcg(pcg128_t *S, u64* X, pcg128_t S0, pcg128_t* c, int n);
///// tout ceci ne sert que dans test.c
void getY(u64 *Y, u64 W0, u64 WC, int* rot, u64* uX);
void getYprim(u64 *Yprim, u64 *Y, u64 W0, u64 WC);
void getDY(u64 *DY, u64* Yprim);
void FindDS64(u64* DS64, u64 *Y0, u64* uX,int* rot, u64* lowSumPol, u64* sumPolY);
u64 FindDS640(u64* Y, u64* uX, int* rot,u64 *lowSumPol,u64* sumPolY);
int testDS640(u64 DS640, u64* X, u64 Y0, u64* sumPolTest, u64* lowSumPol);
\ No newline at end of file
......@@ -2,9 +2,24 @@
////////////////Fonctions pour la récupération de S//////////////
/*
 * Matrix-vector product res = M * v.
 * M is an n x n matrix stored row by row, v has n integer entries, res
 * receives n doubles. Accumulation is done directly in res.
 */
static inline void prodMatVecFFU(double* res, double* M, u64* v, int n){
    for (int row = 0; row < n; row++) {
        const double *line = M + row * n;   /* start of the current row */
        res[row] = 0;
        int col = 0;
        while (col < n) {
            res[row] += line[col] * v[col];
            col++;
        }
    }
}
static inline u64 unrotate1(u64 Xi)
{
return (Xi >> (k-1)) | (Xi << 1);
}
/* Y = S[k-known_up:k+known_low] */
void getY(unsigned long long *Y, unsigned long long W0, unsigned long long WC, int* rot, unsigned long long* uX){
void getY(unsigned long long *Y, unsigned long long W0, unsigned long long WC, int* rot, unsigned long long* uX)
{
for(int i = 0 ; i < nbiter ; i++){
Y[i] = ((((unsigned long long) ((polA[i] * WC + powA[i] * W0) % (1 << known_low))) ^ (uX[i] % (1 << known_low))) << known_up) + (rot[i] ^ (uX[i] >> (k - known_up)));
}
......@@ -23,7 +38,8 @@ void getDY(unsigned long long *DY, unsigned long long* Yprim){
}
/* DS64 = différence sur S'[known_low:known_low+k], avec S' = S - composante en WC, W0 */
void FindDS64(unsigned long long* DS64, unsigned long long* Y0, unsigned long long* uX,int* rot, unsigned long long* lowSumPol, unsigned long long* sumPolY){
void FindDS64(unsigned long long* DS64, unsigned long long* Y0, unsigned long long* uX,int* rot, unsigned long long* lowSumPol, unsigned long long* sumPolY)
{
unsigned long long tmp[nbiter];
for(int i = 0 ; i < nbiter ; i++){//Y
tmp[i] = (((lowSumPol[i] % (1 << known_low)) ^ (uX[i] % (1 << known_low))) << known_up) + (rot[i] ^ (uX[i] >> (k - known_up)));
......@@ -34,13 +50,13 @@ void FindDS64(unsigned long long* DS64, unsigned long long* Y0, unsigned long lo
for(int i = 0 ; i < nbiter - 1 ; i++){ //DY
tmp[i] = (tmp[i+1] - tmp[i]) % (1<<(known_low + known_up));
tmp[i] = tmp[i] << (k - known_up - known_low);
// tmp[i] = tmp[i] << (k - known_up - known_low);
}
float u[nbiter-1];
double u[nbiter-1];
prodMatVecFFU(u, invG, tmp, nbiter-1);
for(int i = 0 ; i < nbiter-1 ; i++)
tmp[i] = (unsigned long long) llroundf(u[i]);
tmp[i] = (unsigned long long) llround(u[i]);
*DS64 = 0;
for(int i = 0 ; i < nbiter-1 ; i++)
......
#include "fonctions.h"
#include <stdlib.h>
#include <stdio.h>
#include <err.h>
#include <string.h>
#include <time.h>
int main(){
#include <omp.h>
#include <mpi.h>
/* INITIALISATION DES PARAMETRES */
#include "fonctions.h"
init_var_globales();
static const bool VERBOSE = true;
static const char *CHKPT_FILENAME = "checkpoint.bin";
static const char *ALT_CHKPT_FILENAME = "checkpoint.bin.tmp";
/********** Calculs/Tests plus ou moins à la con ***********/
pcg128_t S0 = (((pcg128_t) 5995207026785010249u) << k) + ((pcg128_t) 179350442155841024u);
pcg128_t c = ((((pcg128_t) 6364136223846793005u) << k) + 1442695040888963407u) >> 1;
enum chkpt_status {GOOD_CHECKPOINT, NO_CHECKPOINT, BAD_CHECKPOINT};
pcg128_t vraiS[nboutput];
unsigned long long X[nboutput];
/*
 * On-disk checkpoint record. It is written and read back as one raw struct,
 * so the file is only valid for the same build / ABI.
 */
struct checkpoint_t {
int nranks;      /* MPI communicator size when the checkpoint was saved */
int known_bits;  /* value of known_low when the checkpoint was saved */
u64 X[nbiter];   /* first nbiter outputs being attacked, used to match the run */
u64 done;        /* tasks done per MPI rank */
time_t when;     /* time(NULL) at save time */
};
FILE *f = fopen("results", "w");
//printVal(S0, c);
enum chkpt_status load_chkpt(int size, const u64 *X, u64 *done)
{
*done = 0;
pcg(vraiS, X, S0, &c, nboutput);
double t1 = wtime();
/* try to load checkpoint file */
FILE *f = fopen(CHKPT_FILENAME, "r");
if (f == NULL) {
perror("Cannot open checkpoint file");
return NO_CHECKPOINT;
}
struct checkpoint_t chkpt;
size_t check = fread(&chkpt, sizeof(chkpt), 1, f);
fclose(f);
if (check != 1) {
perror("Cannot read checkpoint from file");
return NO_CHECKPOINT;
}
unsigned long long tabX[k * nbtest];
for (int i = 0; i < nbtest; i++)
for (int j = 0; j < k; j++)
tabX[i * k + j] = unrotate(X[i + nbiter], j);
//unsigned long long done = 0;
unsigned long long W0 = 5018, WC = 335;
char* goodY = setupGoodY();
#pragma omp parallel for
for (W0 = 5018; W0 < /*(1<<known_low)*/ 5019 ; W0++){//W0=5018
for(WC = 335 ; WC < /*(1<<known_low)*/ 336 ; WC++){//WC = 335
/**** Polynômes en WC et W0 utilisés dans la résolution ****/
unsigned long long lowSumPol[nbiter + nbtest];
unsigned long long sumPolY[nbiter];
unsigned long long sumPolTest[nbtest];
for(int i = 0 ; i < nbiter ; i++){
lowSumPol[i] = (W0 * ((unsigned long long) powA[i]) + WC * ((unsigned long long) polA[i]));
sumPolY[i] = (polA[i] * WC + powA[i] * W0) >> (k - known_up);
}
for(int i = 0 ; i < nbtest ; i++){
lowSumPol[nbiter + i] = (W0 * ((unsigned long long) powA[i + nbiter]) + WC * ((unsigned long long) polA[i + nbiter]));
sumPolTest[i] = W0 * ((unsigned long long) (powA[i + nbiter] >> known_low) - 1) + WC * ((unsigned long long) (polA[i + nbiter] >> known_low) - 1);
}
getGoodY(goodY, tabX, lowSumPol, 1);
unsigned long long tabTmp[k * nbiter];
getTabTmp(tabTmp, X, lowSumPol, sumPolY);
/*Variables privées*/
int rot[nbiter];
/* verify checkpoint */
if (size != chkpt.nranks) {
printf("Communicator size mismatch. Now=%d, in checkpoint=%d.\n", size, chkpt.nranks);
return BAD_CHECKPOINT;
}
if (known_low != chkpt.known_bits) {
printf("Guessed bits mismatch. Now=%d, in checkpoint=%d.\n", known_low, chkpt.known_bits);
return BAD_CHECKPOINT;
}
for (int i = 0; i < nbiter; i++)
rot[i] = 0;
unsigned long long DS640;
unsigned long long Y0;
if (X[i] != chkpt.X[i]) {
printf("X[%d] mismatch. Now=%llx, in checkpoint=%llx.\n", i, X[i], chkpt.X[i]);
return BAD_CHECKPOINT;
}
/* checkpoint is fine */
struct tm *tmp;
tmp = localtime(&chkpt.when);
if (tmp == NULL)
err(1, "localtime");
char outstr[255];
if (strftime(outstr, sizeof(outstr), "%Y-%m-%d %H:%M:%S", tmp) == 0)
errx(1, "strftime returned 0");
printf("Correct checkpoint loaded from %s. Time = %s\n", CHKPT_FILENAME, outstr);
printf("Tasks done per MPI rank: %lld.\n", chkpt.done);
*done = chkpt.done;
return GOOD_CHECKPOINT;
}
/*
 * Write a checkpoint describing the current progress.
 *
 * The record is first written to ALT_CHKPT_FILENAME and only renamed over
 * CHKPT_FILENAME once it has been completely written and closed, so an
 * existing checkpoint is never replaced by a truncated one. Failures are
 * reported on stderr and are not fatal: the previous checkpoint stays.
 *
 * X    : outputs being attacked (the first nbiter are stored)
 * done : number of tasks done per MPI rank
 */
void save_chkpt(const u64 *X, u64 done)
{
    struct checkpoint_t chkpt;
    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* prepare checkpoint data; zero first so padding bytes written to disk are defined */
    memset(&chkpt, 0, sizeof(chkpt));
    chkpt.nranks = size;
    chkpt.known_bits = known_low;
    for (int i = 0; i < nbiter; i++)
        chkpt.X[i] = X[i];
    chkpt.done = done;
    chkpt.when = time(NULL);

    /* try to open alternate checkpoint file (binary record) */
    FILE *f = fopen(ALT_CHKPT_FILENAME, "wb");
    if (f == NULL) {
        perror("WARNING ! Cannot open temporary checkpoint file");
        return;
    }

    if (fwrite(&chkpt, sizeof(chkpt), 1, f) != 1) {
        /* report before fclose() can overwrite errno */
        perror("WARNING ! Cannot write temporary checkpoint file");
        fclose(f);
        remove(ALT_CHKPT_FILENAME);
        return;
    }

    /* buffered data reaches the file at fclose(): a failure here is a failed write */
    if (fclose(f) != 0) {
        perror("WARNING ! Cannot write temporary checkpoint file");
        remove(ALT_CHKPT_FILENAME);
        return;
    }

    /* writing the new checkpoint was successful: replace a possible old one */
    if (rename(ALT_CHKPT_FILENAME, CHKPT_FILENAME) != 0)
        perror("WARNING ! Cannot rename tmp checkpoint file");
}
/* invoked at the beginning. Sets the range for the current MPI rank. */
void restart(const u64 *X, u64 *range_start, u64 *range_end, u64 *done)
{
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* default */
u64 n_tasks = 1 << (2 * known_low - 1);
u64 tasks_per_rank = n_tasks / size;
if (n_tasks % size != 0)
tasks_per_rank += 1;
*range_start = rank * tasks_per_rank;
*range_end = (rank + 1) * tasks_per_rank;
if (rank == 0) {
enum chkpt_status status = load_chkpt(size, X, done);
switch (status) {
case BAD_CHECKPOINT:
printf("BAD CHECKPOINT. Refusing to start. Please clean up the mess\n");
exit(EXIT_FAILURE);
case NO_CHECKPOINT:
printf("COLD START.\n");
break;
case GOOD_CHECKPOINT:
printf("WARM START.\n");
break;
}
}
for(int r = 0 ; r < 1<<(nbiter * known_up) ; r++){//cette boucle n'est pas parralélisable direct !
MPI_Bcast(done, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD);
*range_start += *done;
/***** Modification de rot et unrotX *****/
rot[0]=(rot[0] + 1) % k;
/* clip */
if (*range_end > n_tasks)
*range_end = n_tasks;
if (VERBOSE)
printf("MPI rank %d : [%llx:%llx]\n", rank, *range_start, *range_end);
}
/* checkpoints the current MPI rank */
/*
 * Collective checkpoint: every rank waits at a barrier, then rank 0 alone
 * saves the checkpoint with the progress counter it was given.
 */
void checkpoint(const u64 *X, u64 done)
{
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    /* wait until every rank has reached this point */
    MPI_Barrier(MPI_COMM_WORLD);

    if (me != 0)
        return;
    save_chkpt(X, done);
}
/*
 * Report a solution: print it on stdout, then append it (with the nbiter
 * outputs it was found for) to "solution-<c>.txt", where c combines W0 and WC.
 *
 * Exits if the file cannot be opened. A failure while writing or closing the
 * file is reported on stderr; the stdout line remains as a backup.
 */
void result_found(const u64 *X, u64 W0, u64 WC, u64 r)
{
    /* we print it just in case something goes wrong... */
    printf("solution found : W_0 = %04llx / W_c = %04llx / r = %08llx\n", W0, WC, r);

    u64 c = (W0 << (known_low - 1)) + WC;
    char filename[255];
    /* bounded formatting instead of sprintf */
    int len = snprintf(filename, sizeof(filename), "solution-%08llx.txt", c);
    if (len < 0 || (size_t) len >= sizeof(filename))
        errx(1, "solution file name does not fit in buffer");

    FILE *f = fopen(filename, "a");
    if (f == NULL)
        err(1, "cannot open solution file");

    for (int i = 0; i < nbiter; i++)
        fprintf(f, "X[%d] = %llx\n", i, X[i]);
    fprintf(f, "W_0 = %04llx / W_c = %04llx / r = %08llx\n", W0, WC, r);
    fprintf(f, "==============================================\n");

    /* buffered output may only fail at fclose(): do not lose a solution silently */
    int failed = ferror(f);
    if (fclose(f) != 0)
        failed = 1;
    if (failed)
        warn("cannot write solution file %s", filename);
}
void do_task(u64 current, struct task_t *task, const u64 *X)
{
u64 W0 = current >> (known_low - 1);
u64 WC = 1 + 2 * (current % (1 << (known_low - 1)));
printf("Doing task %lld / %lld\n", W0, WC);
prepare_task(X, W0, WC, task);
for (u64 r = 0; r < 1 << (nbiter * known_up); r++) {
task->rot[0] = (task->rot[0] + 1) % k;
int i = 0;
while(rot[i] == 0 && i < nbiter){
while (task->rot[i] == 0 && i < nbiter) {
i++;
rot[i]=(rot[i] + 1) %k;
task->rot[i] = (task->rot[i] + 1) % k;
}
if (solve_isgood(task))
result_found(X, W0, WC, r);
}
finish_task(X, task);
}
int main(int argc, char **argv)
{
int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
if (provided < MPI_THREAD_FUNNELED)
errx(1, "MPI Thread support not sufficient");
/* INITIALISATION DES PARAMETRES */
init_var_globales();
if (solve_isgood(goodY, rot, tabTmp, sumPolY, sumPolTest)) {
fprintf(f)
solve(&DS640, &Y0, goodY, rot, tabTmp, sumPolY, sumPolTest);
printf("candidat DS64 trouvé !!\n");
printf("%llu\n", DS640);
printf("temps pour trouver la solution = %f\n", wtime() - t1 );
u64 X[9];
X[0] = 0x47a42ee112e8afb9;
X[1] = 0xf5e7948dbc0c7e26;
X[2] = 0x91724bdca45a78a4;
X[3] = 0x1be0e7e5b398b248;
X[4] = 0x6f8b727451e185a8;
X[5] = 0x976d59bba78ef4e2;
X[6] = 0xc588c4c6c9052cba;
X[7] = 0x9cc0fc58615e1b87;
X[8] = 0xec7c5d6ee9992147;
u64 range_start, range_end, done;
restart(X, &range_start, &range_end, &done);
double t1 = wtime();
/* init all tasks */
int T = omp_get_max_threads();
struct task_t task[T];
#pragma omp parallel
{
int tid = omp_get_thread_num();
init_task(&task[tid]);
}
/*if(DS640 == 7304601715607344736u){
printf("On a le bon !\n");
printf("DS640 = %llu\n", DS640);
}*/
/* DEBUG */
// W_0 = 7984
// W_c = 7673
range_start = (7984 << (known_low - 1)) + (7673 - 1) / 2;
while (range_start < range_end) {
#pragma omp parallel
{
int tid = omp_get_thread_num();
if (range_start + tid < range_end) {
do_task(range_start + tid, &task[tid], X);
}
getGoodY(goodY, tabX, lowSumPol, 0);
//#pragma omp atomic
// done++;;
}
range_start += T;
done += T;
checkpoint(X, done);
}
printf("temps total = %f\n", wtime() - t1);
MPI_Finalize();
return(0);
}
Sur ppti-gpu-1 (44 coeurs skylake 6152 (2017) @ 2.1Ghz) :
vs jean-zay : cascade lake 6248 (2019) @ 2.5Ghz
known_low = 11
==============
Avec ICC
88 tâches en même temps --> 40.3s (2341 Msolve/s, 26.7 Msolve/s/thread)
44 tâches en même temps --> 26.9s (1753 Msolve/s, 39.8 Msolve/s/thread)
22 tâches en même temps --> 20.1s (1175 Msolve/s, 53.4 Msolve/s/thread)
1 tâche en même temps --> 17.8s (60.5 Msolve/s/thread)
et maintenant, on teste la parallélisation à l'intérieur des tâches
88 threads --> 10.9s, 1574.0M/s, 17.9M/s/thread
44 threads --> 20.53s, 836.7M/s, 19.0M/s/thread
22 threads --> 32.88s, 522.5M/s, 23.8M/s/thread
11 threads --> 58.52s, 293.6M/s, 26.7M/s/thread
c'est moins bien !
2048^2 / 80 tâches de 33.6s == 52 428 "jobs" de 33.6s --> 20000 h*CPU sur jz
known_low = 12
==============
80000 h*CPU sur jz
known_low = 13
==============
320000 h*CPU sur jz
\ No newline at end of file
......@@ -3,34 +3,45 @@
#include "fonctions.h"
/*
 * For each of the first nbiter outputs, store in urX[i] the word X[i] with
 * its bits cyclically shifted by rot[i]: the low part comes from a right
 * shift by (-rot[i]) & (k - 1), the high part from a left shift by rot[i].
 * NOTE(review): this is a plain left rotation provided k is the word width
 * (64) and 0 <= rot[i] < k -- confirm against the definition of k.
 */
static inline void unrotateX(u64* urX, const u64* X, const int* rot)
{
    int idx = 0;
    while (idx < nbiter) {
        const u64 word = X[idx];
        const int amount = rot[idx];
        const u64 low_part = word >> ((-amount) & (k - 1));
        const u64 high_part = word << amount;
        urX[idx] = low_part | high_part;
        idx++;
    }
}
int testFonctions()
{
assert(known_low == 11);
int rot[nboutput];
pcg128_t S0 = (((pcg128_t) 5995207026785010249u) << k) + ((pcg128_t) 179350442155841024u);
u64 X[nboutput];
pcg128_t vraiS[nboutput];
unsigned long long X[nboutput];
pcg128_t S0 = (((pcg128_t) 5995207026785010249u) << k) + ((pcg128_t) 179350442155841024u);
pcg128_t c = ((((pcg128_t) 6364136223846793005u) << k) + 1442695040888963407u) >> 1;
pcg(vraiS, X, S0, &c, nboutput);
if (vraiS[2] != (((pcg128_t) 1792771836637573954u) << k) + ((pcg128_t) 11139816115278170276u)) {
printf("erreur sur pcg\n");
return 0;
}
for(int i = 0 ; i < nboutput ; i++){
rot[i] = (int) (vraiS[i] >> (2 * k - known_up));
}
unsigned long long W0 = (unsigned long long) (vraiS[0] % (1<<known_low));
unsigned long long WC = (unsigned long long) (c % (1<<known_low));
u64 W0 = (u64) (vraiS[0] % (1<<known_low));
u64 WC = (u64) (c % (1<<known_low));
struct task_t task;
init_task(&task);
prepare_task(X, W0, WC, &task);
for (int i = 0 ; i < nbiter ; i++)
task.rot[i] = (int) (vraiS[i] >> (2 * k - known_up));
printf("1..7\n");
printf("# nbiter = %d\n", nbiter);
printf("# known_low = %d\n", known_low);
/* test unrotate */
unsigned long long uX[nbiter];
unrotateX(uX, X, rot);
u64 uX[nbiter];
unrotateX(uX, X, task.rot);
if (uX[0] != 15007519919903780682u) {
printf("not ok 1 - erreur sur unrotateX\n");
} else {
......@@ -38,11 +49,11 @@ int testFonctions()
}
/* test getY */
unsigned long long Y[nbiter];
u64 Y[nbiter];
printf("# W0 = %llu\n", W0);
printf("# Wc = %llu\n", WC);
getY(Y, W0, WC, rot, uX);
getY(Y, W0, WC, task.rot, uX);
if(Y[3] != 129714){
printf("not ok 2 - erreur sur getY. Y[3] == %llu / attendu : 129714\n", Y[3]);
} else {
......@@ -50,7 +61,7 @@ int testFonctions()
}
/* test getYprim */
unsigned long long Yprim[nbiter];
u64 Yprim[nbiter];
getYprim(Yprim, Y, W0, WC);
if(Yprim[1] != 93486){
printf("not ok 3 - erreur sur getY\n");
......@@ -59,7 +70,7 @@ int testFonctions()
}
/* test getDY */
unsigned long long DY[nbiter-1];
u64 DY[nbiter-1];
getDY(DY, Yprim);
if(DY[0] != 14609){
printf("not ok 4 - erreur sur getDY\n");
......@@ -67,23 +78,9 @@ int testFonctions()
printf("ok 4 - getDY\n");
}
/**** Polynômes en WC et W0 utilisés dans la résolution ****/
unsigned long long lowSumPol[nbiter + nbtest];
unsigned long long sumPolY[nbiter];
unsigned long long sumPolTest[nbtest];
for(int i = 0 ; i < nbiter ; i++){
lowSumPol[i] = (W0 * ((unsigned long long) powA[i]) + WC * ((unsigned long long) polA[i]));
sumPolY[i] = (polA[i] * WC + powA[i] * W0) >> (k - known_up);
}
for(int i = 0 ; i < nbtest ; i++){
lowSumPol[nbiter + i] = (W0 * ((unsigned long long) powA[i + nbiter]) + WC * ((unsigned long long) polA[i + nbiter]));
sumPolTest[i] = W0 * ((unsigned long long) (powA[i + nbiter] >> known_low) - 1) + WC * ((unsigned long long) (polA[i + nbiter] >> known_low) - 1);
}
/* test FindDS64 */
unsigned long long DS64[nbiter - 1];
FindDS64(DS64, Y, uX, rot, lowSumPol, sumPolY);
u64 DS64[nbiter - 1];
FindDS64(DS64, Y, uX, task.rot, task.lowSumPol, task.sumPolY);
if (DS64[0] != 2055999906439120392u) {
printf("not ok 5 - erreur sur FindDS64\n");
} else {
......@@ -106,35 +103,26 @@ int testFonctions()
}
}*/
unsigned long long tabX[k * nbtest];
for (int i = 0; i < nbtest; i++)
for (int j = 0; j < k; j++)
tabX[i * k + j] = unrotate(X[i + nbiter], j);
/*test getGoodY*/
char* goodY = setupGoodY();
getGoodY(goodY, tabX, lowSumPol, 1);
unsigned long long tabTmp[k * nbiter];
getTabTmp(tabTmp, X, lowSumPol, sumPolY);
unsigned long long uXnbiter1 = unrotate(X[nbiter + 1], rot[nbiter + 1]);
unsigned long long Ynbiter1 = ((((unsigned long long) ((polA[nbiter + 1] * WC + powA[nbiter + 1] * W0) % (1 << known_low))) ^ (uXnbiter1 % (1 << known_low))) << known_up) + (rot[nbiter + 1] ^ (uXnbiter1 >> (k - known_up)));
if(!goodY[Ynbiter1 + (1<<(known_low + known_up))]){
printf("not ok 6 - erreur sur getGoodY\n");
} else {
printf("ok 6 - getGoodY\n");
}
unsigned long long DS640, Y0;
if(!solve_isgood(goodY, rot, tabTmp, sumPolY, sumPolTest)){
printf("not ok 7 - erreur sur solve\n");
// u64 uXnbiter1 = unrotate(X[nbiter + 1], rot[nbiter + 1]);
// u64 Ynbiter1 = ((((u64) ((polA[nbiter + 1] * WC + powA[nbiter + 1] * W0) % (1 << known_low))) ^ (uXnbiter1 % (1 << known_low))) << known_up) + (rot[nbiter + 1] ^ (uXnbiter1 >> (k - known_up)));
// if (!checkY(goodY, 1, Ynbiter1)) {
// //if(!goodY[Ynbiter1 + (1<<(known_low + known_up))]){
// printf("not ok 6 - erreur sur getGoodY\n");
// } else {
// printf("ok 6 - getGoodY\n");
// }
if (!solve_isgood(&task)) {
printf("not ok 6 - erreur sur solve_isgood\n");
} else {
printf("ok 7 - solve\n");
printf("ok 6 - solve_isgood\n");
}
solve(&DS640, &Y0, goodY, rot, tabTmp, sumPolY, sumPolTest);
u64 DS640, Y0;
solve(&task, &DS640, &Y0);
printf("ok 7 - solve");
return 1;
}
......@@ -144,23 +132,23 @@ int testFonctions()
void printVal(pcg128_t S0, pcg128_t c) {
int rot[nboutput];
pcg128_t vraiS[nboutput];
unsigned long long X[nboutput];
u64 X[nboutput];
pcg(vraiS, X, S0, &c, nboutput);
printf("setseq : %llu %llu\n", (unsigned long long) (vraiS[2]>>64), (unsigned long long) vraiS[2]);
printf("setseq : %llu %llu\n", (u64) (vraiS[2]>>64), (u64) vraiS[2]);
//printf("%llu %llu\n", (unsigned long long) ((vraiS[1] - vraiS[0])>>64), (unsigned long long) (vraiS[1] - vraiS[0]));
//printf("%llu %llu\n", (u64) ((vraiS[1] - vraiS[0])>>64), (u64) (vraiS[1] - vraiS[0]));
//done
for(int i = 0 ; i < nboutput ; i++){
rot[i] = (int) (vraiS[i] >> (2 * k - known_up));
}
unsigned long long W0 = (unsigned long long) (vraiS[0] % (1<<known_low));
unsigned long long WC = (unsigned long long) (c % (1<<known_low));
u64 W0 = (u64) (vraiS[0] % (1<<known_low));
u64 WC = (u64) (c % (1<<known_low));
printf("W0 : %llu\n", W0);
printf("WC : %llu\n", WC);
unsigned long long uX[nbiter];
u64 uX[nbiter];
unrotateX(uX, X, rot);
printf("uX\n");
for(int i = 0 ; i < nbiter ; i++)
......@@ -170,7 +158,7 @@ void printVal(pcg128_t S0, pcg128_t c){
f = fopen("result.txt","w");
fprintf(f,"W0 : %llu\n", W0);*/
unsigned long long Y[nbiter];
u64 Y[nbiter];
getY(Y, W0, WC, rot, uX);
printf("Y :\n");
for(int i = 0 ; i < nbiter ; i++)
......@@ -179,14 +167,14 @@ void printVal(pcg128_t S0, pcg128_t c){
/***** Tests de vérification des sous-fonctions *****/
unsigned long long Yprim[nbiter];
u64 Yprim[nbiter];
getYprim(Yprim, Y, W0, WC);
printf("Yprim :\n");
for(int i = 0 ; i < nbiter ; i++)
printf("%llu ", Yprim[i]);
printf("\n");
unsigned long long DY[nbiter];
u64 DY[nbiter];
getDY(DY, Yprim);
printf("DY :\n");
for(int i = 0 ; i < nbiter - 1 ; i++)
......@@ -194,19 +182,19 @@ void printVal(pcg128_t S0, pcg128_t c){
printf("\n");
/**** Polynômes en WC et W0 utilisés dans la résolution ****/
unsigned long long lowSumPol[nbiter + nbtest];
unsigned long long sumPolY[nbiter];
unsigned long long sumPolTest[nbtest];
u64 lowSumPol[nbiter + nbtest];
u64 sumPolY[nbiter];
u64 sumPolTest[nbtest];
for(int i = 0 ; i < nbiter ; i++){
lowSumPol[i] = (W0 * ((unsigned long long) powA[i]) + WC * ((unsigned long long) polA[i]));
lowSumPol[i] = (W0 * ((u64) powA[i]) + WC * ((u64) polA[i]));
sumPolY[i] = (polA[i] * WC + powA[i] * W0) >> (k - known_up);
}
for(int i = 0 ; i < nbtest ; i++){
lowSumPol[nbiter + i] = (W0 * ((unsigned long long) powA[i + nbiter]) + WC * ((unsigned long long) polA[i + nbiter]));
sumPolTest[i] = W0 * ((unsigned long long) (powA[i + nbiter] >> known_low) - 1) + WC * ((unsigned long long) (polA[i + nbiter] >> known_low) - 1);
lowSumPol[nbiter + i] = (W0 * ((u64) powA[i + nbiter]) + WC * ((u64) polA[i + nbiter]));
sumPolTest[i] = W0 * ((u64) (powA[i + nbiter] >> known_low) - 1) + WC * ((u64) (polA[i + nbiter] >> known_low) - 1);
}
unsigned long long DS64[nbiter - 1];
u64 DS64[nbiter - 1];
FindDS64(DS64, Y, uX, rot, lowSumPol,sumPolY);
printf("DS64 :\n");
for(int i = 0 ; i < nbiter - 1 ; i++)
......
......@@ -5,64 +5,41 @@
/*
 * Run the solver on n randomly seeded generators, feeding it the correct
 * guesses each time, and count how often solve_isgood() accepts them.
 *
 * f : stream providing the random seed material (two pcg128_t per trial).
 * n : number of trials.
 *
 * Returns the number of trials for which solve_isgood() returned non-zero.
 * Exits the process if the seed stream cannot be read.
 */
int testValid (FILE* f, int n)
{
    int cpt = 0;
    struct task_t task;
    init_task(&task);

    for (int i = 0 ; i < n ; i++) {
        /* read random seed */
        pcg128_t seeds[2];
        if (fread(seeds, sizeof(seeds), 1, f) != 1) {
            perror("Something went wrong when reading /dev/urandom");
            exit(EXIT_FAILURE);
        }

        /* setup PRNG */
        pcg128_t vraiS[nboutput];
        u64 X[nboutput];
        pcg(vraiS, X, seeds[0], seeds+1, nboutput);

        /* extract "right" guessed values: low known_low bits of the first
         * state and of the increment */
        u64 W0 = (u64) (vraiS[0] % (1 << known_low));
        u64 WC = (u64) (seeds[1] % (1 << known_low));
        prepare_task(X, W0, WC, &task);
        /* true rotations = top known_up bits of each 2k-bit state;
         * j is used so the trial counter i is not shadowed */
        for (int j = 0; j < nbiter; j++)
            task.rot[j] = (int) (vraiS[j] >> (2 * k - known_up));

        /* check */
        int a = solve_isgood(&task);
        cpt += a;
        if (a) {
            u64 DS640;
            u64 Y0;
            solve(&task, &DS640, &Y0);
        }

        /* reset the task state before the next trial */
        finish_task(X, &task);
    }
    return cpt;
}
......@@ -84,10 +61,13 @@ int main()
printf("1..1\n");
static const int nbtests = 1000000;
int successes = testValid(f, nbtests);
if (successes != nbtests)
printf("not ok 1 - #success = %d / %d\n", successes, nbtests);
bool ok = (known_low == 11) && (successes > 0.635*nbtests);
ok |= (known_low == 12) && (successes > 0.99*nbtests);
ok |= (known_low == 13) && (successes == nbtests);
if (ok)
printf("ok 1 - %d tests OK\n", successes);
else
printf("ok 1 - all %d tests OK\n", nbtests);
printf("not ok 1 - #success = %d / %d\n", successes, nbtests);
exit(0);
}
# Build rules for the two challenge generators.
# NOTE(review): TARGETS is not referenced by any rule visible here.
TARGETS = pcg32-demo pcg32-global-demo pcg32x2-demo pcg64-demo
# Look for headers in the current directory.
CPPFLAGS += -I.
# Location of the pcg-c-0.94 tree used for headers and library.
PCG = $(HOME)/prison/pcg-c-0.94
CPPFLAGS += -I$(PCG)/include
CFLAGS += -std=c99 -Wall -Wextra
# Link against libpcg_random from the PCG source directory.
LDFLAGS += -L$(PCG)/src
LDLIBS += -lpcg_random
# Default goal: both challenge programs, built through make's implicit rules.
all: pcg64-challenge pcg64s-challenge
# Remove object files and the two binaries.
# NOTE(review): the recipe line below must start with a TAB in the real Makefile.
clean:
rm -f *.o pcg64-challenge pcg64s-challenge
......@@ -4,6 +4,8 @@
#include "pcg_variants.h"
#define DEBUG 1
int main()
{
// Seed the full-blown generator with external entropy
......@@ -20,15 +22,21 @@ int main()
}
printf("Seed[0] : %016" PRIx64 " %016" PRIx64 "\n", (uint64_t) (seeds[0] >> 64), (uint64_t) seeds[0]);
printf("Seed[0] : %016" PRIx64 " %016" PRIx64 "\n\n", (uint64_t) (seeds[1] >> 64), (uint64_t) seeds[1]);
printf("Seed[1] : %016" PRIx64 " %016" PRIx64 "\n\n", (uint64_t) (seeds[1] >> 64), (uint64_t) seeds[1]);
pcg64_random_t rng;
pcg64_srandom_r(&rng, seeds[0], seeds[1]);
printf("Predictor input:\n");
for (int i = 0; i < 32; i++)
for (int i = 0; i < 32; i++) {
printf("X[%2d] = 0x%016" PRIx64 "\n", i, pcg64_random_r(&rng));
if (i == 0 && DEBUG) {
int known_low = 13;
printf("known_low = %d\n", known_low);
printf("W_0 = %04lld\n", (unsigned long long) (rng.state % (1 << known_low)));
printf("W_c = %04lld\n", (unsigned long long) (rng.inc % (1 << known_low)));
}
}
printf("\n");
printf("Remaining of the sequence (predictor output, in principle):\n");
for (int i = 32; i < 48; i++)
......
......@@ -25,7 +25,6 @@ int main()
printf("Predictor input:\n");
for (int i = 0; i < 3; i++)
printf("X[%2d] = 0x%016" PRIx64 "\n", i, pcg64s_random_r(&rng));
printf("\n");
printf("Remaining of the sequence (predictor output, in principle):\n");
for (int i = 3; i < 10; i++)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment