-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathnxopt.w
2063 lines (2042 loc) · 54.3 KB
/
nxopt.w
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
@* Introduction.
This program is a new optimal solver for Rubik's cube. Much like
Korf's original solver, it uses iterated depth-first search with large
pruning tables. It further develops these ideas with a number of
additional small tricks. It currently supports twelve different sizes
of pruning tables that allow you trade off memory consumption and
disk space for solving speed.
@(nxopt.cpp@>=
#include "cubepos.h"
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <sys/time.h>
#include <map>
#include <set>
#include <vector>
#undef CHECK
#undef TRACE
#undef TESTTABLE
using namespace std ;
typedef unsigned long long ull ;
typedef long long ll ;
const int CORNERSYMM = 2187 ;
const int C12_4 = 495 ;
const int C8_4 = 70 ;
unsigned short entropy[3] = { 1, 2, 3 } ;
int base ;
int bidir = 1 ;
int allsols = 0 ;
int numthreads = 1 ;
int mindepth = 0 ;
int maxdepth = 40 ;
#ifndef QUARTER
int leftover = 0 ;
#endif
int symmetry = 0 ;
double order_mult[CANONSEQSTATES] ;
unsigned char c8_4crush[256] ;
unsigned char c8_4expand[C8_4] ;
const int CORNERUNIQ = 9930 ;
struct mind {
unsigned char m, ind ;
} mindbuf[C8_4*139] ;
struct firstlev {
unsigned int *p ;
mind *v ;
int levsiz, levoff ;
} firstlevs[CORNERSYMM] ;
int bc(int i) {
int r = 0 ;
while (i) {
r++ ;
i &= i-1 ;
}
return r ;
}
/**
* Faster corner coordinate computation.
*
* This includes the |C8_4| multiplication, so requires 18 bits for the
* orientation. Then the 4-of-8 is done in the next 8 bits, for a total
* of 26 bits. So:
*
* |fastcornercoords[i][j] = C8_4 * (j % 3) * 3^i (if i < 7) +
* 1 << (18 + i) (if j < 12)|
*
* Note that we only look at the first *7*.
*/
int fastcornercoords[M][8][CUBIES] ;
int pow3[8] ;
void initfastcc() {
pow3[0] = 1 ;
for (int i=1; i<8; i++)
pow3[i] = pow3[i-1] * 3 ;
for (int m=0; m<M; m++) {
int mprime = cubepos::invm[m] ;
for (int ii=0; ii<7; ii++)
for (int c=0; c<CUBIES; c++) {
int c1 = cubepos::rot_corner[mprime][cubepos::corner_val(ii, 0)] ;
int i = cubepos::corner_perm(c1) ;
int c2 = cubepos::corner_ori_add(c, c1) ;
int cc = cubepos::rot_corner[m][c2] ;
fastcornercoords[m][i][c] = cubepos::corner_ori(cc) * pow3[ii] ;
if (cubepos::corner_perm(cc) < 4)
fastcornercoords[m][i][c] += 1<<(18+ii) ;
}
}
}
void getcornercoords(const cubepos &cp, int &cperm, int &cori, int m=0) {
int s = 0 ;
for (int i=0; i<8; i++)
s += fastcornercoords[m][i][cp.c[i]] ;
cperm = c8_4crush[s >> 18] ;
cori = s & 0x3ffff ;
}
int getcornercoord(const cubepos &cp, int m=0) {
int s = 0 ;
for (int i=0; i<8; i++)
s += fastcornercoords[m][i][cp.c[i]] ;
return (s & 0x3ffff) * C8_4 + c8_4crush[s >> 18] ;
}
void setcornercoords(cubepos &cp, int cperm, int cori) {
int orisum = 15 ;
int lo = 0 ;
int hi = 4 ;
cperm = c8_4expand[cperm] ;
for (int i=0; i<8; i++) {
int mori = cori % 3 ;
orisum -= mori ;
if (i == 7)
mori = orisum % 3 ;
else
cori /= 3 ;
if ((cperm >> i) & 1) {
cp.c[i] = cubepos::corner_val(lo, mori) ;
lo++ ;
} else {
cp.c[i] = cubepos::corner_val(hi, mori) ;
hi++ ;
}
}
}
@ In general, when looking up values, we use the following
array to accumulate information.
@(nxopt.cpp@>=
ull fastedges[M][12][CUBIES] ;
@ Now we turn our attention to the various ways we can do
edge orientation (E2). There are only three ways: EO1 uses
4 bits, EO2 uses 8 bits, and EO3 uses 11 bits. We get the
low 11 bits of fastedges to work with to look up these values.
In the final index, the bits are located starting at least
significant bit nine (to save room for the low 9 bits giving
the edge occupancy).
@(nxopt.cpp@>=
const int POW3_6 = 729 ;
const int POW2_12 = 4096 ;
const int POW2_9 = 512 ;
const int MAXB = 3288 ;
#ifdef EO1
const int E2 = 16 ;
const int E2BITS = 4 ;
#else
#ifdef EO2
const int E2 = 256 ;
const int E2BITS = 8 ;
#else
#ifdef EO3
const int E2 = 2048 ;
const int E2BITS = 11 ;
#else
error "Please define one of EO1 EO2 or EO3" ;
#endif
#endif
#endif
#ifndef EO3
unsigned char eolo[POW3_6], eohi[POW3_6] ;
void initeorecur(int togo, int ind, int bits, int nbits) {
if (togo == 0) {
eolo[ind] = bits ;
eohi[ind] = bits << (E2BITS - nbits) ;
} else {
initeorecur(togo-1, 3*ind, bits, nbits) ;
initeorecur(togo-1, 3*ind+1, 2*bits, nbits+1) ;
initeorecur(togo-1, 3*ind+2, 2*bits+1, nbits+1) ;
}
}
#endif
void initeo() {
for (int i=0; i<12; i++) {
int ii = (i + 8) % 12 ;
for (int c=0; c<CUBIES; c++) {
#ifdef EO1
if (c & 8) { // middle cubie
if (ii < 6)
fastedges[0][i][c] = ((cubepos::edge_ori(c) + 1) * pow3[ii]) ;
else
fastedges[0][i][c] =
((cubepos::edge_ori(c) + 1) * pow3[ii-6]) << 10 ;
}
#endif
#ifdef EO2
if (0 == (c & 8)) { // not middle cubie
if (ii < 6)
fastedges[0][i][c] = ((cubepos::edge_ori(c) + 1) * pow3[ii]) ;
else
fastedges[0][i][c] =
((cubepos::edge_ori(c) + 1) * pow3[ii-6]) << 10 ;
}
#endif
#ifdef EO3
if (ii < 11)
fastedges[0][i][c] = cubepos::edge_ori(c) << ii ;
#endif
}
}
#ifndef EO3
initeorecur(6, 0, 0, 0) ;
#endif
}
@ Now we turn our attention to the various ways we can do
edge permutation (E1). There are only four ways: EP1 uses
only the middle edge occupancy; EP2 uses that and the
permutation of the middle edges; EP3 uses the middle and
bot edge occupancy, and EP4 uses the middle and bot edge
occupancy, as well as the permutation of the middle edges.
We get the 34 bits starting at least significant bit 10
to work with. For middle edge occupany, we use two six-bit
fields. For middle and bot edge occupancy, we use two
10-bit fields. For middle edge permutation, we use
one 14-bit field. We always place the occupany bits
starting at least significant bit 20, and the permutation
bits starting at least significant bit 40.
The E1 value takes into account the 495->512 expansion.
@(nxopt.cpp@>=
#ifdef EP1
const int E1 = POW2_9 ;
#else
#ifdef EP2
const int E1 = POW2_9 * 24 ;
#define USEEPERM
#else
#ifdef EP3
const int E1 = POW2_9 * C8_4 ;
#define USEEBOT
#else
#ifdef EP4
const int E1 = POW2_9 * C8_4 * 24 ;
#define USEEPERM
#define USEEBOT
#else
error "Please define one of EP1, EP2, EP3, or EP4."
#endif
#endif
#endif
#endif
char fmepermi[C12_4] ; // just during init
#ifdef USEEPERM
unsigned char fmeperm[12*12*12*6] ;
#endif
#ifdef USEEBOT
unsigned short emphi[POW3_6] ;
unsigned short emplo[POW3_6] ;
unsigned char ebphi[POW3_6] ;
unsigned char ebplo[POW3_6] ;
#else
unsigned short ephi[64] ;
unsigned char eplo[64] ;
#endif
unsigned short bitstoindex[POW2_12] ;
unsigned short c12_4expand[C12_4] ;
unsigned short pack11_9[2048] ;
unsigned char midperm[24][4] ;
void initfmeperm(int bits, int ind, int togo) {
if (togo == 0) {
int perm = --fmepermi[bitstoindex[bits]] ;
#ifdef USEEPERM
fmeperm[ind>>1] = perm ;
#endif
if (bits == 15) {
midperm[perm][ind/1728] = 14 ;
midperm[perm][ind/144%12] = 12 ;
midperm[perm][ind/12%12] = 10 ;
midperm[perm][ind%12] = 8 ;
}
} else {
for (int i=0; i<12; i++)
if (((bits >> i) & 1) == 0)
initfmeperm(bits|(1<<i), 12*ind+i, togo-1) ;
}
}
void initeprecur(int togo, int ind, int bits, int nbits, int bits2, int nbits2) {
if (togo == 0) {
if (nbits <= 4 && nbits2 <= 4) {
#ifdef USEEBOT
emplo[ind] = bitstoindex[bits] ;
emphi[ind] = bitstoindex[(bits<<6)+(1<<(4-nbits))-1] ;
ebplo[ind] = bitstoindex[bits2] ;
ebphi[ind] = bitstoindex[(bits2<<(nbits+2))+(1<<(4-nbits2))-1] ;
#else
ephi[bits] = bitstoindex[(bits<<6)+(1<<(4-nbits))-1] ;
eplo[bits] = bitstoindex[bits] ;
#endif
}
} else {
initeprecur(togo-1, 3*ind, bits*2+1, nbits+1, bits2, nbits2) ;
initeprecur(togo-1, 3*ind+1, bits*2, nbits, bits2*2+1, nbits2+1) ;
initeprecur(togo-1, 3*ind+2, bits*2, nbits, bits2*2, nbits2) ;
}
}
void initep() {
for (int i=0; i<12; i++) {
int ii = (i + 8) % 12 ;
for (int c=0; c<CUBIES; c++) {
#ifdef USEEBOT
int v = (2 + (c >> 3)) % 3 ;
if (ii < 6)
fastedges[0][i][c] += ((ull)(v * pow3[ii])) << 20 ;
else
fastedges[0][i][c] += ((ull)(v * pow3[ii-6])) << 30 ;
#else
fastedges[0][i][c] += ((ull)((c >> 3) & 1)) << (20 + ii) ;
#endif
#ifdef USEEPERM
if (c & 8) {
int ceperm = cubepos::edge_perm(c)-4 ;
fastedges[0][i][c] +=
((ull)(((ii * pow3[ceperm]) << (2 * ceperm)) >> 1)) << 40 ;
}
#endif
if (fastedges[0][i][c] > 1LL << 54)
error("! bad fastedges") ;
}
}
int bitval[13] ;
for (int i=0; i<13; i++)
bitval[i] = 0 ;
for (int i=0; i<POW2_12; i++) {
int nbits = bc(i) ;
if (nbits == 4)
c12_4expand[bitval[nbits]] = i ;
bitstoindex[i] = bitval[nbits]++ ;
}
int at = 0 ;
for (int i=0; i<POW2_12; i++)
if (bc(i) == 4)
pack11_9[i&2047] = at++ ;
initeprecur(6, 0, 0, 0, 0, 0) ;
for (int i=0; i<C12_4; i++)
fmepermi[i] = 24 ;
initfmeperm(0, 0, 4) ;
}
@ Now we have the setters and getters.
@(nxopt.cpp@>=
#ifdef HALF
static const char *metric = "HALF" ;
#ifdef EP1
#ifdef EO1
#define BASE 7
#define DATFILE "nxopth11b.dat"
#endif
#ifdef EO2
#define BASE 8
#define DATFILE "nxopth21b.dat"
#endif
#ifdef EO3
#define BASE 9
#define DATFILE "nxopth31b.dat"
#endif
#endif
#ifdef EP2
#ifdef EO1
#define BASE 8
#define DATFILE "nxopth12b.dat"
#endif
#ifdef EO2
#define BASE 9
#define DATFILE "nxopth22b.dat"
#endif
#ifdef EO3
#define BASE 10
#define DATFILE "nxopth32b.dat"
#endif
#endif
#ifdef EP3
#ifdef EO1
#define BASE 8
#define DATFILE "nxopth13b.dat"
#endif
#ifdef EO2
#define BASE 9
#define DATFILE "nxopth23b.dat"
#endif
#ifdef EO3
#define BASE 10
#define DATFILE "nxopth33b.dat"
#endif
#endif
#ifdef EP4
#ifdef EO1
#define BASE 9
#define DATFILE "nxopth14b.dat"
#endif
#ifdef EO2
#define BASE 11 // ??
#define DATFILE "nxopth24b.dat"
#endif
#ifdef EO3
#define BASE 11 // ??
#define DATFILE "nxopth34b.dat"
#endif
#endif
#endif
#ifdef QUARTER
static const char *metric = "QUARTER" ;
#ifdef EP1
#ifdef EO1
#define BASE 8
#define DATFILE "nxoptq11b.dat"
#endif
#ifdef EO2
#define BASE 9
#define DATFILE "nxoptq21b.dat"
#endif
#ifdef EO3
#define BASE 10
#define DATFILE "nxoptq31b.dat"
#endif
#endif
#ifdef EP2
#ifdef EO1
#define BASE 9
#define DATFILE "nxoptq12b.dat"
#endif
#ifdef EO2
#define BASE 10
#define DATFILE "nxoptq22b.dat"
#endif
#ifdef EO3
#define BASE 11
#define DATFILE "nxoptq32b.dat"
#endif
#endif
#ifdef EP3
#ifdef EO1
#define BASE 10
#define DATFILE "nxoptq13b.dat"
#endif
#ifdef EO2
#define BASE 11
#define DATFILE "nxoptq23b.dat"
#endif
#ifdef EO3
#define BASE 12
#define DATFILE "nxoptq33b.dat"
#endif
#endif
#ifdef EP4
#ifdef EO1
#define BASE 11
#define DATFILE "nxoptq14b.dat"
#endif
#ifdef EO2
#define BASE 13 // ??
#define DATFILE "nxoptq24b.dat"
#endif
#ifdef EO3
#define BASE 13 // ??
#define DATFILE "nxoptq34b.dat"
#endif
#endif
#endif
#ifdef AXIAL
static const char *metric = "AXIAL" ;
#ifdef EP1
#ifdef EO1
#define BASE 5
#define DATFILE "nxopta11b.dat"
#endif
#ifdef EO2
#define BASE 6
#define DATFILE "nxopta21b.dat"
#endif
#ifdef EO3
#define BASE 6
#define DATFILE "nxopta31b.dat"
#endif
#endif
#ifdef EP2
#ifdef EO1
#define BASE 6
#define DATFILE "nxopta12b.dat"
#endif
#ifdef EO2
#define BASE 6
#define DATFILE "nxopta22b.dat"
#endif
#ifdef EO3
#define BASE 7
#define DATFILE "nxopta32b.dat"
#endif
#endif
#ifdef EP3
#ifdef EO1
#define BASE 6
#define DATFILE "nxopta13b.dat"
#endif
#ifdef EO2
#define BASE 7
#define DATFILE "nxopta23b.dat"
#endif
#ifdef EO3
#define BASE 7
#define DATFILE "nxopta33b.dat"
#endif
#endif
#ifdef EP4
#ifdef EO1
#define BASE 7
#define DATFILE "nxopta14b.dat"
#endif
#ifdef EO2
#define BASE 8 // ??
#define DATFILE "nxopta24b.dat"
#endif
#ifdef EO3
#define BASE 8 // ??
#define DATFILE "nxopta34b.dat"
#endif
#endif
#endif
#ifdef SLICE
static const char *metric = "SLICE" ;
#ifdef EP1
#ifdef EO1
#define BASE 6
#define DATFILE "nxopts11b.dat"
#endif
#ifdef EO2
#define BASE 7
#define DATFILE "nxopts21b.dat"
#endif
#ifdef EO3
#define BASE 8
#define DATFILE "nxopts31b.dat"
#endif
#endif
#ifdef EP2
#ifdef EO1
#define BASE 7
#define DATFILE "nxopts12b.dat"
#endif
#ifdef EO2
#define BASE 8
#define DATFILE "nxopts22b.dat"
#endif
#ifdef EO3
#define BASE 9
#define DATFILE "nxopts32b.dat"
#endif
#endif
#ifdef EP3
#ifdef EO1
#define BASE 7
#define DATFILE "nxopts13b.dat"
#endif
#ifdef EO2
#define BASE 8
#define DATFILE "nxopts23b.dat"
#endif
#ifdef EO3
#define BASE 9
#define DATFILE "nxopts33b.dat"
#endif
#endif
#ifdef EP4
#ifdef EO1
#define BASE 8
#define DATFILE "nxopts14b.dat"
#endif
#ifdef EO2
#define BASE 9 // ??
#define DATFILE "nxopts24b.dat"
#endif
#ifdef EO3
#define BASE 9 // ??
#define DATFILE "nxopts34b.dat"
#endif
#endif
#endif
#ifdef OVERRIDEBASE
#undef BASE
#define BASE OVERRIDEBASE
#endif
int getedgecoord(const cubepos &cp, int m=0) {
ull s = 0 ;
for (int i=0; i<12; i++)
s += fastedges[m][i][cp.e[i]] ;
#ifdef EO3
int eoc = s & 0x7ff ;
#else
int eoc = eolo[s&0x3ff] + eohi[(s>>10)&0x3ff] ;
#endif
#ifdef USEEBOT
int ep1 = (s>>20)&0x3ff ;
int ep2 = (s>>30)&0x3ff ;
int epmc = emplo[ep1] + emphi[ep2] ;
int epbotc = ebplo[ep1] + ebphi[ep2] ;
#else
int epmc = pack11_9[(s>>20)&0x7ff] ;
int epbotc = 0 ;
#endif
#ifdef USEEPERM
epbotc = 24 * epbotc + fmeperm[s>>40] ;
#endif
int epadd = 2 * ((epmc + (epmc >> 5) + 65) >> 6) ;
return (epbotc << (E2BITS + 9)) + (eoc << 9) + epmc + epadd ;
}
/*
* This is not particularly speed-sensitive.
*/
void setedgecoord(cubepos &cp, int ecoord) {
int emocc = ecoord & 0x1ff ;
emocc -= 2 + 2 * (emocc >> 6) ;
#ifndef EO3
int paritybit = 0 ;
#endif
#ifdef EO1
int eoperm = (ecoord >> 9) & 0xf ;
if (bc(eoperm) & 1)
paritybit = 1 ;
#endif
#ifdef EO2
int eoperm = (ecoord >> 9) & 0xff ;
if (bc(eoperm) & 1)
paritybit = 1 ;
#endif
#ifdef EO3
int eoperm = (ecoord >> 9) & 0x7ff ;
if (bc(eoperm) & 1)
eoperm |= 0x800 ;
#endif
int ebocc = ecoord >> (E2BITS + 9) ;
#ifdef USEEPERM
int emperm = ebocc % 24 ;
#ifdef USEEBOT
ebocc /= 24 ;
#endif
#else
int emperm = 0 ;
#endif
unsigned char *mid = midperm[emperm] ;
emocc = c12_4expand[emocc] ;
ebocc = c8_4expand[ebocc] ;
if (bc(emocc) != 4) error("! bad emocc") ;
if (bc(ebocc) != 4) error("! bad ebocc") ;
static unsigned char topedges[4] = { 0, 2, 4, 6 } ;
static unsigned char botedges[4] = { 16, 18, 20, 22 } ;
unsigned char *top = topedges ;
unsigned char *bot = botedges ;
for (int i=0; i<12; i++, emocc >>= 1) {
int ii = (i + 4) % 12 ;
if (emocc & 1) {
cp.e[ii] = *mid++ ;
#ifndef EO2
cp.e[ii] |= eoperm & 1 ;
eoperm >>= 1 ;
#else
cp.e[ii] |= paritybit ;
paritybit = 0 ;
#endif
} else {
if (ebocc & 1) {
cp.e[ii] = *bot++ ;
} else {
cp.e[ii] = *top++ ;
}
ebocc >>= 1 ;
#ifndef EO1
cp.e[ii] |= eoperm & 1 ;
eoperm >>= 1 ;
#else
cp.e[ii] |= paritybit ;
paritybit = 0 ;
#endif
}
}
}
@ Expand fastedges for other mappings.
@(nxopt.cpp@>=
void initfillout() {
for (int m=1; m<M; m++) {
int mprime = cubepos::invm[m] ;
for (int i=0; i<12; i++)
for (int c=0; c<CUBIES; c++) {
int c1 = cubepos::rot_edge[mprime][i*2] ;
int i2 = cubepos::edge_perm(c1) ;
int c2 = cubepos::edge_ori_add(c, c1) ;
fastedges[mprime][i][cubepos::rot_edge[m][c2]] = fastedges[0][i2][c] ;
}
}
}
@ Finally we have more common code.
@(nxopt.cpp@>=
const int EDGECOOR = E1 * E2 ;
const size_t BIGMEMSIZE = (EDGECOOR+3)/4 ;
const size_t MEMOFFSET = (BIGMEMSIZE+3)/4 ;
const size_t MEMSIZE = 4 * MEMOFFSET ;
void initmem() {
mind *mindp = mindbuf ;
int totsiz = 0 ;
for (int j=0; j<CORNERSYMM; j++) {
mind mindt[C8_4] ;
memset(mindt, 0, sizeof(mindt)) ;
int siz = 0 ;
int mj = j ;
for (int i=0; i<C8_4; i++) {
int c = j*C8_4+i ;
int rm = 0 ;
for (int k=1; k<16; k++) {
cubepos cp, cp2 ;
int ii, jj ;
setcornercoords(cp, i, j) ;
cp.remap_into(k, cp2) ;
getcornercoords(cp2, ii, jj) ;
int tc = jj*C8_4+ii ;
if (tc < c) {
c = tc ;
rm = k ;
mj = jj ;
}
}
mindt[i].m = rm ;
if (c / C8_4 == j) {
if (c % C8_4 == i) {
mindt[i].ind = siz++ ;
} else {
mindt[i].ind = mindt[c % C8_4].ind ;
}
} else {
mindt[i].ind = firstlevs[c/C8_4].v[c % C8_4].ind ;
siz = -1 ;
}
}
firstlevs[j].levsiz = siz ;
for (int i=0; i<j; i++)
if (memcmp(mindt, firstlevs[i].v, sizeof(mindt)) == 0) {
firstlevs[j].v = firstlevs[i].v ;
break ;
}
if (firstlevs[j].v == 0) {
firstlevs[j].v = mindp ;
mindp += C8_4 ;
memcpy(firstlevs[j].v, mindt, sizeof(mindt)) ;
}
if (siz > 0) {
firstlevs[j].p = (unsigned int *)malloc(MEMSIZE * (size_t)siz) ;
firstlevs[j].levoff = totsiz ;
totsiz += siz ;
} else {
firstlevs[j].p = firstlevs[mj].p ;
firstlevs[j].levoff = firstlevs[mj].levoff ;
}
}
if (totsiz != CORNERUNIQ) {
error("! unexpected totsiz") ;
}
}
const int SIZE = 1000000 ;
unsigned char tomove[SIZE] ;
unsigned char tom[SIZE] ;
void test() {
cubepos cp, cp2, cp3 ;
for (int trial=0; trial<1000; trial++) {
int c = getedgecoord(cp) ;
setedgecoord(cp2, c) ;
int c2 = getedgecoord(cp2) ;
if (c != c2)
error("! bad match") ;
int mv = random_move() ;
cp.movepc(mv) ;
while (1) {
c = (int)(drand48()*E1*E2) ;
if ((c & 63) >= 2 && (c & 511) < 511)
break ;
}
setedgecoord(cp2, c) ;
c2 = getedgecoord(cp2) ;
if (c != c2) {
error("! bad match 2") ;
}
int m = (int)(16*drand48()) ;
c = getedgecoord(cp2, m) ;
cp2.remap_into(m, cp3) ;
c2 = getedgecoord(cp3, 0) ;
if (c != c2)
cout << "! botch in remap at " << m << endl ;
}
for (int i=0; i<SIZE; i++) {
tomove[i] = random_move() ;
tom[i] = (int)(16*drand48()) ;
}
duration() ;
int s = 0 ;
for (int j=0; j<10; j++)
for (int i=0; i<SIZE; i++) {
int cperm, cori ;
getcornercoords(cp, cperm, cori) ;
s += cperm + cori ;
cp.move(tomove[i]) ;
}
cout << "Corner in " << duration() << " " << s << endl ;
for (int j=0; j<10; j++)
for (int i=0; i<SIZE; i++) {
s += getedgecoord(cp, tom[i]) ;
cp.move(tomove[i]) ;
}
cout << "Edge in " << duration() << " " << s << endl ;
}
unsigned char skipata[NMOVES] ;
long long expandm[8] ;
void init() {
int j = 0 ;
for (int i=0; i<256; i++)
if (bc(i) == 4) {
c8_4crush[i] = j ;
c8_4expand[j] = i ;
c8_4crush[i & 127] = j ; // don't require last one
j++ ;
}
for (int mv=0; mv<NMOVES; mv++)
skipata[mv] = mv / TWISTS % 3 ;
for (int i=0; i<8; i++)
for (int mv=0; mv<NMOVES; mv++)
if (((i >> skipata[mv]) & 1) == 0)
expandm[i] |= 1LL << mv ;
initfastcc() ;
initeo() ;
initep() ;
initfillout() ;
// test() ;
initmem() ;
}
int popcount64(long long v) {
return __builtin_popcountll(v) ;
}
struct efast {
int base, bitoff ;
} emove[NMOVES][E1], emap[16][E1] ;
int bitarr[E2*MAXB] ;
map<vector<int>, int> e2offmap ;
int finde2bits(int *bits) {
vector<int> key(E2BITS) ;
for (int bi=0; bi<E2BITS; bi++)
key[bi] = bits[bi] ;
if (e2offmap.find(key) == e2offmap.end()) {
int bits2 = e2offmap.size() * E2 ;
if (bits2 >= E2*MAXB)
error("! mistake while generating bits") ;
for (int bi=0; bi<E2BITS; bi++)
for (int i=1<<bi; i<E2; i=(i+1)|(1<<bi))
bitarr[bits2+i] ^= bits[bi] ;
e2offmap[key] = bits2 ;
}
return e2offmap[key] ;
}
void calcecoords() {
cubepos cp, cp2, cp3, cp4 ;
int bits[11] ;
for (int ep=0; ep<E1; ep++) {
if ((ep & 63) < 2 || (ep & 511) == 511)
continue ;
int baseep = (ep & 511) | ((ep & ~511) << E2BITS) ;
for (int mv=0; mv<NMOVES; mv++) {
setedgecoord(cp, baseep) ;
cp.movepc(mv) ;
int dec = getedgecoord(cp) ;
emove[mv][ep].base = dec ;
for (int bi=0, eo=1; eo<E2; eo += eo, bi++) {
setedgecoord(cp3, baseep + (eo << 9)) ;
cp3.movepc(mv) ;
bits[bi] = dec ^ getedgecoord(cp3) ;
}
emove[mv][ep].bitoff = finde2bits(bits) ;
}
for (int m=0; m<16; m++) {
setedgecoord(cp, baseep) ;
cp.remap_into(m, cp2) ;
int dec = getedgecoord(cp2) ;
emap[m][ep].base = dec ;
for (int bi=0, eo=1; eo<E2; eo += eo, bi++) {
setedgecoord(cp3, baseep + (eo << 9)) ;
cp3.remap_into(m, cp4) ;
bits[bi] = dec ^ getedgecoord(cp4) ;
}
emap[m][ep].bitoff = finde2bits(bits) ;
}
}
}
#ifdef _WIN32
#include <xmmintrin.h>
inline void prefetch(void *p) { _mm_prefetch((const char *)p, _MM_HINT_T1); }
#else
inline void prefetch(void *p) { __builtin_prefetch(p); }
#endif
long long have = 0, smhave = 0 ;
int globald ;
#ifdef PS
struct prefetch_gen_t {
int dec ;
} ;
const int PREFETCH_SIZE=PS ;
#endif
void dorow(unsigned int *srcpa, long long &local_have, long long &local_smhave,
unsigned int *dstp, int d3, int mv, int m) {
unsigned long long *srcp = (unsigned long long *)srcpa ;
int ds = (d3 + 1) % 3 ;
ds = 3 - ds ;
int ec = 0 ;
cubepos cp, cp2 ;
efast *emovemv = emove[mv] ;
efast *emapm = emap[m] ;
unsigned long long d3x = (3 - d3) * 0x5555555555555555LL ;
#ifdef PS
int pgpc = 0 ;
prefetch_gen_t pgt[PREFETCH_SIZE] ;
for (int i=0; i<PREFETCH_SIZE; i++)
pgt[i].dec = -1 ;
#endif
for (int ep=0; ep<E1; ep += 512) {
for (int eo=0; eo<E2; eo++) {
for (int epm=0; epm<511; epm += 32, ec++) {
unsigned long long rm = srcp[ec] ;
unsigned long long t = rm ^ d3x ;
if ((epm & 63) == 0) {
if ((rm & 15) >= (unsigned int)globald) {
epm += 32 ;
ec += 1 ;
continue ;
}
t &= ~0xf ;
}
t = (t & (t >> 1) & 0x5555555555555555LL) ;
while (t) {
int bp = ffsll(t) >> 1 ;
t &= t-1 ;
int e2 = emovemv[ep+epm+bp].base ^ bitarr[emovemv[ep+epm+bp].bitoff + eo] ;
int ep2 = (e2 & 511) + ((e2 >> E2BITS) & ~511) ;
int eo2 = (e2 >> 9) & (E2 - 1) ;
int dec = emapm[ep2].base ^ bitarr[emapm[ep2].bitoff + eo2] ;
#ifdef PS
if (pgt[pgpc].dec >= 0) {
int pdec = pgt[pgpc].dec ;
int dv = dstp[pdec>>4] ;
int dmask = (2 - ((dv >> (2*(pdec & 15))) & 3)) >> 2 ;
local_have -= dmask ;
dstp[pdec>>4] = dv - ((dmask & ds) << (2*(pdec & 15))) ;
pdec = (pdec & ~63) >> 4 ;
dv = dstp[pdec] ;
dmask = (14 - (dv & 15)) >> 4 ;
dstp[pdec] = dv - (dmask & (15 - globald)) ;
local_smhave -= dmask ;
}
prefetch(dstp+(dec>>4)) ;
pgt[pgpc].dec = dec ;
pgpc = (pgpc + 1) & (PREFETCH_SIZE-1) ;
#else
int dv = dstp[dec>>4] ;
int dmask = (2 - ((dv >> (2*(dec & 15))) & 3)) >> 2 ;
local_have -= dmask ;
dstp[dec>>4] = dv - ((dmask & ds) << (2*(dec & 15))) ;
dec = (dec & ~63) >> 4 ;
dv = dstp[dec] ;
dmask = (14 - (dv & 15)) >> 4 ;
dstp[dec] = dv - (dmask & (15 - globald)) ;
local_smhave -= dmask ;
#endif
}
}
}
}
#ifdef PS
for (int i=0; i<PREFETCH_SIZE; i++) {
if (pgt[i].dec >= 0) {
int pdec = pgt[i].dec ;
int dv = dstp[pdec>>4] ;
int dmask = (2 - ((dv >> (2*(pdec & 15))) & 3)) >> 2 ;
local_have -= dmask ;
dstp[pdec>>4] = dv - ((dmask & ds) << (2*(pdec & 15))) ;
pdec = (pdec & ~63) >> 4 ;
dv = dstp[pdec] ;
dmask = (14 - (dv & 15)) >> 4 ;
dstp[pdec] = dv - (dmask & (15 - globald)) ;
local_smhave -= dmask ;
}
}
#endif
if (ec != (E1 * E2) >> 5)
error("! oops 12") ;
}
void writetab() {
duration() ;
FILE *f = fopen(DATFILE, "wb") ;
if (f == 0)
error("! cannot write file") ;
fputc('N', f) ;
fputc('X', f) ;
fputc(*metric, f) ;