-
-
Notifications
You must be signed in to change notification settings - Fork 39
/
paranoia.c
3155 lines (2722 loc) · 105 KB
/
paranoia.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Copyright (C) 2004, 2005, 2006, 2008, 2011, 2017
Rocky Bernstein <[email protected]>
Copyright (C) 2014 Robert Kausch <[email protected]>
Copyright (C) 1998 Monty [email protected]
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/***
* Toplevel file for the paranoia abstraction over the cdda lib
*
***/
/* immediate todo:: */
/* Allow disabling of root fixups? */
/* Dupe bytes are creeping into cases that require greater overlap
than a single fragment can provide. We need to check against a
larger area* (+/-32 sectors of root?) to better eliminate
dupes. Of course this leads to other problems... Is it actually a
practically solvable problem? */
/* Bimodal overlap distributions break us. */
/* scratch detection/tolerance not implemented yet */
/***************************************************************
Da new shtick: verification now a two-step assymetric process.
A single 'verified/reconstructed' data segment cache, and then the
multiple fragment cache
verify a newly read block against previous blocks; do it only this
once. We maintain a list of 'verified sections' from these matches.
We then glom these verified areas into a new data buffer.
Defragmentation fixups are allowed here alone.
We also now track where read boundaries actually happened; do not
verify across matching boundaries.
**************************************************************/
/***************************************************************
Silence. "It's BAAAAAAaaack."
audio is now treated as great continents of values floating on a
mantle of molten silence. Silence is not handled by basic
verification at all; we simply anchor sections of nonzero audio to a
position and fill in everything else as silence. We also note the
audio that interfaces with silence; an edge must be 'wet'.
**************************************************************/
/* ===========================================================================
* Let's translate the above vivid metaphor into something a mere mortal
* can understand:
*
* Non-silent audio is "solid." Silent audio is "wet" and fluid. The reason
* to treat silence as fluid is that if there's a long enough span of
* silence, we can't reliably detect jitter or dropped samples within that
* span (since all silence looks alike). Non-silent audio, on the other
* hand, is distinctive and can be reliably reassembled.
*
* So we treat long spans of silence specially. We only consider an edge
* of a non-silent region ("continent" or "island") to be "wet" if it borders
* a long span of silence. Short spans of silence are merely damp and can
* be reliably placed within a continent.
*
* We position ("anchor") the non-silent regions somewhat arbitrarily (since
* they may be jittered and we have no way to verify their exact position),
* and fill the intervening space with silence.
*
* See i_silence_match() for the gory details.
* ===========================================================================
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
# define __CDIO_CONFIG_H__ 1
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <unistd.h>
#include <stdio.h>
#include <limits.h>
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <math.h>
#include <cdio/paranoia/cdda.h>
#include "../cdda_interface/smallft.h"
#include <cdio/paranoia/version.h>
#include "p_block.h"
#include <cdio/paranoia/paranoia.h>
#include "overlap.h"
#include "gap.h"
#include "isort.h"
#include <errno.h>
#define MIN_SEEK_MS 6
const char *paranoia_cb_mode2str[] = {
"read",
"verify",
"fixup edge",
"fixup atom",
"scratch",
"repair",
"skip",
"drift",
"backoff",
"overlap",
"fixup dropped",
"fixup duplicated",
"read error"
};
/** The below variables are trickery to force the above enum symbol
values to be recorded in debug symbol tables. They are used to
allow one to refer to the enumeration value names in the typedefs
above in a debugger and debugger expressions
*/
paranoia_mode_t debug_paranoia_mode;
paranoia_cb_mode_t debug_paranoia_cb_mode;
static inline long
re(root_block *root)
{
if (!root)return(-1);
if (!root->vector)return(-1);
return(ce(root->vector));
}
static inline long
rb(root_block *root)
{
if (!root)return(-1);
if (!root->vector)return(-1);
return(cb(root->vector));
}
static inline
long rs(root_block *root)
{
if (!root)return(-1);
if (!root->vector)return(-1);
return(cs(root->vector));
}
static inline int16_t *
rv(root_block *root){
if (!root)return(NULL);
if (!root->vector)return(NULL);
return(cv(root->vector));
}
#define rc(r) (r->vector)
/**
Flags indicating the status of a read samples.
Imagine the below enumeration values are \#defines to be used in a
bitmask rather than distinct values of an enum.
The variable part of the declaration is trickery to force the enum
symbol values to be recorded in debug symbol tables. They are used
to allow one refer to the enumeration value names in a debugger
and in debugger expressions.
*/
enum {
FLAGS_EDGE =0x1, /**< first/last N words of frame */
FLAGS_UNREAD =0x2, /**< unread, hence missing and unmatchable */
FLAGS_VERIFIED=0x4 /**< block read and verified */
} paranoia_read_flags;
/**** matching and analysis code *****************************************/
/* ===========================================================================
* i_paranoia_overlap() (internal)
*
* This function is called when buffA[offsetA] == buffB[offsetB]. This
* function searches backward and forward to see how many consecutive
* samples also match.
*
* This function is called by do_const_sync() when we're not doing any
* verification. Its more complicated sibling is i_paranoia_overlap2.
*
* This function returns the number of consecutive matching samples.
* If (ret_begin) or (ret_end) are not NULL, it fills them with the
* offsets of the first and last matching samples in A.
*/
static inline long
i_paranoia_overlap(int16_t *buffA,int16_t *buffB,
long offsetA, long offsetB,
long sizeA,long sizeB,
long *ret_begin, long *ret_end)
{
long beginA=offsetA,endA=offsetA;
long beginB=offsetB,endB=offsetB;
/* Scan backward to extend the matching run in that direction. */
for(; beginA>=0 && beginB>=0; beginA--,beginB--)
if (buffA[beginA] != buffB[beginB]) break;
beginA++;
beginB++;
/* Scan forward to extend the matching run in that direction. */
for(; endA<sizeA && endB<sizeB; endA++,endB++)
if (buffA[endA] != buffB[endB]) break;
/* Return the result of our search. */
if (ret_begin) *ret_begin = beginA;
if (ret_end) *ret_end = endA;
return (endA-beginA);
}
/* ===========================================================================
* i_paranoia_overlap2() (internal)
*
* This function is called when buffA[offsetA] == buffB[offsetB]. This
* function searches backward and forward to see how many consecutive
* samples also match.
*
* This function is called by do_const_sync() when we're verifying the
* data coming off the CD. Its less complicated sibling is
* i_paranoia_overlap, which is a good place to look to see the simplest
* outline of how this function works.
*
* This function returns the number of consecutive matching samples.
* If (ret_begin) or (ret_end) are not NULL, it fills them with the
* offsets of the first and last matching samples in A.
*/
static inline long
i_paranoia_overlap2(int16_t *buffA,int16_t *buffB,
unsigned char *flagsA,
unsigned char *flagsB,
long offsetA, long offsetB,
long sizeA,long sizeB,
long *ret_begin, long *ret_end)
{
long beginA=offsetA, endA=offsetA;
long beginB=offsetB, endB=offsetB;
/* Scan backward to extend the matching run in that direction. */
for (; beginA>=0 && beginB>=0; beginA--,beginB--) {
if (buffA[beginA] != buffB[beginB]) break;
/* don't allow matching across matching sector boundaries. The
liklihood of the drive skipping identically in two
different reads with the same sector read boundary is actually
relatively very high compared to the liklihood of it skipping
when one read is continuous across the boundary and other was
discontinuous */
/* Stop if both samples were at the edges of a low-level read.
* ???: What implications does this have?
* ???: Why do we include the first sample for which this is true?
*/
if ((flagsA[beginA]&flagsB[beginB]&FLAGS_EDGE)) {
beginA--;
beginB--;
break;
}
/* don't allow matching through known missing data */
if ((flagsA[beginA]&FLAGS_UNREAD) || (flagsB[beginB]&FLAGS_UNREAD))
break;
}
beginA++;
beginB++;
/* Scan forward to extend the matching run in that direction. */
for (; endA<sizeA && endB<sizeB; endA++,endB++) {
if (buffA[endA] != buffB[endB]) break;
/* don't allow matching across matching sector boundaries */
/* Stop if both samples were at the edges of a low-level read.
* ???: What implications does this have?
* ???: Why do we not stop if endA == beginA?
*/
if ((flagsA[endA]&flagsB[endB]&FLAGS_EDGE) && endA!=beginA){
break;
}
/* don't allow matching through known missing data */
if ((flagsA[endA]&FLAGS_UNREAD) || (flagsB[endB]&FLAGS_UNREAD))
break;
}
/* Return the result of our search. */
if (ret_begin) *ret_begin = beginA;
if (ret_end) *ret_end = endA;
return (endA-beginA);
}
/* ===========================================================================
* do_const_sync() (internal)
*
* This function is called when samples A[posA] == B[posB]. It tries to
* build a matching run from that point, looking forward and backward to
* see how many consecutive samples match. Since the starting samples
* might only be coincidentally identical, we only consider the run to
* be a true match if it's longer than MIN_WORDS_SEARCH.
*
* This function returns the length of the run if a matching run was found,
* or 0 otherwise. If a matching run was found, (begin) and (end) are set
* to the absolute positions of the beginning and ending samples of the
* run in A, and (offset) is set to the jitter between the c_blocks.
* (I.e., offset indicates the distance between what A considers sample N
* on the CD and what B considers sample N.)
*/
static inline long int
do_const_sync(c_block_t *A,
sort_info_t *B,
unsigned char *flagB,
long posA, long posB,
long *begin, long *end, long *offset)
{
unsigned char *flagA=A->flags;
long ret=0;
/* If we're doing any verification whatsoever, we have flags in stage
* 1, and will take them into account. Otherwise (e.g. in stage 2),
* we just do the simple equality test for samples on both sides of
* the initial match.
*/
if (flagB==NULL)
ret=i_paranoia_overlap(cv(A), iv(B), posA, posB,
cs(A), is(B), begin, end);
else
if ((flagB[posB]&FLAGS_UNREAD)==0)
ret=i_paranoia_overlap2(cv(A), iv(B), flagA, flagB,
posA, posB, cs(A), is(B),
begin, end);
/* Small matching runs could just be coincidental. We only consider this
* a real match if it's long enough.
*/
if (ret > MIN_WORDS_SEARCH) {
*offset=+(posA+cb(A))-(posB+ib(B));
/* Note that try_sort_sync()'s swaps A & B when it calls this function,
* so while we adjust begin & end to be relative to A here, that means
* it's relative to B in try_sort_sync().
*/
*begin+=cb(A);
*end+=cb(A);
return(ret);
}
return(0);
}
/* ===========================================================================
* try_sort_sync() (internal)
*
* Starting from the sample in B with the absolute position (post), look
* for a matching run in A. This search will look in A for a first
* matching sample within (p->dynoverlap) samples around (post). If it
* finds one, it will then determine how many consecutive samples match
* both A and B from that point, looking backwards and forwards. If
* this search produces a matching run longer than MIN_WORDS_SEARCH, we
* consider it a match.
*
* When used by stage 1, the "post" is planted with respect to the old
* c_block being compare to the new c_block. In stage 2, the "post" is
* planted with respect to the verified root.
*
* This function returns 1 if a match is found and 0 if not. When a match
* is found, (begin) and (end) are set to the boundaries of the run, and
* (offset) is set to the difference in position of the run in A and B.
* (begin) and (end) are the absolute positions of the samples in
* B. (offset) transforms A to B's frame of reference. I.e., an offset of
* 2 would mean that A's absolute 3 is equivalent to B's 5.
*/
/* post is w.r.t. B. in stage one, we post from old. In stage 2 we
post from root. Begin, end, offset count from B's frame of
reference */
static inline long int
try_sort_sync(cdrom_paranoia_t *p,
sort_info_t *A, unsigned char *Aflags,
c_block_t *B,
long int post,
long int *begin,
long int *end,
long *offset,
void (*callback)(long int, paranoia_cb_mode_t))
{
long int dynoverlap=p->dynoverlap;
sort_link_t *ptr=NULL;
unsigned char *Bflags=B->flags;
/* block flag matches FLAGS_UNREAD (and hence unmatchable) */
if (Bflags==NULL || (Bflags[post-cb(B)]&FLAGS_UNREAD)==0){
/* always try absolute offset zero first! */
{
long zeropos=post-ib(A);
if (zeropos>=0 && zeropos<is(A)) {
/* Before we bother with the search for a matching samples,
* we check the simple case. If there's no jitter at all
* (i.e. the absolute positions of A's and B's samples are
* consistent), A's sample at (post) should be identical
* to B's sample at the same position.
*/
if ( cv(B)[post-cb(B)] == iv(A)[zeropos] ) {
/* The first sample matched, now try to grow the matching run
* in both directions. We only consider it a match if more
* than MIN_WORDS_SEARCH consecutive samples match.
*/
if (do_const_sync(B, A, Aflags,
post-cb(B), zeropos,
begin, end, offset) ) {
/* ???BUG??? Jitter cannot be accurately detected when there are
* large regions of silence. Silence all looks alike, so if
* there is actually jitter but lots of silence, jitter (offset)
* will be incorrectly identified as 0. When the incorrect zero
* jitter is passed to offset_add_value, it eventually reduces
* dynoverlap so much that it's impossible for stage 2 to merge
* jittered fragments into the root (it doesn't search far enough).
*
* A potential solution (tested, but not committed) is to check
* for silence in do_const_sync and simply not call
* offset_add_value if the match is all silence.
*
* This bug is not fixed yet.
*/
/* ???: To be studied. */
offset_add_value(p,&(p->stage1),*offset,callback);
return(1);
}
}
}
}
} else
return(0);
/* If the samples with the same absolute position didn't match, it's
* either a bad sample, or the two c_blocks are jittered with respect
* to each other. Now we search through A for samples that do have
* the same value as B's post. The search looks from first to last
* occurrence witin (dynoverlap) samples of (post).
*/
ptr=sort_getmatch(A,post-ib(A),dynoverlap,cv(B)[post-cb(B)]);
while (ptr){
/* We've found a matching sample, so try to grow the matching run in
* both directions. If we find a long enough run (longer than
* MIN_WORDS_SEARCH), we've found a match.
*/
if (do_const_sync(B,A,Aflags,
post-cb(B),ipos(A,ptr),
begin,end,offset)){
/* ???BUG??? Jitter cannot be accurately detected when there are
* large regions of silence. Silence all looks alike, so if
* there is actually jitter but lots of silence, jitter (offset)
* will be incorrectly identified as 0. When the incorrect zero
* jitter is passed to offset_add_value, it eventually reduces
* dynoverlap so much that it's impossible for stage 2 to merge
* jittered fragments into the root (it doesn't search far enough).
*
* A potential solution (tested, but not committed) is to check
* for silence in do_const_sync and simply not call
* offset_add_value if the match is all silence.
*
* This bug is not fixed yet.
*/
/* ???: To be studied. */
offset_add_value(p,&(p->stage1),*offset,callback);
return(1);
}
/* The matching sample was just a fluke -- there weren't enough adjacent
* samples that matched to consider a matching run. So now we check
* for the next occurrence of that value in A.
*/
ptr=sort_nextmatch(A,ptr);
}
/* We didn't find any matches. */
*begin=-1;
*end=-1;
*offset=-1;
return(0);
}
/* ===========================================================================
* STAGE 1 MATCHING
*
* ???: Insert high-level explanation here.
* ===========================================================================
*/
/* Top level of the first stage matcher */
/* We match each analysis point of new to the preexisting blocks
recursively. We can also optionally maintain a list of fragments of
the preexisting block that didn't match anything, and match them back
afterward. */
#define OVERLAP_ADJ (MIN_WORDS_OVERLAP/2-1)
/* ===========================================================================
* stage1_matched() (internal)
*
* This function is called whenever stage 1 verification finds two identical
* runs of samples from different reads. The runs must be more than
* MIN_WORDS_SEARCH samples long. They may be jittered (i.e. their absolute
* positions on the CD may not match due to inaccurate seeking) with respect
* to each other, but they have been verified to have no dropped samples
* within them.
*
* This function provides feedback via the callback mechanism and marks the
* runs as verified. The details of the marking are somehwat subtle and
* are described near the relevant code.
*
* Subsequent portions of the stage 1 code will build a verified fragment
* from this run. The verified fragment will eventually be merged
* into the verified root (and its absolute position determined) in
* stage 2.
*/
static inline void
stage1_matched(c_block_t *old, c_block_t *new,
long matchbegin,long matchend,
long matchoffset,
void (*callback)(long int, paranoia_cb_mode_t))
{
long i;
long oldadjbegin=matchbegin-cb(old);
long oldadjend=matchend-cb(old);
long newadjbegin=matchbegin-matchoffset-cb(new);
long newadjend=matchend-matchoffset-cb(new);
/* Provide feedback via the callback about the samples we've just
* verified.
*
* "???: How can matchbegin ever be < cb(old)?"
* Sorry, old bulletproofing habit. I often use <= to mean "not >"
* --Monty
*
* "???: Why do edge samples get logged only when there's jitter
* between the matched runs (matchoffset != 0)?"
* FIXUP_EDGE is actually logging a jitter event, not a rift--
* a rift is FIXUP_ATOM --Monty
*/
if ( matchbegin-matchoffset<=cb(new)
|| matchbegin<=cb(old)
|| (new->flags[newadjbegin]&FLAGS_EDGE)
|| (old->flags[oldadjbegin]&FLAGS_EDGE) ) {
if ( matchoffset && callback )
(*callback)(matchbegin,PARANOIA_CB_FIXUP_EDGE);
} else
if (callback)
(*callback)(matchbegin,PARANOIA_CB_FIXUP_ATOM);
if ( matchend-matchoffset>=ce(new) ||
(new->flags[newadjend]&FLAGS_EDGE) ||
matchend>=ce(old) ||
(old->flags[oldadjend]&FLAGS_EDGE) ) {
if ( matchoffset && callback )
(*callback)(matchend,PARANOIA_CB_FIXUP_EDGE);
} else
if (callback)
(*callback)(matchend, PARANOIA_CB_FIXUP_ATOM);
#if TRACE_PARANOIA & 1
fprintf(stderr, "- Matched [%ld-%ld] against [%ld-%ld]\n",
newadjbegin+cb(new), newadjend+cb(new),
oldadjbegin+cb(old), oldadjend+cb(old));
#endif
/* Mark verified samples as "verified," but trim the verified region
* by OVERLAP_ADJ samples on each side. There are several significant
* implications of this trimming:
*
* 1) Why we trim at all: We have to trim to distinguish between two
* adjacent verified runs and one long verified run. We encounter this
* situation when samples have been dropped:
*
* matched portion of read 1 ....)(.... matched portion of read 1
* read 2 adjacent run .....)(..... read 2 adjacent run
* ||
* dropped samples in read 2
*
* So at this point, the fact that we have two adjacent runs means
* that we have not yet verified that the two runs really are adjacent.
* (In fact, just the opposite: there are two runs because they were
* matched by separate runs, indicating that some samples didn't match
* across the length of read 2.)
*
* If we verify that they are actually adjacent (e.g. if the two runs
* are simply a result of matching runs from different reads, not from
* dropped samples), we will indeed mark them as one long merged run.
*
* 2) Why we trim by this amount: We want to ensure that when we
* verify the relationship between these two runs, we do so with
* an overlapping fragment at least OVERLAP samples long. Following
* from the above example:
*
* (..... matched portion of read 3 .....)
* read 2 adjacent run .....)(..... read 2 adjacent run
*
* Assuming there were no dropped samples between the adjacent runs,
* the matching portion of read 3 will need to be at least OVERLAP
* samples long to mark the two runs as one long verified run.
* If there were dropped samples, read 3 wouldn't match across the
* two runs, proving our caution worthwhile.
*
* 3) Why we partially discard the work we've done: We don't.
* When subsequently creating verified fragments from this run,
* we compensate for this trimming. Thus the verified fragment will
* contain the full length of verified samples. Only the c_blocks
* will reflect this trimming.
*
* ???: The comment below indicates that the sort cache is updated in
* some way, but this does not appear to be the case.
*/
/* Mark the verification flags. Don't mark the first or
last OVERLAP/2 elements so that overlapping fragments
have to overlap by OVERLAP to actually merge. We also
remove elements from the sort such that later sorts do
not have to sift through already matched data */
newadjbegin+=OVERLAP_ADJ;
newadjend-=OVERLAP_ADJ;
for(i=newadjbegin;i<newadjend;i++)
new->flags[i]|=FLAGS_VERIFIED; /* mark verified */
oldadjbegin+=OVERLAP_ADJ;
oldadjend-=OVERLAP_ADJ;
for(i=oldadjbegin;i<oldadjend;i++)
old->flags[i]|=FLAGS_VERIFIED; /* mark verified */
}
/* ===========================================================================
* i_iterate_stage1 (internal)
*
* This function is called by i_stage1() to compare newly read samples with
* previously read samples, searching for contiguous runs of identical
* samples. Matching runs indicate that at least two reads of the CD
* returned identical data, with no dropped samples in that run.
* The runs may be jittered (i.e. their absolute positions on the CD may
* not be accurate due to inaccurate seeking) at this point. Their
* positions will be determined in stage 2.
*
* This function compares the new c_block (which has been indexed in
* p->sortcache) to a previous c_block. It is called for each previous
* c_block. It searches for runs of identical samples longer than
* MIN_WORDS_SEARCH. Samples in matched runs are marked as verified.
*
* Subsequent stage 1 code builds verified fragments from the runs of
* verified samples. These fragments are merged into the verified root
* in stage 2.
*
* This function returns the number of distinct runs verified in the new
* c_block when compared against this old c_block.
*/
static long int
i_iterate_stage1(cdrom_paranoia_t *p, c_block_t *old, c_block_t *new,
void(*callback)(long int, paranoia_cb_mode_t))
{
long matchbegin = -1;
long matchend = -1;
long matchoffset;
/* "???: Why do we limit our search only to the samples with overlapping
* absolute positions? It could be because it eliminates some further
* bounds checking."
* Short answer is yes --Monty
*
* "Why do we "no longer try to spread the ... search" as mentioned
* below?"
* The search is normally much faster without the spread,
* even in heavy jitter. Dynoverlap tends to be a bigger deal in
* stage 2. --Monty
*/
/* we no longer try to spread the stage one search area by dynoverlap */
long searchend = min(ce(old), ce(new));
long searchbegin = max(cb(old), cb(new));
long searchsize = searchend-searchbegin;
sort_info_t *i = p->sortcache;
long ret = 0;
long int j;
long tried = 0;
long matched = 0;
if (searchsize<=0)
return(0);
/* match return values are in terms of the new vector, not old */
/* "???: Why 23?" Odd, prime number --Monty */
for (j=searchbegin; j<searchend; j+=23) {
/* Skip past any samples verified in previous comparisons to
* other old c_blocks. Also, obviously, don't bother verifying
* unread/unmatchable samples.
*/
if ((new->flags[j-cb(new)] & (FLAGS_VERIFIED|FLAGS_UNREAD)) == 0) {
tried++;
/* Starting from the sample in the old c_block with the absolute
* position j, look for a matching run in the new c_block. This
* search will look a certain distance around j, and if successful
* will extend the matching run as far backward and forward as
* it can.
*
* The search will only return 1 if it finds a matching run long
* enough to be deemed significant.
*/
if (try_sort_sync(p, i, new->flags, old, j,
&matchbegin, &matchend, &matchoffset,
callback) == 1) {
matched+=matchend-matchbegin;
/* purely cosmetic: if we're matching zeros, don't use the
callback because they will appear to be all skewed */
{
long j = matchbegin-cb(old);
long end = matchend-cb(old);
for (; j<end; j++) if (cv(old)[j]!=0) break;
/* Mark the matched samples in both c_blocks as verified.
* In reality, not all the samples are marked. See
* stage1_matched() for details.
*/
if (j<end) {
stage1_matched(old,new,matchbegin,matchend,matchoffset,callback);
} else {
stage1_matched(old,new,matchbegin,matchend,matchoffset,NULL);
}
}
ret++;
/* Skip past this verified run to look for more matches. */
if (matchend-1 > j)
j = matchend-1;
}
}
} /* end for */
#ifdef NOISY
fprintf(stderr,"iterate_stage1: search area=%ld[%ld-%ld] tried=%ld matched=%ld spans=%ld\n",
searchsize,searchbegin,searchend,tried,matched,ret);
#endif
return(ret);
}
/* ===========================================================================
* i_stage1() (internal)
*
* Compare newly read samples against previously read samples, searching
* for contiguous runs of identical samples. Matching runs indicate that
* at least two reads of the CD returned identical data, with no dropped
* samples in that run. The runs may be jittered (i.e. their absolute
* positions on the CD may not be accurate due to inaccurate seeking) at
* this point. Their positions will be determined in stage 2.
*
* This function compares a new c_block against all other c_blocks in memory,
* searching for sufficiently long runs of identical samples. Since each
* c_block represents a separate call to read_c_block, this ensures that
* multiple reads have returned identical data. (Additionally, read_c_block
* varies the reads so that multiple reads are unlikely to produce identical
* errors, so any matches between reads are considered verified. See
* i_read_c_block for more details.)
*
* Each time we find such a run (longer than MIN_WORDS_SEARCH), we mark
* the samples as "verified" in both c_blocks. Runs of verified samples in
* the new c_block are promoted into verified fragments, which will later
* be merged into the verified root in stage 2.
*
* In reality, not all the verified samples are marked as "verified."
* See stage1_matched() for an explanation.
*
* This function returns the number of verified fragments created by the
* stage 1 matching.
*/
static long int
i_stage1(cdrom_paranoia_t *p, c_block_t *p_new,
void (*callback)(long int, paranoia_cb_mode_t))
{
long size=cs(p_new);
c_block_t *ptr=c_last(p);
int ret=0;
long int begin=0;
long int end;
#if TRACE_PARANOIA & 1
long int block_count = 0;
fprintf(stderr,
"Verifying block %ld:[%ld-%ld] against previously read blocks...\n",
p->cache->active,
cb(p_new), ce(p_new));
#endif
/* We're going to be comparing the new c_block against the other
* c_blocks in memory. Initialize the "sort cache" index to allow
* for fast searching through the new c_block. (The index will
* actually be built the first time we search.)
*/
if (ptr)
sort_setup( p->sortcache, cv(p_new), &cb(p_new), cs(p_new), cb(p_new),
ce(p_new) );
/* Iterate from oldest to newest c_block, comparing the new c_block
* to each, looking for a sufficiently long run of identical samples
* (longer than MIN_WORDS_SEARCH), which will be marked as "verified"
* in both c_blocks.
*
* Since the new c_block is already in the list (at the head), don't
* compare it against itself.
*/
while ( ptr && ptr != p_new ) {
#if TRACE_PARANOIA & 1
block_count++;
fprintf(stderr,
"- Verifying against block %ld:[%ld-%ld] dynoverlap=%ld\n",
block_count, cb(ptr), ce(ptr), p->dynoverlap);
#endif
if (callback)
(*callback)(cb(p_new), PARANOIA_CB_VERIFY);
i_iterate_stage1(p,ptr,p_new,callback);
ptr=c_prev(ptr);
}
/* parse the verified areas of p_new into v_fragments */
/* Find each run of contiguous verified samples in the new c_block
* and create a verified fragment from each run.
*/
begin=0;
while (begin<size) {
for ( ; begin < size; begin++)
if (p_new->flags[begin]&FLAGS_VERIFIED) break;
for (end=begin; end < size; end++)
if ((p_new->flags[end]&FLAGS_VERIFIED)==0) break;
if (begin>=size) break;
ret++;
/* We create a new verified fragment from the contiguous run
* of verified samples.
*
* We expand the "verified" range by OVERLAP_ADJ on each side
* to compensate for trimming done to the verified range by
* stage1_matched(). The samples were actually verified, and
* hence belong in the verified fragment. See stage1_matched()
* for an explanation of the trimming.
*/
new_v_fragment(p,p_new,cb(p_new)+max(0,begin-OVERLAP_ADJ),
cb(p_new)+min(size,end+OVERLAP_ADJ),
(end+OVERLAP_ADJ>=size && p_new->lastsector));
begin=end;
}
/* Return the number of distinct verified fragments we found with
* stage 1 matching.
*/
return(ret);
}
/* ===========================================================================
* STAGE 2 MATCHING
*
* ???: Insert high-level explanation here.
* ===========================================================================
*/
typedef struct sync_result {
long offset;
long begin;
long end;
} sync_result_t;
/* Reconcile v_fragments to root buffer. Free if matched, fragment/fixup root
if necessary.
Do *not* match using zero posts
*/
/* ===========================================================================
* i_iterate_stage2 (internal)
*
* This function searches for a sufficiently long run of identical samples
* between the passed verified fragment and the verified root. The search
* is similar to that performed by i_iterate_stage1. Of course, what we do
* as a result of a match is different.
*
* Our search is slightly different in that we refuse to match silence to
* silence. All silence looks alike, and it would result in too many false
* positives here, so we handle silence separately.
*
* Also, because we're trying to determine whether this fragment as a whole
* overlaps with the root at all, we narrow our search (since it should match
* immediately or not at all). This is in contrast to stage 1, where we
* search the entire vector looking for all possible matches.
*
* This function returns 0 if no match was found (including failure to find
* one due to silence), or 1 if we found a match.
*
* When a match is found, the sync_result_t is set to the boundaries of
* matching run (begin/end, in terms of the root) and how far out of sync
* the fragment is from the canonical root (offset). Note that this offset
* is opposite in sign from the notion of offset used by try_sort_sync()
* and stage 1 generally.
*/
static long int
i_iterate_stage2(cdrom_paranoia_t *p,
v_fragment_t *v,
sync_result_t *r,
void(*callback)(long int, paranoia_cb_mode_t))
{
root_block *root=&(p->root);
long matchbegin=-1,matchend=-1,offset;
long fbv,fev;
#if TRACE_PARANOIA & 2
fprintf(stderr, "- Comparing fragment [%ld-%ld] to root [%ld-%ld]...",
fb(v), fe(v), rb(root), re(root));
#endif
#ifdef NOISY
fprintf(stderr,"Stage 2 search: fbv=%ld fev=%ld\n",fb(v),fe(v));
#endif
/* Quickly check whether there could possibly be any overlap between
* the verified fragment and the root. Our search will allow up to
* (p->dynoverlap) jitter between the two, so we expand the fragment
* search area by p->dynoverlap on both sides and see if that expanded
* area overlaps with the root.
*
* We could just as easily expand root's boundaries by p->dynoverlap
* instead and achieve the same result.
*/
if (min(fe(v) + p->dynoverlap,re(root)) -
max(fb(v) - p->dynoverlap,rb(root)) <= 0)
return(0);
if (callback)
(*callback)(fb(v), PARANOIA_CB_VERIFY);
/* We're going to try to match the fragment to the root while allowing
* for p->dynoverlap jitter, so we'll actually be looking at samples
* in the fragment whose position claims to be up to p->dynoverlap
* outside the boundaries of the root. But, of course, don't extend
* past the edges of the fragment.
*/
fbv = max(fb(v), rb(root)-p->dynoverlap);
/* Skip past leading zeroes in the fragment, and bail if there's nothing
* but silence. We handle silence later separately.
*/
while (fbv<fe(v) && fv(v)[fbv-fb(v)]==0)
fbv++;
if (fbv == fe(v))
return(0);
/* This is basically the same idea as the initial calculation for fbv
* above. Look at samples up to p->dynoverlap outside the boundaries
* of the root, but don't extend past the edges of the fragment.