-
Notifications
You must be signed in to change notification settings - Fork 0
/
gen_note.js
113 lines (92 loc) · 2.46 KB
/
gen_note.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
var iconv=require("iconv-lite");
var fs=require("fs");
// File names to convert, one per line of note.lst. Blank lines (such as the
// empty entry a trailing newline produces after split) are dropped so they are
// never handed to processfile as if they were file names.
var lst=fs.readFileSync("note.lst","utf8").split(/\r?\n/).filter(function(line){
  return line.trim()!=="";
});
var count=0; // incremented by processfile for each file it handles
var max=0; //set to 0 for all files
var writeToDisk=true; // when false, processfile converts but does not write output
/**
 * Decode decimal numeric character references (such as "&#257;") in a string.
 * Hexadecimal ("&#x..;") and named references are left untouched.
 *
 * @param {string} str - text that may contain "&#NNN;" references
 * @returns {string} the text with each decimal reference replaced by its character
 * @throws {Error} when a reference exceeds the highest Unicode code point (0x10FFFF)
 */
var replaceEntity=function(str) {
  return str.replace(/&#(\d+);/g,function(m,m1){
    // The pattern only admits digits; the explicit radix keeps the parse decimal.
    var n=parseInt(m1,10);
    if (isNaN(n) || n>0x10FFFF) {
      throw new Error("cannot parse "+m1);
    }
    // fromCodePoint emits a surrogate pair for code points above U+FFFF,
    // whereas fromCharCode would silently truncate them to 16 bits.
    return String.fromCodePoint(n);
  });
};
// Ordered [pattern, replacement] pairs; processfile applies each one in turn
// to the note body after numeric references have been decoded.
// replaceEntity only understands numeric "&#NNN;" references, so named
// entities have to be mapped explicitly here.
// NOTE(review): the entity names are inferred from the replacement characters
// (a pattern equal to its own replacement would be a no-op) — confirm against
// the source HTML.
var replaces=[
  [/&ntilde;/g,"ñ"],
  [/&quot;/g,'"'],
  [/&icirc;/g,"î"],
  [/&Ntilde;/g,"Ñ"],
  [/&acirc;/g,"â"],
  [/<1>/g,"1."],
  [/<2>/g,"2."],
  [/<3>/g,"3."],
  [/\nmso-bidi-language:AR-SA">j<\/span>/g,'>\nṃ']
];
/**
 * Extract the note body from a page: everything after the first
 * '<p style="line-height: 150%">' marker and before the last '</p>'.
 *
 * @param {string} str - page text
 * @param {string} fn - file name, used only in diagnostics
 * @returns {string} the text between the two markers
 * @throws {Error} when either marker is missing
 */
var getBody=function(str,fn){
  var startTag='<p style="line-height: 150%">';
  var start=str.indexOf(startTag);
  if (start<0) {
    console.error("cannot find start",fn);
    throw new Error("cannot find start "+fn);
  }
  // Skip past the marker itself; its length is derived rather than hard-coded.
  var body=str.substring(start+startTag.length);
  // The last closing tag is used so that inner paragraphs stay in the body.
  var end=body.lastIndexOf('</p>');
  if (end<0) {
    console.error("cannot find end",fn);
    throw new Error("cannot find end "+fn);
  }
  return body.substring(0,end);
};
/**
 * Patch note markers in three specific files (found by checknote.js) where a
 * plain "(NNN)" appears instead of a note definition tag.
 *
 * @param {string} content - converted note text
 * @param {string} fn - file name without extension
 * @returns {string} the patched text, or content unchanged for any other file
 */
var fixHoles_2016_5_5=function(content,fn){
  switch (fn) {
    case "1a016r":
      // every marker from (100) to (109)
      return content.replace(/\((10\d)\)/g,function(whole,num){
        return '\n<ndef n="'+num+'"/>';
      });
    case "1a032r":
      // first "(100)" only
      return content.replace("(100)",function(){
        return '\n<ndef n="100"/>';
      });
    case "1d009r":
      // first "(115)" only
      return content.replace("(115)",function(){
        return '\n<ndef n="115"/>';
      });
    default:
      return content;
  }
};
/**
 * Convert one HTML note page from html/ into an XML note file in xml_note_gen/.
 *
 * The page is decoded as Big5, reduced to its body by getBody, passed through
 * replaceEntity and the replaces table, stripped of markup, and its note
 * anchors are turned into <ndef n="..."/> tags, one per line.
 *
 * @param {string} fn - file name inside html/, including its 4-character extension
 * @returns {undefined}
 * Errors from reading, body extraction or writing are not caught here.
 */
var processfile=function(fn){
  // Blank list entries (e.g. from a trailing newline) are not file names.
  if (!fn || fn.trim()==="") return;
  // Stop once `max` files have been handled; max of 0 means no limit.
  if (max && count>=max) return;
  count++;

  var filename=fn.slice(0,-4); // drop the extension
  var targetfn=filename+".xml";

  var raw=fs.readFileSync("html/"+fn);
  var str=iconv.decode(raw,'big5').replace(/\r?\n/g,"\n");
  str=getBody(str,filename);
  str=replaceEntity(str);
  replaces.forEach(function(pair){
    str=str.replace(pair[0],pair[1]);
  });

  // Note anchors become {{...}} placeholders so they survive the tag
  // stripping below; they are turned back into tags afterwards.
  str=str.replace(/\(<A NAME=\d+>(\d+)<\/A>\)/g,function(m,m1){
    return '{{ndef n="'+m1+'"/}}';
  });
  str=str.replace(/\n/g,"");
  str=str.replace(/<.+?>/g,"");
  // Normalize non-breaking spaces (entity or literal U+00A0), then collapse runs.
  str=str.replace(/&nbsp;|\u00a0/g," ");
  str=str.replace(/ +/g," ");
  str=str.replace(/\{\{(.+?)\}\}/g,function(m,m1){
    return "\n<"+m1+">";
  });
  // A "*" immediately before a note tag is moved into a star="1" attribute.
  str=str.replace(/\*\n<ndef n="(.+?)"/g,function(m,m1){
    return '\n<ndef n="'+m1+'" star="1"';
  });
  str=str.trim();

  var out=fixHoles_2016_5_5(str,filename);
  console.log(targetfn);
  if (writeToDisk) fs.writeFileSync("xml_note_gen/"+targetfn,out,"utf8");
};
// Convert every listed file in list order, then report how many entries the list holds.
lst.forEach(function(name){
  processfile(name);
});
console.log("total files",lst.length);