forked from openshift/openshift-docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_for_portal.py
973 lines (789 loc) · 36 KB
/
build_for_portal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
#!/usr/bin/python
import argparse
import configparser
import filecmp
import fnmatch
import logging
import os
import re
import shutil
import subprocess
import sys
import time
import yaml
from aura import cli
# Initialise logging through the project-local ``aura`` helper.
# NOTE(review): the meaning of the two positional flags is defined in ``aura.cli`` - confirm there.
cli.init_logging(False, True)

# Checked by main() once the build has finished; a truthy value makes the script exit with status 1.
has_errors = False

# Directory that holds the upstream sources; the build config file is looked up inside it.
CLONE_DIR = "."

# Prefix that is prepended to the generated cross-book links.
BASE_PORTAL_URL = "https://access.redhat.com/documentation/en-us/"

# Previous version of ID_RE, kept for reference (it has no alternative for the id="..." form).
# ID_RE = re.compile("^\[(?:\[|id=\'|#)(.*?)(\'?,.*?)?(?:\]|\'|\")?\]", re.M | re.DOTALL)
# Matches an id declaration at the start of a line: [[id]], [id='id'], [#id] or [id="id"]. Group 1 is the id.
ID_RE = re.compile("^\[(?:\[|id=\'|#|id=\")(.*?)(\'?,.*?)?(?:\]|\'|\")?\]", re.M | re.DOTALL)
# Matches xref:/link: macros. Group 1 is the optional .html/.adoc path, group 2 the optional #anchor
# and group 3 the bracketed link text.
LINKS_RE = re.compile("(?:xref|link):([\./\w_-]*/?[\w_.-]*\.(?:html|adoc))?(#[\w_-]*)?(\[.*?\])", re.M | re.DOTALL)
# Splits a link path that contains a directory: group 1 is the first directory name, group 2 the file name.
EXTERNAL_LINK_RE = re.compile("[\./]*([\w_-]+)/[\w_/-]*?([\w_.-]*\.(?:html|adoc))", re.DOTALL)
# Matches include::PATH[ATTRIBUTES]. Group 1 is the path, group 2 the raw attribute list.
INCLUDE_RE = re.compile("include::(.*?)\[(.*?)\]", re.M)
# Matches ifdef::/ifndef:: lines. Group 1 is "n" for ifndef, group 2 the comma separated attribute names.
IFDEF_RE = re.compile(r"^if(n?)def::(.*?)\[\]", re.M)
# Matches an endif:: line including its line ending.
ENDIF_RE = re.compile(r"^endif::(.*?)\[\]\r?\n", re.M)
# Matches a //// ... //// comment block.
COMMENT_CONTENT_RE = re.compile(r"^^////$.*?^////$", re.M | re.DOTALL)
# Matches a "// tag::NAME[] ... // end::NAME[]" region. Group 1 is the opening tag name.
TAG_CONTENT_RE = re.compile(r"//\s+tag::(.*?)\[\].*?// end::(.*?)\[\]", re.M | re.DOTALL)

# Names that are always skipped when comparing the build output with a git checkout.
CMP_IGNORE_FILES = [".git", ".gitignore", "README.md", "build.cfg"]
# NOTE(review): this handle is opened at import time and is not referenced by the code visible in this file.
DEVNULL = open(os.devnull, 'wb')

# Header template for master.adoc; filled in with str.format() using the "info" dictionary.
MASTER_FILE_BASE = "= {title}\n\
:product-author: {product-author}\n\
:product-title: {product}\n\
:product-version: {product-version}\n\
:{distro}:\n\
:imagesdir: images\n\
:idseparator: -\n\
{preface-title}\n"

# Template for docinfo.xml; the doubled braces survive str.format() as literal {product-title}/{product-version}.
DOCINFO_BASE = "<title>{title}</title>\n\
<productname>{{product-title}}</productname>\n\
<productnumber>{{product-version}}</productnumber>\n\
<subtitle>Enter a short description here.</subtitle>\n\
<abstract>\n\
<para>A short overview and summary of the book's subject and purpose, traditionally no more than one paragraph long.</para>\n\
</abstract>\n\
<authorgroup>\n\
<orgname>{product-author}</orgname>\n\
</authorgroup>\n\
<xi:include href=\"Common_Content/Legal_Notice.xml\" xmlns:xi=\"http://www.w3.org/2001/XInclude\" />\n"

# A list of book titles, that still use the old drupal url format (ie includes the product/version in the book title part)
# eg. openshift-enterprise/version-3.0/openshift-enterprise-30-getting-started vs openshift-enterprise/version-3.0/getting-started
# NOTE(review): not referenced by the code visible in this file.
DRUPAL_OLD_URL_TITLES = [
    "Administrator Guide",
    "Architecture",
    "CLI Reference",
    "Creating Images",
    "Developer Guide",
    "Getting Started",
    "REST API Reference",
    "Using Images",
    "What's New?"
]

# A mapping of upstream book/category names to CP book names
BOOK_NAME_OVERRIDES = {
    "Administration": "Administrator Guide"
}

# Lines that should be stripped out/ignored when cleaning the content
# (compared against the whole line, including its trailing newline)
IGNORE_LINES = [
    "{product-author}\n",
    "{product-version}\n",
    "{product-version]\n",
    "{Lucas Costi}\n",
    "toc::[]\n"
]

# Each MACRO in this list is omitted from the output
# if the input appears as ':MACRO:' (colon, MACRO, colon).
IGNORE_MACROS = [
    "description",
    "keywords",
    "icons",
    "data-uri",
    "toc",
    "toc-title"
]

# Files where the title should be removed when building the all-in-one
ALL_IN_ONE_SCRAP_TITLE = [
    "welcome/index.adoc"
]

# Files that should be commented out in the toc structure
COMMENT_FILES = [
    "admin_guide/overview.adoc",
    "creating_images/overview.adoc",
    "dev_guide/overview.adoc",
    "using_images/overview.adoc",
    "rest_api/overview.adoc"
]

# Map FILENAME to a map of TITLE to ID. In most of the cases the
# ID is the TITLE downcased, with "strange" chars replaced by hyphen.
# A notable exception is 'any' TITLE.
TITLE_IDS = {}

# A dictionary of existing dup ids to new unique ids
# (keyed by file name first, then by the duplicated id)
DUPLICATE_IDS = {}

# Map FILENAME to a map of BAD to GOOD. Most of the time, BAD and GOOD
# are in link syntax, i.e., beginning with "link:", but not always.
INCORRECT_LINKS = {}

# Logger used by every function in this script.
log = logging.getLogger("build")
def setup_parser():
    """
    Creates the command line parser for the build script.

    Returns an argparse.ArgumentParser whose help output shows the default of every documented option.
    """
    cli_parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    add = cli_parser.add_argument

    # What to build
    add("--distro", help="The distribution to build for", default="openshift-enterprise")
    add("--all-in-one", help=argparse.SUPPRESS, action="store_true")
    add("--title", help=argparse.SUPPRESS, default="Documentation")
    add("--product", default="OpenShift Enterprise")
    add("--version", default="3.0")
    add("--author", default="Red Hat OpenShift Documentation Team")

    # Where the sources come from and where the result goes
    add("--upstream-url", help="The upstream source url", default="https://github.com/openshift/openshift-docs.git")
    add("--upstream-branch", help="The upstream source branch", default="enterprise-3.0")
    add("--branch", help="The GitLab branch to commit changes into", default="GA")
    add("-p", "--push", help="Commit and push the changes into GitLab", action="store_true")

    # Build behaviour switches
    add("--no-clean", help="Don't clean the drupal-build directory before building", action="store_true")
    add("--no-upstream-fetch", help="Don't fetch the upstream sources", action="store_true")
    return cli_parser
def find_build_config_file():
    """
    Locates the build config inside CLONE_DIR.

    Returns the absolute path of _topic_map.yml when that file exists, otherwise the absolute path of _build_cfg.yml
    (without checking that the fallback exists).
    """
    topic_map = os.path.abspath(os.path.join(CLONE_DIR, "_topic_map.yml"))
    if os.path.isfile(topic_map):
        return topic_map
    return os.path.abspath(os.path.join(CLONE_DIR, "_build_cfg.yml"))
def parse_build_config(config):
    """
    Parses the build config and returns a tree based structure for the config.

    config is the path of a multi-document YAML file ("~" is expanded). Every document is one book node; the list
    of all documents is returned. Book names listed in BOOK_NAME_OVERRIDES are replaced by their override.
    """
    config = os.path.expanduser(config)
    with open(config, "r") as f:
        # safe_load_all only builds plain YAML types, which is all a topic map needs. Calling load_all() without
        # an explicit Loader is rejected by PyYAML 6 and allowed arbitrary object construction before that.
        data = list(yaml.safe_load_all(f))

    for book in data:
        book_name = book['Name']
        if book_name in BOOK_NAME_OVERRIDES:
            book['Name'] = BOOK_NAME_OVERRIDES[book_name]

    return data
def iter_tree(node, distro, dir_callback=None, topic_callback=None, include_path=True, parent_dir="", depth=0):
    """
    Walks a build config tree from the given node, ignoring every node whose distros don't match.

    A node that has a "Topics" key is a directory: dir_callback(node, parent_dir, depth) is called for it and its
    topics are then visited one level deeper. Any other node is a topic and is passed to
    topic_callback(node, parent_dir, depth). When include_path is false the directory of the starting node is not
    added to the path handed to its children.
    """
    is_directory = "Topics" in node
    if not check_node_distro_matches(node, distro):
        return

    if not is_directory:
        if topic_callback is not None:
            topic_callback(node, parent_dir, depth)
        return

    topics_dir = os.path.join(parent_dir, node["Dir"]) if include_path else ""

    if dir_callback is not None:
        dir_callback(node, parent_dir, depth)

    # Children always contribute their own directory to the path
    for child in node["Topics"]:
        iter_tree(child, distro, dir_callback, topic_callback, True, topics_dir, depth + 1)
def check_node_distro_matches(node, distro):
    """
    Tells whether a node should be built for the given distro.

    A node without a "Distros" entry is valid for every distro. Otherwise "Distros" is a comma separated list and
    the node matches when one entry equals the distro or matches it as a (case sensitive) glob pattern.
    """
    if "Distros" not in node:
        return True

    candidates = (entry.strip() for entry in node['Distros'].split(","))
    return any(candidate == distro or fnmatch.fnmatchcase(distro, candidate) for candidate in candidates)
def ensure_directory(directory):
    """
    Makes sure DIRECTORY exists, creating it (a single level only) when nothing exists at that path.
    """
    if os.path.exists(directory):
        return
    os.mkdir(directory)
def build_master_files(info):
    """
    Builds the master.adoc and docinfo.xml files for each guide specified in the config.

    info is the build dictionary created in main(). The keys read here are 'dest_dir', 'all_in_one', 'book_nodes',
    'distro' and 'src_dir', plus whatever MASTER_FILE_BASE and DOCINFO_BASE reference. In the default mode one
    master.adoc/docinfo.xml pair is written into every book directory. In all-in-one mode the content of all books
    is concatenated and a single pair is written into dest_dir; info['preface-title'] is updated as a side effect.
    """
    dest_dir = info['dest_dir']
    all_in_one = info['all_in_one']
    all_in_one_text = ""
    for book in info['book_nodes']:
        # The book directory is created in both modes
        book_dest_dir = os.path.join(dest_dir, book['Dir'])
        ensure_directory(book_dest_dir)

        # Per book copy of the build info, so the title can be overridden without touching the shared dict
        book_info = dict(info)
        book_info['title'] = book['Name']

        # The fourth argument is include_name: the book name only becomes a heading in all-in-one mode
        master = generate_master_entry(book, book['Dir'], info['distro'], all_in_one, all_in_one=all_in_one)

        # Save the content
        if not all_in_one:
            master_file = os.path.join(book_dest_dir, 'master.adoc')
            docinfo_file = os.path.join(book_dest_dir, 'docinfo.xml')
            master_base = MASTER_FILE_BASE.format(**book_info)

            log.debug("Writing " + master_file)
            with open(master_file, "w") as f:
                f.write(master_base + master)
            log.debug("Writing " + docinfo_file)
            with open(docinfo_file, "w") as f:
                f.write(DOCINFO_BASE.format(**book_info))
        else:
            # An empty accumulator means this is the first book that contributes content
            if all_in_one_text == "":
                # Remove the title for the first file in the book
                master = master.replace("= " + book['Name'] + "\n", "")

                # Set the preface title from the first file in the book
                first_file = os.path.join(info['src_dir'], book['Dir'], book['Topics'][0]['File'] + ".adoc")
                preface_title = None
                with open(first_file, "r") as f:
                    line = f.readline()
                    while line:
                        # The first line that isn't filtered out is used, with any leading "=" markers removed.
                        # The line keeps its trailing newline.
                        if include_line(line):
                            preface_title = re.sub("^=+ ", "", line)
                            break
                        line = f.readline()
                if preface_title is not None:
                    info['preface-title'] = ":preface-title: " + preface_title
            all_in_one_text += master

    if all_in_one:
        # Written after the loop so that info['preface-title'] set above ends up in the header
        master_file = os.path.join(dest_dir, 'master.adoc')
        docinfo_file = os.path.join(dest_dir, 'docinfo.xml')

        master_base = MASTER_FILE_BASE.format(**info)

        log.debug("Writing " + master_file)
        with open(master_file, "w") as f:
            f.write(master_base + all_in_one_text)
        log.debug("Writing " + docinfo_file)
        with open(docinfo_file, "w") as f:
            f.write(DOCINFO_BASE.format(**info))
def generate_master_entry(node, book_dir, distro, include_name=True, all_in_one=False):
    """
    Produces the body of master.adoc for one book/node.

    Directories become section headings and topics become include:: directives whose leveloffset is the depth of
    the topic. Outside of all-in-one mode, topics listed in COMMENT_FILES are wrapped in a //// comment block.
    Returns the generated lines joined with newlines.
    """
    lines = []

    def dir_callback(dir_node, parent_dir, depth):
        # The heading of the root node is optional, nested directories always get one
        if not (include_name or depth > 0):
            return
        heading_marker = "=" * (depth + 1)
        heading_title = dir_node["Name"].replace("\\", "")
        lines.append(heading_marker + " " + heading_title)

    def topic_callback(topic_node, parent_dir, depth):
        relative_file = os.path.join(parent_dir, topic_node["File"] + ".adoc")
        path_from_root = os.path.join(book_dir, relative_file)
        include = "include::" + relative_file + "[leveloffset=+" + str(depth) + "]"

        if all_in_one or path_from_root not in COMMENT_FILES:
            lines.append(include)
        else:
            lines.extend(["////", include, "////"])

        # Every entry is followed by a blank line
        lines.append("")

    # Walk the tree; the callbacks fill in the lines
    iter_tree(node, distro, dir_callback, topic_callback, include_name)
    return "\n".join(lines)
def reformat_for_drupal(info):
    """
    Converts the source content into the layout used by the Customer Portal:

    - Builds the file to id mapping and stores it in info['file_to_id_map'].
    - Copies the AsciiDoc sources of every book (scrubbing and link fixing happens while copying).
    - Copies the images of every book into a single flat images directory.
    """
    books = info['book_nodes']
    src_dir = info['src_dir']
    dest_dir = info['dest_dir']
    distro = info['distro']

    # Work out an id for every file. For all-in-one the ids have to be unique across all the books, so the
    # existing ids of every book are gathered before any new id is generated.
    file_to_id_map = {}
    if info['all_in_one']:
        all_ids = []
        for book in books:
            all_ids.extend(collect_existing_ids(book, distro, src_dir))
        for book in books:
            file_to_id_map.update(build_file_to_id_map(book, distro, all_ids, src_dir))
    else:
        for book in books:
            ids_in_book = collect_existing_ids(book, distro, src_dir)
            file_to_id_map.update(build_file_to_id_map(book, distro, ids_in_book, src_dir))
    info['file_to_id_map'] = file_to_id_map

    # Copy and reformat each book
    for book in books:
        log.info("Processing %s", book['Dir'])
        book_src_dir = os.path.join(src_dir, book['Dir'])

        # All-in-one shares one images directory, otherwise every book has its own
        if info['all_in_one']:
            images_parent = dest_dir
        else:
            images_parent = os.path.join(dest_dir, book['Dir'])
        images_dir = os.path.join(images_parent, "images")
        ensure_directory(images_dir)

        log.debug("Copying source files for " + book['Name'])
        copy_files(book, book_src_dir, src_dir, dest_dir, info)

        log.debug("Copying images for " + book['Name'])
        copy_images(book, src_dir, images_dir, distro)
def copy_images(node, src_path, dest_dir, distro):
    """
    Copies the content of every "images" directory found below the node's directories straight into dest_dir,
    which flattens all the image directories into one.
    """
    def dir_callback(dir_node, parent_dir, depth):
        images_src = os.path.join(os.path.join(parent_dir, dir_node['Dir']), "images")
        if not os.path.exists(images_src):
            return
        for image_name in os.listdir(images_src):
            shutil.copy(os.path.join(images_src, image_name), dest_dir)

    iter_tree(node, distro, dir_callback, parent_dir=src_path)
def copy_files(node, book_src_dir, src_dir, dest_dir, info):
    """
    Walks the node and copies each of its topics from src_dir to the same relative location below dest_dir.
    Directories are created on the way and every file goes through copy_file, which scrubs it and fixes its links.
    """
    def dir_callback(dir_node, parent_dir, depth):
        ensure_directory(os.path.join(dest_dir, parent_dir, dir_node['Dir']))

    def topic_callback(topic_node, parent_dir, depth):
        file_name = topic_node["File"] + ".adoc"
        source = os.path.join(os.path.join(src_dir, parent_dir), file_name)
        target = os.path.join(os.path.join(dest_dir, parent_dir), file_name)
        copy_file(info, book_src_dir, source, dest_dir, target)

    iter_tree(node, info['distro'], dir_callback, topic_callback)
def copy_file(info, book_src_dir, src_file, dest_dir, dest_file, include_check=True, tag=None, cwd=None):
    """
    Copies a source file to destination, making sure to scrub the content, add id's where the content is referenced elsewhere and fix any
    links that should be cross references. Also copies any includes that are referenced, since they aren't included in _build_cfg.yml.

    info is the build dictionary, book_src_dir the source directory of the book being built and dest_dir the root
    of the build output. tag and cwd are handed on to the link fixing: tag limits it to one tagged region of an
    included file, cwd is the directory relative links are resolved against. Nothing is done when dest_file
    already exists.
    """
    # It's possible that the file might have been created by another include, if so then just return
    if os.path.isfile(dest_file):
        return

    # Touch the dest file, so we can handle circular includes
    parent_dir = os.path.dirname(dest_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    open(dest_file, 'w').close()

    # Scrub/fix the content
    content = scrub_file(info, book_src_dir, src_file, tag=tag, cwd=cwd)

    # Check for any includes
    if include_check:
        # Only includes that survive the conditional processing for this distro are followed
        cleaned_content = remove_conditional_content(content, info)
        for include in INCLUDE_RE.finditer(cleaned_content):
            include_text = include.group(0)
            include_path = include.group(1)
            include_unparsed_vars = include.group(2)

            # Determine the include vars
            include_vars = {}
            if include_unparsed_vars:
                for meta in re.split(r"\s*,\s*", include_unparsed_vars):
                    # Split on the first "=" only: a value may itself contain "=", and an attribute without a
                    # value must not abort the whole build with an unpacking error.
                    key, _, value = meta.partition("=")
                    include_vars[key.strip()] = value.strip()

            # Determine the include src/dest paths (include paths are relative to the parent of the book dir)
            include_file = os.path.join(os.path.dirname(book_src_dir), include_path)
            relative_path = os.path.relpath(include_file, os.path.dirname(src_file))

            # If the path is in another book, copy it into this one
            relative_book_path = os.path.relpath(include_file, book_src_dir)
            if relative_book_path.startswith("../"):
                src_book_name = os.path.split(book_src_dir)[1]
                dest_include_dir = os.path.join(dest_dir, src_book_name, "includes")
                relative_path = os.path.join(os.path.relpath(dest_include_dir, parent_dir), os.path.basename(include_file))
            else:
                dest_include_dir = os.path.abspath(os.path.join(os.path.dirname(dest_file), os.path.dirname(relative_path)))
            dest_include_file = os.path.join(dest_include_dir, os.path.basename(include_file))

            # Make sure we have a reference to the current working dir
            current_dir = cwd or os.path.dirname(src_file)
            include_tag = include_vars.get("tag", None)

            # Copy the file and fix the content
            if not os.path.isfile(dest_include_file):
                copy_file(info, book_src_dir, include_file, dest_dir, dest_include_file, tag=include_tag, cwd=current_dir)
            else:
                # The file has already been copied, so just fix the links for this tag
                with open(dest_include_file, 'r') as f:
                    include_content = f.read()

                # Fix any links
                include_content = fix_links(include_content, info, book_src_dir, include_file, tag=include_tag, cwd=cwd)

                with open(dest_include_file, "w") as f:
                    f.write(include_content)

            # Point the include at the copied file, keeping the original attribute list
            content = content.replace(include_text, include.expand("include::" + relative_path + "[\\2]"))

    with open(dest_file, "w") as f:
        f.write(content)
def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
    """
    Scrubs a file and returns the cleaned file contents.

    The following is done to the content of src_file:
    - Leading blank lines and the lines rejected by include_line are dropped.
    - An id taken from info['file_to_id_map'] is added in front of the document title, unless the title is
      already preceded by an id.
    - Ids from TITLE_IDS are added to matching section titles.
    - Ids listed in DUPLICATE_IDS and links listed in INCORRECT_LINKS are replaced.
    - The links are fixed through fix_links, which tag and cwd are passed on to.
    """
    base_src_file = src_file.replace(info['src_dir'] + "/", "")

    # Get a list of predefined custom title ids for the file
    title_ids = TITLE_IDS.get(base_src_file, {})

    # Read in the source content
    with open(src_file, 'r') as f:
        src_file_content = f.readlines()

    # Scrub the content
    content = ""
    header_found = content_found = False
    current_id = None
    for line in src_file_content:
        stripped = line.strip()

        # Ignore any leading blank lines, before any meaningful content is found
        if stripped == "" and not content_found:
            continue

        # Check if the line should be included in the output
        if not include_line(line):
            continue
        content_found = True

        # Setup the document header content/id
        if not header_found and stripped != "" and line.startswith("="):
            header_found = True

            if info['all_in_one'] and base_src_file in ALL_IN_ONE_SCRAP_TITLE and line.startswith("= "):
                continue
            # Add a section id if one doesn't exist, so we have something to link to
            elif current_id is None and src_file in info['file_to_id_map']:
                content += "[[" + info['file_to_id_map'][src_file] + "]]\n"
        # Add a custom title id, if one is needed
        elif line.startswith("=") and current_id is None:
            for title in title_ids:
                title_re = r"^=+ " + title.replace(".", "\\.").replace("?", "\\?") + r"( (anchor|\[).*?)?(\n)?$"
                if re.match(title_re, line):
                    content += "[[" + title_ids[title] + "]]\n"

        # Set the current id based on the line content
        if current_id is None and ID_RE.match(stripped):
            current_id = stripped
        elif current_id is not None and stripped != "":
            # The pending id is only consumed by a non blank line. (This used to compare the bound method
            # "line.strip" with "", which is always unequal, so a blank line discarded the id as well.)
            current_id = None

        # Add the line to the processed content
        content += line

    # Fix up any duplicate ids
    if base_src_file in DUPLICATE_IDS:
        for duplicate_id, new_id in DUPLICATE_IDS[base_src_file].items():
            content = content.replace("[[" + duplicate_id + "]]", "[[" + new_id + "]]")

    # Replace incorrect links with correct ones
    if base_src_file in INCORRECT_LINKS:
        for incorrect_link, fixed_link in INCORRECT_LINKS[base_src_file].items():
            content = content.replace(incorrect_link, fixed_link)

    # Fix up the links
    content = fix_links(content, info, book_src_dir, src_file, tag=tag, cwd=cwd)

    return content
def include_line(line):
    """
    Tells whether a line is kept in the filtered output.

    A line is rejected when it is one of IGNORE_LINES or when it starts with ":MACRO:" for a MACRO in IGNORE_MACROS.
    """
    if line in IGNORE_LINES:
        return False
    return not any(line.startswith(":" + macro + ":") for macro in IGNORE_MACROS)
def fix_links(content, info, book_src_dir, src_file, tag=None, cwd=None):
    """
    Fix any links that were done incorrectly and reference the output instead of the source content.

    content is the text of src_file and info the build dictionary. For all-in-one builds the whole source tree
    (info['src_dir']) is treated as the book. Otherwise tag is only applied to files that live outside of
    book_src_dir; files inside the book are processed completely. Returns the content with the links rewritten.
    """
    if info['all_in_one']:
        # This has to be the worker function. Calling fix_links itself here passed the arguments in the order
        # _fix_links expects and failed as soon as the source directory string was indexed like "info".
        content = _fix_links(content, info['src_dir'], src_file, info)
    else:
        # Determine if the tag should be passed when fixing the links. If it's in the same book, then process the entire file. If it's
        # outside the book then don't process it.
        if book_src_dir in src_file:
            content = _fix_links(content, book_src_dir, src_file, info, cwd=cwd)
        else:
            content = _fix_links(content, book_src_dir, src_file, info, tag=tag, cwd=cwd)

    return content
def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
    """
    Fix any links that were done incorrectly and reference the output instead of the source content.

    Links are searched in a copy of the content that has the conditional content for other distros removed (and
    that is limited to the tagged region when tag is given); the replacements are made in the full content.
    - A link to a file found in info['file_to_id_map'] becomes a "link:" to the portal url of the target's book.
    - A link to any other file is left alone. When it looks like a link into the docs tree an error is logged and
      the module level has_errors flag is set.
    - A link that only has an anchor becomes an "xref:".
    """
    # Without this the assignment below would create a local variable and main() would never see the error
    global has_errors

    # TODO Deal with xref so that they keep the proper path. Atm it'll just strip the path and leave only the id
    file_to_id_map = info['file_to_id_map']
    current_dir = cwd or os.path.dirname(src_file)
    cleaned_content = remove_conditional_content(content, info, tag=tag)

    for link in LINKS_RE.finditer(cleaned_content):
        link_text = link.group(0)
        link_file = link.group(1)
        link_anchor = link.group(2)
        link_title = link.group(3)

        if link_file is not None:
            fixed_link_file = link_file.replace(".html", ".adoc")
            fixed_link_file_abs = os.path.abspath(os.path.join(current_dir, fixed_link_file))
            if fixed_link_file_abs in file_to_id_map:
                # We are dealing with a cross reference to another book here
                # NOTE(review): EXTERNAL_LINK_RE needs a "/" in the path, so a link to a file in the same
                # directory would leave external_link as None here - confirm such links can't occur.
                external_link = EXTERNAL_LINK_RE.search(link_file)
                book_dir_name = external_link.group(1)

                # Find the book name
                book_name = book_dir_name
                for book in info['data']:
                    if check_node_distro_matches(book, info['distro']) and book['Dir'] == book_dir_name:
                        book_name = book['Name']
                        break

                fixed_link_file = BASE_PORTAL_URL + build_portal_url(info, book_name)

                if link_anchor is None:
                    fixed_link = "link:" + fixed_link_file + "#" + file_to_id_map[fixed_link_file_abs] + link_title
                else:
                    fixed_link = "link:" + fixed_link_file + link_anchor + link_title
            else:
                # Cross reference or link that isn't in the docs suite
                fixed_link = link_text
                if EXTERNAL_LINK_RE.search(link_file) is not None:
                    rel_src_file = src_file.replace(os.path.dirname(book_dir) + "/", "")
                    has_errors = True
                    log.error("ERROR (%s): \"%s\" appears to try to reference a file not included in the \"%s\" distro", rel_src_file, link_text.replace("\n", ""), info['distro'])
        elif link_anchor is not None:
            fixed_link = "xref:" + link_anchor.replace("#", "") + link_title
        else:
            # Neither a file nor an anchor was captured (e.g. "link:[text]"), so there is nothing to rewrite
            fixed_link = link_text

        content = content.replace(link_text, fixed_link)

    return content
def remove_conditional_content(content, info, tag=None):
    """
    Removes any conditional content that doesn't match for the specified distro

    In order this removes: ifdef/ifndef regions that don't apply to info['distro'], //// comment blocks and, when
    tag is given, everything outside of the region tagged with that name. Returns the remaining content.
    """
    # Remove any ifdef content
    ifdef = IFDEF_RE.search(content)
    while ifdef is not None:
        # Group 1 is "n" for ifndef, group 2 the comma separated list of attribute names
        is_not_def = ifdef.group(1) == "n"
        ifdef_distros = ifdef.group(2).split(",")
        pos = ifdef.start()
        end = ifdef.end()

        # Determine if we should strip the conditional content, based on the distro
        strip_content = False
        if is_not_def and info['distro'] in ifdef_distros:
            strip_content = True
        elif not is_not_def and info['distro'] not in ifdef_distros:
            strip_content = True

        # Remove the conditional content
        if strip_content:
            # Find the correct endif for the current ifdef
            search_pos = end
            # Without any endif everything up to the end of the content is removed
            endpos = len(content)
            while True:
                next_ifdef = IFDEF_RE.search(content, search_pos)
                endif = ENDIF_RE.search(content, search_pos)

                if not endif:
                    break
                elif not next_ifdef or next_ifdef.start() > endif.start():
                    # No other ifdef opens before this endif, so this is where the removed region ends
                    endpos = endif.end()
                    break
                else:
                    # Another ifdef opens before this endif, so carry on searching after this endif
                    search_pos = endif.end()

            # Replace the content and move the end pos to be the same as the start since the content was removed
            ifdef_text = content[pos:endpos]
            # NOTE(review): str.replace removes every occurrence of this exact text, not just the one at pos
            content = content.replace(ifdef_text, "")
            end = pos

        # Move onto the next ifdef
        ifdef = IFDEF_RE.search(content, end)

    # Remove commented out content
    # (the matches come from the string as it was when the loop started; each one is removed by its text)
    for comment in COMMENT_CONTENT_RE.finditer(content):
        content = content.replace(comment.group(0), "")

    # Remove content outside of tags
    if tag is not None:
        for tag_match in TAG_CONTENT_RE.finditer(content):
            tag_text = tag_match.group(0)
            tag_label = tag_match.group(1)
            if tag_label == tag:
                # Tag matches, so only use the content in the tag
                # (if several regions carry the tag the last one wins; if none does the content is unchanged)
                content = tag_text

    return content
def collect_existing_ids(node, distro, path):
    """
    Gathers the ids that are already declared in the AsciiDoc files of all the topics below the node.
    path is the directory the node's paths are relative to. Returns the ids as a list, in the order found.
    """
    found_ids = []

    def topic_callback(topic_node, parent_dir, depth):
        topic_file = os.path.join(parent_dir, topic_node["File"] + ".adoc")
        found_ids.extend(extract_file_ids(topic_file))

    iter_tree(node, distro, topic_callback=topic_callback, parent_dir=path)
    return found_ids
def build_file_to_id_map(node, distro, existing_ids, path=""):
    """
    Creates a dictionary that maps the path of every topic file below the node to a generated root id. The ids are
    derived from the topic names and avoid both existing_ids and the ids generated so far. The mapping is what the
    link fixing uses to recognise links between files.
    """
    ids_by_file = {}

    def topic_callback(topic_node, parent_dir, depth):
        topic_file = os.path.join(parent_dir, topic_node["File"] + ".adoc")
        new_id = build_file_id(topic_node["Name"], ids_by_file, existing_ids)
        ids_by_file[topic_file] = new_id

    iter_tree(node, distro, topic_callback=topic_callback, parent_dir=path)
    return ids_by_file
def extract_file_ids(file_path):
    """
    Reads the file and returns the list of ids declared in it (group 1 of every ID_RE match).
    """
    with open(file_path, "r") as source:
        text = source.read()

    return [match.group(1) for match in ID_RE.finditer(text)]
def build_file_id(file_title, file_to_id_map, existing_ids):
    """
    Creates an id for a file from its title that is not in use yet.

    The title is lower cased, underscores and spaces become hyphens and brackets, parentheses and "#" are dropped.
    When the result is in existing_ids or is already a value of file_to_id_map, "-1", "-2", ... is appended
    until it is unique.
    """
    hyphenated = file_title.lower().replace("_", "-").replace(" ", "-")
    base_id = re.sub(r"[\[\]\(\)#]", "", hyphenated)

    candidate = base_id
    suffix = 0
    while candidate in existing_ids or candidate in list(file_to_id_map.values()):
        suffix += 1
        candidate = base_id + "-" + str(suffix)

    return candidate
def build_portal_url(info, book_name):
    """
    Creates the portal url path "<product>/<version>/html-single/<book>/", with every part escaped by
    generate_url_from_name in the way drupal does it.
    """
    product = info['product']
    version = info['product-version']

    product_part = generate_url_from_name(product)
    version_part = generate_url_from_name(version)
    book_part = generate_url_from_name(book_name)
    return "/".join([product_part, version_part, "html-single", book_part]) + "/"
def replace_nbsp(val):
    """
    Replaces non breaking spaces with a regular space.

    Text is searched for U+00A0. Byte strings are searched for the UTF-8 encoding of that character and are
    returned as the same byte type. None is returned unchanged.
    """
    if val is None:
        return None

    if isinstance(val, str):
        return val.replace('\xa0', ' ')

    if isinstance(val, (bytes, bytearray)):
        # bytes.replace only accepts bytes-like arguments; passing str here raised a TypeError
        return val.replace(b'\xc2\xa0', b' ')

    # Any other object keeps the previous behaviour
    return val.replace('\xc2\xa0', ' ')
def generate_url_from_name(name, delimiter='_'):
    """
    Generates a url fragment from a product, version or titles name.

    Leading dots and all characters other than ASCII letters, digits, space, "_", "-" and "." are removed, runs of
    whitespace become the delimiter, repeated delimiters are collapsed into one and the result is lower cased.
    """
    # Remove characters that aren't allowed in urls
    url = re.sub(r"^\.+|[^0-9a-zA-Z _\-.]+", "", replace_nbsp(name))

    # Replace spaces with the delimiter. A function is used as the replacement so that a delimiter containing a
    # backslash isn't interpreted as a replacement template.
    url = re.sub(r"\s+", lambda _match: delimiter, url)

    # Replace multiple delimiters with a single one. The delimiter is escaped so that characters such as "." or
    # "+" are matched literally instead of being read as regex syntax.
    if delimiter:
        url = re.sub("(?:" + re.escape(delimiter) + ")+", lambda _match: delimiter, url)

    return url.lower()
def call_git_command(*args, **kwargs):
    """
    Calls a git command and retries the command if it is unable to connect to the remote repo

    The arguments are handed to subprocess.check_output, except for the optional keyword "retries" (default 3),
    which is the total number of attempts. The output is read as text unless the caller asks otherwise; it is
    echoed to stdout and returned. subprocess.CalledProcessError is raised when the command fails for any other
    reason or when the attempts are used up.
    """
    retries = kwargs.pop("retries", 3)
    # check_output returns bytes by default, which can neither be written to sys.stdout nor be searched with a
    # str, so ask for text.
    kwargs.setdefault("universal_newlines", True)

    try:
        output = subprocess.check_output(*args, **kwargs)
    except subprocess.CalledProcessError as e:
        retries -= 1
        error_output = e.output or ""
        if isinstance(error_output, bytes):
            error_output = error_output.decode("utf-8", errors="replace")

        if retries > 0 and "fatal: Could not read from remote repository" in error_output:
            # Connection failed, so wait a couple of secs and try again
            time.sleep(2)
            # Return the result of the retry, otherwise a successful retry would yield None
            return call_git_command(*args, retries=retries, **kwargs)
        raise

    if output:
        # Only bytes when the caller explicitly turned the text mode off
        echoed = output.decode("utf-8", errors="replace") if isinstance(output, bytes) else output
        sys.stdout.write(echoed)
    return output
def fetch_sources(url, branch, dir=None, clone_dirname=None):
    """
    Gets the sources of a git repository.

    The checkout lives in `clone_dirname` below `dir`; dir defaults to the current working directory and
    clone_dirname to the last part of the url without ".git". An existing checkout is switched to `branch` and
    pulled, otherwise the branch is cloned.
    """
    # Fill in the defaults
    if dir is None:
        dir = os.getcwd()
    if clone_dirname is None:
        clone_dirname = url.split('/')[-1].replace(".git", "")

    clone_dir = os.path.abspath(os.path.join(dir, clone_dirname))
    already_cloned = os.path.exists(os.path.join(clone_dir, ".git"))

    if already_cloned:
        # Make sure we are on the right branch before updating it
        subprocess.check_output(["git", "checkout", branch], cwd=clone_dir, stderr=subprocess.STDOUT)
        cmd, cmd_dir = ["git", "pull", "-f"], clone_dir
    else:
        cmd, cmd_dir = ["git", "clone", "-b", branch, url, clone_dirname], os.path.abspath(dir)

    # Run the update/clone
    call_git_command(cmd, cwd=cmd_dir, stderr=subprocess.STDOUT)
def sync_directories(src_dir, dest_dir, ignore=None):
    """
    Syncs two directories so that the both contain the same content, with the exception of ignored files.

    dest_dir is made to match src_dir. Names listed in ignore and in CMP_IGNORE_FILES are left alone on both
    sides.
    """
    # Work on a copy, so the list handed in by the caller isn't extended behind its back
    ignored_names = list(ignore) if ignore is not None else []
    ignored_names.extend(CMP_IGNORE_FILES)

    dcmp = filecmp.dircmp(src_dir, dest_dir, ignored_names)
    _sync_directories_dircmp(dcmp)
def _sync_directories_dircmp(dcmp):
    """
    Applies a filecmp.dircmp result: the right hand directory is changed until it matches the left hand one.
    Sub directories that exist on both sides are handled recursively.
    """
    # Anything that only exists on the right has to go
    for stale_name in dcmp.right_only:
        stale_path = os.path.join(dcmp.right, stale_name)
        remove = os.remove if os.path.isfile(stale_path) else shutil.rmtree
        remove(stale_path)

    # Copy what is new on the left, and overwrite the files both sides have
    for name in dcmp.left_only + dcmp.common_files:
        source_path = os.path.join(dcmp.left, name)
        target_path = os.path.join(dcmp.right, name)
        copy = shutil.copy2 if os.path.isfile(source_path) else shutil.copytree
        copy(source_path, target_path)

    # Descend into the directories both sides have
    for child_dcmp in list(dcmp.subdirs.values()):
        _sync_directories_dircmp(child_dcmp)
def commit_and_push_changes(git_dir, git_branch, git_upstream_branch):
    """
    Adds, commits and pushes any changes to a local git repository.

    git_dir is the checkout to work in. The two branch names are only used for the commit message. A commit that
    fails because there is nothing to commit is not an error; any other git failure is raised as
    subprocess.CalledProcessError.
    """
    # Add all the changes
    add_cmd = ["git", "add", "--all"]
    subprocess.check_call(add_cmd, cwd=git_dir)
    try:
        # Commit the changes
        # NOTE(review): the address in the author string looks like it was mangled by an e-mail obfuscator -
        # confirm the intended value.
        commit_cmd = ["git", "commit", "-m", "Merge branch 'upstream/" + git_upstream_branch + "' into " + git_branch,
                      "--author", "CCS OSE Build Script <[email protected]>"]
        call_git_command(commit_cmd, cwd=git_dir, stderr=subprocess.STDOUT)
        # Push the changes
        push_cmd = ["git", "push"]
        call_git_command(push_cmd, cwd=git_dir, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # The captured output is bytes unless the command ran in text mode; searching bytes with a str raises
        # a TypeError that would hide the git error.
        output = e.output
        if isinstance(output, bytes):
            output = output.decode("utf-8", errors="replace")
        if output is None or "nothing to commit" not in output:
            raise
def parse_repo_config(config_file, distro, version):
    """
    Reads the repository urls for a distro/version from an ini style config file.

    The section "<distro>-<version>" is looked up and its options are returned as a dictionary of option name to
    value (empty when there is no such section). The script exits with status -1 when config_file doesn't exist.
    """
    # Make sure the repo config file exists
    if not os.path.isfile(config_file):
        log.error("Failed loading the repo configuration from %s", config_file)
        sys.exit(-1)

    # SafeConfigParser was a deprecated alias of ConfigParser and no longer exists in Python 3.12
    parser = configparser.ConfigParser()
    parser.read(config_file)

    section_name = distro + "-" + version
    if not parser.has_section(section_name):
        return {}
    return dict(parser.items(section_name))
def main():
    """
    Runs the build: fetches the upstream sources (unless disabled), builds the portal content for the requested
    distro into drupal-build/<distro> and, with --push, syncs every configured book into its GitLab repository.
    Exits with status 1 when link errors were recorded during the build.
    """
    parser = setup_parser()
    args = parser.parse_args()
    logging.basicConfig(format='%(message)s', level=logging.INFO, stream=sys.stdout)

    # Copy down the latest files
    if not args.no_upstream_fetch:
        log.info("Fetching the upstream sources")
        fetch_sources(args.upstream_url, args.upstream_branch, clone_dirname=CLONE_DIR)

    config = find_build_config_file()
    src_dir = os.path.dirname(config)

    # Parse the build config
    data = parse_build_config(config)

    # Filter the list of books that should be built
    book_nodes = [node for node in data if check_node_distro_matches(node, args.distro)]

    # Make the new source tree
    dest_dir = os.path.join(os.getcwd(), "drupal-build", args.distro)
    if not args.no_clean:
        log.info("Cleaning the drupal-build directory")
        if os.path.exists(dest_dir):
            shutil.rmtree(dest_dir)
        os.makedirs(dest_dir)
    elif not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    # Shared build state. The hyphenated keys are the ones used by the master.adoc/docinfo.xml templates.
    info = {
        'title': args.title,
        'product-author': args.author,
        'product-version': args.version,
        'product': args.product,
        'distro': args.distro,
        'src_dir': src_dir,
        'dest_dir': dest_dir,
        'data': data,
        'book_nodes': book_nodes,
        'all_in_one': args.all_in_one,
        'preface-title': "",
        "upstream_branch": args.upstream_branch
    }

    # Build the master files
    log.info("Building the drupal files")
    build_master_files(info)

    # Copy the original data and reformat for drupal
    reformat_for_drupal(info)

    if has_errors:
        sys.exit(1)

    if args.push:
        # Parse the repo urls
        config_file = os.path.join(os.path.dirname(__file__), 'repos.ini')
        repo_urls = parse_repo_config(config_file, args.distro, args.version)

        # Make sure the base git dir exists
        base_git_dir = os.path.join(os.getcwd(), "gitlab-repos")
        ensure_directory(base_git_dir)

        # Checkout the gitlab repo, copy the changes and push them back up
        for book_dir, gitlab_repo_url in repo_urls.items():
            build_book_dir = os.path.join(dest_dir, book_dir)
            git_dirname = gitlab_repo_url.split('/')[-1].replace(".git", "")
            git_dir = os.path.join(base_git_dir, git_dirname)

            try:
                log.info("Fetching " + book_dir + " sources from GitLab")
                fetch_sources(gitlab_repo_url, args.branch, base_git_dir, git_dirname)

                log.info("Syncing " + book_dir)
                sync_directories(build_book_dir, git_dir, ["docinfo.xml"])

                log.info("Pushing " + book_dir + " changes back to GitLab")
                commit_and_push_changes(git_dir, args.branch, args.upstream_branch)
            except subprocess.CalledProcessError as e:
                # Show what git printed before re-raising. The captured output may be bytes, which can't be
                # written to the text stream as is.
                output = e.output
                if output:
                    if isinstance(output, bytes):
                        output = output.decode("utf-8", errors="replace")
                    sys.stdout.write(output)
                raise


if __name__ == "__main__":
    main()