forked from uabrc/uabrc.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.linkcheckerrc
298 lines (264 loc) · 9.06 KB
/
.linkcheckerrc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# Sample configuration file; see the linkcheckerrc(5) man page or
# execute linkchecker -h for help on these options.
# Commandline options override these settings.
##################### output configuration ##########################
[output]
# enable debug messages; see 'linkchecker -h' for valid debug names, example:
#debug=all
# print status output
#status=1
# change the logging type
#log=text
# turn on/off --verbose
#verbose=0
# turn on/off --warnings
#warnings=1
# turn on/off --quiet
#quiet=0
# additional file output, example:
fileoutput=csv
# errors to ignore (URL regular expression, message regular expression)
#ignoreerrors=
# ignore all errors for broken.example.com:
# ^https?://broken.example.com/
# ignore SSL errors for dev.example.com:
# ^https://dev.example.com/ ^SSLError .*
##################### logger configuration ##########################
# logger output part names:
# all For all parts
# realurl The full url link
# result Valid or invalid, with messages
# extern 1 or 0, only in some logger types reported
# base <base href=...>
# name <a href=...>name</a> and <img alt="name">
# parenturl The referrer URL if there is any
# info Some additional info, e.g. FTP welcome messages
# warning Warnings
# dltime Download time
# checktime Check time
# url The original url name, can be relative
# intro The blurb at the beginning, "starting at ..."
# outro The blurb at the end, "found x errors ..."
# stats Statistics including URL lengths and contents.
# each Logger can have separate configuration parameters
# standard text logger
[text]
#filename=linkchecker-out.txt
#parts=all
#wraplength=65
# colors for the various parts, syntax is <color> or <type>;<color>
# type can be bold, light, blink, invert
# color can be default, black, red, green, yellow, blue, purple, cyan, white,
# Black, Red, Green, Yellow, Blue, Purple, Cyan, White
#colorparent=default
#colorurl=default
#colorname=default
#colorreal=cyan
#colorbase=purple
#colorvalid=bold;green
#colorinvalid=bold;red
#colorinfo=default
#colorwarning=bold;yellow
#colordltime=default
#colorreset=default
# GML logger
[gml]
#filename=linkchecker-out.gml
#parts=all
# valid encodings are listed in http://docs.python.org/library/codecs.html#standard-encodings
# example:
#encoding=utf_16
# DOT logger
[dot]
#filename=linkchecker-out.dot
#parts=all
# default encoding is ascii since the original DOT format does not
# support other charsets, example:
#encoding=iso-8859-15
# CSV logger
[csv]
filename=out/linkchecker-raw.csv
separator=,
#quotechar="
#dialect=excel
parts=result,urlname,parentname,line,column,url
# SQL logger
[sql]
#filename=linkchecker-out.sql
#dbname=linksdb
#separator=;
#parts=all
# HTML logger
[html]
#filename=linkchecker-out.html
# colors for the various parts
#colorbackground=#fff7e5
#colorurl=#dcd5cf
#colorborder=#000000
#colorlink=#191c83
#colorwarning=#e0954e
#colorerror=#db4930
#colorok=#3ba557
#parts=all
# failures logger
[failures]
#filename=$XDG_DATA_HOME/linkchecker/failures
# custom xml logger
[xml]
#filename=linkchecker-out.xml
# system encoding is used by default. Example:
#encoding=iso-8859-1
# GraphXML logger
[gxml]
#filename=linkchecker-out.gxml
# system encoding is used by default. Example:
#encoding=iso-8859-1
# Sitemap logger
[sitemap]
#filename=linkchecker-out.sitemap.xml
#encoding=utf-8
#priority=0.5
#frequency=daily
##################### checking options ##########################
[checking]
# number of threads
#threads=10
# connection timeout in seconds
#timeout=60
# Time to wait for checks to finish after the user aborts the first time
# (with Ctrl-C or the abort button).
#aborttimeout=300
# The recursion level determines how many times links inside pages are followed.
recursionlevel=1
# parse a cookiefile for initial cookie data, example:
#cookiefile=/path/to/cookies.txt
# User-Agent header string to send to HTTP web servers
# Note that robots.txt are always checked with the original User-Agent. Example:
#useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
# When checking finishes, write a memory dump to a temporary file.
# The memory dump is written both when checking finishes normally
# and when checking gets canceled.
# The memory dump only works if the python-meliae package is installed.
# Otherwise a warning is printed to install it.
#debugmemory=0
# When checking absolute URLs inside local files, the given root directory
# is used as base URL.
# Note that the given directory must have URL syntax, so it must use a slash
# to join directories instead of a backslash.
# And the given directory must end with a slash.
# Unix example:
#localwebroot=/var/www/
# Windows example:
#localwebroot=/C|/public_html/
# Check SSL certificates. Set to an absolute pathname for a custom
# CA cert bundle to use. Set to zero to disable SSL certificate verification.
#sslverify=1
# Stop checking new URLs after the given number of seconds. Same as if the
# user hits Ctrl-C after X seconds. Example:
#maxrunseconds=600
# Don't download files larger than the given number of bytes
#maxfilesizedownload=5242880
# Don't parse files larger than the given number of bytes
#maxfilesizeparse=1048576
# Maximum number of URLs to check. New URLs will not be queued after the
# given number of URLs is checked. Example:
#maxnumurls=153
# Maximum number of requests per second to one host.
#maxrequestspersecond=10
# Respect the instructions in any robots.txt files
#robotstxt=1
# Allowed URL schemes as a comma-separated list. Example:
#allowedschemes=http,https
# Size of the result cache. Checking more urls might increase memory usage during runtime
#resultcachesize=100000
##################### filtering options ##########################
[filtering]
ignore=rc.uab.edu
# ignore everything with 'lconline' in the URL name
# lconline
# and ignore everything with 'bookmark' in the URL name
# bookmark
# and ignore all mailto: URLs
# ^mailto:
# do not recurse into the following URLs
#nofollow=
# just an example
# http://www\.example\.com/bla
# Ignore specified warnings (see linkchecker -h for the list of
# recognized warnings). Add a comma-separated list of warnings here
# that prevent a valid URL from being logged. Note that the warning
# will be logged for invalid URLs. Example:
#ignorewarnings=url-unicode-domain
# Regular expression to add more URLs recognized as internal links.
# Default is that URLs given on the command line are internal.
#internlinks=^http://www\.example\.net/
# Check external links
checkextern=1
##################### password authentication ##########################
[authentication]
# WARNING: if you store passwords in this configuration entry, make sure the
# configuration file is not readable by other users.
# Different user/password pairs for different URLs can be provided.
# Entries are a triple (URL regular expression, username, password),
# separated by whitespace.
# If the regular expression matches, the given user/password pair is used
# for authentication. The commandline options -u,-p match every link
# and therefore override the entries given here. The first match wins.
# At the moment, authentication is used for http[s] and ftp links.
#entry=
# Note that passwords are optional. If any passwords are stored here,
# this file should not readable by other users.
# ^https?://www\.example\.com/~calvin/ calvin mypass
# ^ftp://www\.example\.com/secret/ calvin
# if the website requires a login via a page with an HTML form the URL of the
# page and optionally the username and password input element name attributes
# can be provided.
#loginurl=http://www.example.com/
# The name attributes of the username and password HTML input elements
#loginuserfield=login
#loginpasswordfield=password
# Optionally the name attributes of any additional input elements and the values
# to populate them with. Note that these are submitted without checking
# whether matching input elements exist in the HTML form. Example:
#loginextrafields=
# name1:value1
# name 2:value 2
############################ Plugins ###################################
#
# uncomment sections to enable plugins
# Check HTML anchors
[AnchorCheck]
# Print HTTP header info
#[HttpHeaderInfo]
# Comma separated list of header prefixes to print.
# The names are case insensitive.
# The default list is empty, so it should be non-empty when activating
# this plugin. Example:
#prefixes=Server,X-
# Add country info to URLs
#[LocationInfo]
# Run W3C syntax checks
#[CssSyntaxCheck]
#[HtmlSyntaxCheck]
# Search for regular expression in page contents
#[RegexCheck]
# Example:
#warningregex=Oracle Error
# Search for viruses in page contents
#[VirusCheck]
#clamavconf=/etc/clamav/clamd.conf
# Check that SSL certificates have at least the given number of days validity.
#[SslCertificateCheck]
#sslcertwarndays=30
# Parse and check links in PDF files
#[PdfParser]
# Parse and check links in Word files
#[WordParser]
# Parse and check links in Markdown files.
# Supported links are:
# <http://autolink.com>
# [name](http://link.com "Optional title")
# [id]: http://link.com "Optional title"
[MarkdownCheck]
# Regexp of filename
#filename_re=.*\.(markdown|md(own)?|mkdn?)$