-
Notifications
You must be signed in to change notification settings - Fork 65
/
htmlparse.config
52 lines (42 loc) · 2.1 KB
/
htmlparse.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#This file controls all of the logic for parsing HTML contents to JavaScript
#If you find attackers use a new HTML field which they reference from JavaScript,
#you'll need to define that here.
#
#If it's a function, you may also need to define it within the pre.js file.
#
#htmlparse.config hypothetical example:
#HTMLinput: <div onclick=fun()>
#Definition: !define DIVCLICK %s;
#ParseRule: !parse div onclick DIVCLICK:onclick
#Output: fun();
#If you applied a filter to the div tag like:
#!filter div fun\(\);
#The resulting output would be empty instead. Note that the filter is
#interpreted as a regular expression, so (, ), and other special
#characters must be escaped.
#!define NAME definition (ONLY USE %s as format string)
#NOTE: USE SPACES AS DELIMITERS
#NOTE: DO NOT USE QUOTES around format string variables
#NOTE: If you don't want your variables to be quoted, use a NAME starting with "raw"
#NOTE: definitions starting with 'header' tell the program when the headers end
!define headerACTIVEX this[%s] = the_activex;
!define headerIDVAL idzzz.push(%s); valzzz.push(%s); txtzzz.push(%s);
!define headerTITLE document.title = String(%s);
!define headerBYNAME if(!(%s in namezzz)){namezzz[%s] = [];} namezzz[%s].push(%s);
!define rawSCRIPT ;%s
#!parse tag attrib NAME:param1,param2
#NOTE: '*' is a special character that is treated as empty
!parse object name,classid headerACTIVEX:id
!parse object id,classid headerACTIVEX:id
!parse * id,value headerIDVAL:id,value,contents
!parse * id,!value headerIDVAL:id,*,contents
!parse title * headerTITLE:contents
#!parse * * headerBYNAME:name,name,name,contents
!parse script * rawSCRIPT:contents
!parse body onload rawSCRIPT:onload
#!filter tag regex
#NOTE: the regular expression will zero out the output that matches
#NOTE: do not define multiple filters with the same tag
#NOTE: a tag of * will match everything
!filter script <[/]?script[^>]*>|<!--|//-->
!filter * ^javascript:\s*|^return\s+