From 2bbced212e2ee93948c45360fee00b2e3f960392 Mon Sep 17 00:00:00 2001 From: Misko Hevery Date: Fri, 3 Dec 2010 15:42:11 -0800 Subject: [PATCH] Fix sanitization issues as suggested by evn --- regression/issue-169.html | 10 ++++++ regression/sanitizer.html | 8 +++++ src/sanitizer.js | 28 ++++++++--------- test/sanitizerSpec.js | 65 ++++++++++++++++++++++++++++++++------- 4 files changed, 86 insertions(+), 25 deletions(-) create mode 100644 regression/issue-169.html create mode 100644 regression/sanitizer.html diff --git a/regression/issue-169.html b/regression/issue-169.html new file mode 100644 index 000000000000..e18c4f2e0a03 --- /dev/null +++ b/regression/issue-169.html @@ -0,0 +1,10 @@ + + + + + + + + {{x1}} -- {{x1.bar[0].d}} + + \ No newline at end of file diff --git a/regression/sanitizer.html b/regression/sanitizer.html new file mode 100644 index 000000000000..775a6009f5a5 --- /dev/null +++ b/regression/sanitizer.html @@ -0,0 +1,8 @@ + + + + + +
{{html|html}}
+ + \ No newline at end of file diff --git a/src/sanitizer.js b/src/sanitizer.js index 66631a9087aa..d63cf69d1f8d 100644 --- a/src/sanitizer.js +++ b/src/sanitizer.js @@ -17,7 +17,7 @@ // Regular Expressions for parsing tags and attributes var START_TAG_REGEXP = /^<\s*([\w:]+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)\s*>/, END_TAG_REGEXP = /^<\s*\/\s*([\w:]+)[^>]*>/, - ATTR_REGEXP = /(\w+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g, + ATTR_REGEXP = /(\w+)(?:\s*=\s*(?:(?:"((?:[^"])*)")|(?:'((?:[^'])*)')|([^>\s]+)))?/g, BEGIN_TAG_REGEXP = /^/g, @@ -26,32 +26,32 @@ var START_TAG_REGEXP = /^<\s*([\w:]+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^'] NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character) // Empty Elements - HTML 4.01 -var emptyElements = makeMap("area,base,basefont,br,col,hr,img,input,isindex,link,param"); +var emptyElements = makeMap("area,br,col,hr,img"); // Block Elements - HTML 4.01 -var blockElements = makeMap("address,blockquote,button,center,dd,del,dir,div,dl,dt,fieldset,"+ - "form,hr,ins,isindex,li,map,menu,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul"); +var blockElements = makeMap("address,blockquote,center,dd,del,dir,div,dl,dt,"+ + "hr,ins,li,map,menu,ol,p,pre,script,table,tbody,td,tfoot,th,thead,tr,ul"); // Inline Elements - HTML 4.01 -var inlineElements = makeMap("a,abbr,acronym,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,img,"+ - "input,ins,kbd,label,map,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); +var inlineElements = makeMap("a,abbr,acronym,b,bdo,big,br,cite,code,del,dfn,em,font,i,img,"+ + "ins,kbd,label,map,q,s,samp,small,span,strike,strong,sub,sup,tt,u,var"); // Elements that you can, intentionally, leave open // (and which close themselves) -var closeSelfElements = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); +var closeSelfElements = makeMap("colgroup,dd,dt,li,p,td,tfoot,th,thead,tr"); // Special Elements (can contain anything) var specialElements = makeMap("script,style"); var validElements = extend({}, emptyElements, blockElements, inlineElements, closeSelfElements); //see: http://www.w3.org/TR/html4/index/attributes.html //Attributes that have their values filled in disabled="disabled" -var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); +var fillAttrs = makeMap("compact,ismap,nohref,nowrap"); //Attributes that have href and hence need to be sanitized var uriAttrs = makeMap("background,href,longdesc,src,usemap"); var validAttrs = extend({}, fillAttrs, uriAttrs, makeMap( 'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+ - 'color,cols,colspan,coords,dir,face,for,headers,height,hreflang,hspace,id,'+ - 'label,lang,language,maxlength,method,name,prompt,rel,rev,rows,rowspan,rules,'+ - 'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,'+ + 'color,cols,colspan,coords,dir,face,headers,height,hreflang,hspace,'+ + 'lang,language,rel,rev,rows,rowspan,rules,'+ + 'scope,scrolling,shape,span,start,summary,target,title,type,'+ 'valign,value,vspace,width')); /** @@ -249,12 +249,12 @@ function htmlSanitizeWriter(buf){ if (!ignore && specialElements[tag]) { ignore = tag; } - if (!ignore && validElements[tag]) { + if (!ignore && validElements[tag] == true) { out('<'); out(tag); foreach(attrs, function(value, key){ var lkey=lowercase(key); - if (validAttrs[lkey] && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) { + if (validAttrs[lkey]==true && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) { out(' '); out(key); out('="'); @@ -267,7 +267,7 @@ function htmlSanitizeWriter(buf){ }, end: function(tag){ tag = lowercase(tag); - if (!ignore && validElements[tag]) { + if (!ignore && validElements[tag] == true) { out(''); diff --git a/test/sanitizerSpec.js b/test/sanitizerSpec.js index 88da693d06c1..3ad6c1c93c2d 100644 --- a/test/sanitizerSpec.js +++ b/test/sanitizerSpec.js @@ -33,7 +33,7 @@ describe('HTML', function(){ expectHTML('ailc.').toEqual('ac.'); }); - it('should remove unknown tag names', function(){ + it('should remove unknown names', function(){ expectHTML('abc').toEqual('abc'); }); @@ -50,21 +50,33 @@ describe('HTML', function(){ }); it('should handle entities', function(){ - var everything = '
' + + var everything = '
' + '!@#$%^&*()_+-={}[]:";\'<>?,./`~ ħ
'; expectHTML(everything).toEqual(everything); }); it('should handle improper html', function(){ - expectHTML('< div id="
" alt=abc dir=\'"\' >text< /div>'). - toEqual('
text
'); + expectHTML('< div rel="" alt=abc dir=\'"\' >text< /div>'). + toEqual('
text
'); }); it('should handle improper html2', function(){ - expectHTML('< div id="" / >'). - toEqual('
'); + expectHTML('< div rel="
" / >'). + toEqual('
'); }); - + + it('should ignore back slash as escape', function(){ + expectHTML('xxx\\'). + toEqual('xxx\\'); + }); + + it('should ignore object attributes', function(){ + expectHTML(':)'). + toEqual(':)'); + expectHTML(':)'). + toEqual(''); + }); + describe('htmlSanitizerWriter', function(){ var writer, html; beforeEach(function(){ @@ -74,12 +86,12 @@ describe('HTML', function(){ it('should write basic HTML', function(){ writer.chars('before'); - writer.start('div', {id:'123'}, false); + writer.start('div', {rel:'123'}, false); writer.chars('in'); writer.end('div'); writer.chars('after'); - expect(html).toEqual('before
in
after'); + expect(html).toEqual('before
in
after'); }); it('should escape text nodes', function(){ @@ -93,8 +105,8 @@ describe('HTML', function(){ }); it('should escape attributes', function(){ - writer.start('div', {id:'!@#$%^&*()_+-={}[]:";\'<>?,./`~ \n\0\r\u0127'}); - expect(html).toEqual('
'); + writer.start('div', {rel:'!@#$%^&*()_+-={}[]:";\'<>?,./`~ \n\0\r\u0127'}); + expect(html).toEqual('
'); }); it('should ignore missformed elements', function(){ @@ -107,6 +119,37 @@ describe('HTML', function(){ expect(html).toEqual('
'); }); + describe('explicitly dissallow', function(){ + it('should not allow attributes', function(){ + writer.start('div', {id:'a', name:'a', style:'a'}); + expect(html).toEqual('
'); + }); + + it('should not allow tags', function(){ + function tag(name) { + writer.start(name, {}); + writer.end(name); + }; + tag('frameset'); + tag('frame'); + tag('form'); + tag('param'); + tag('object'); + tag('embed'); + tag('textarea'); + tag('input'); + tag('button'); + tag('option'); + tag('select'); + tag('script'); + tag('style'); + tag('link'); + tag('base'); + tag('basefont'); + expect(html).toEqual(''); + }); + }); + describe('isUri', function(){ function isUri(value) {