diff --git a/Rakefile b/Rakefile index f0a1b4ffebb..e35685d32db 100644 --- a/Rakefile +++ b/Rakefile @@ -10,7 +10,7 @@ java = RUBY_PLATFORM =~ /java/ GENERATED_PARSER = "lib/nokogiri/css/generated_parser.rb" GENERATED_TOKENIZER = "lib/nokogiri/css/generated_tokenizer.rb" -EXTERNAL_JAVA_LIBRARIES = %w{isorelax jing nekohtml xercesImpl}.map{|x| "lib/#{x}.jar"} +EXTERNAL_JAVA_LIBRARIES = %w{isorelax jing nekohtml nekodtd xercesImpl}.map{|x| "lib/#{x}.jar"} JAVA_EXT = "lib/nokogiri/nokogiri.jar" JRUBY_HOME = Config::CONFIG['prefix'] @@ -138,7 +138,7 @@ namespace :java do task :build_external do Dir.chdir('ext/java') do LIB_DIR = '../../lib' - CLASSPATH = "#{JRUBY_HOME}/lib/jruby.jar:#{LIB_DIR}/nekohtml.jar:#{LIB_DIR}/xercesImpl.jar:#{LIB_DIR}/isorelax.jar:#{LIB_DIR}/jing.jar" + CLASSPATH = "#{JRUBY_HOME}/lib/jruby.jar:#{LIB_DIR}/nekohtml.jar:#{LIB_DIR}/nekodtd.jar:#{LIB_DIR}/xercesImpl.jar:#{LIB_DIR}/isorelax.jar:#{LIB_DIR}/jing.jar" sh "javac -g -cp #{CLASSPATH} nokogiri/*.java nokogiri/internals/*.java" sh "jar cf ../../#{JAVA_EXT} nokogiri/*.class nokogiri/internals/*.class" end diff --git a/ext/java/nokogiri/EncodingHandler.java b/ext/java/nokogiri/EncodingHandler.java new file mode 100644 index 00000000000..96df39f94ef --- /dev/null +++ b/ext/java/nokogiri/EncodingHandler.java @@ -0,0 +1,91 @@ +package nokogiri; + +import java.util.HashMap; + +import org.jruby.Ruby; +import org.jruby.RubyArray; +import org.jruby.RubyClass; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Element; +import org.w3c.dom.Node; + +/** + * Stub class to satisfy unit tests. I'm not sure where this class is + * meant to be used. As coded it won't really interact with any other + * classes and will have no effect on character encodings reported by + * documents being parsed. 
+ * + * @author Patrick Mahoney + */ +public class EncodingHandler extends RubyObject { + protected static HashMap map = new HashMap(); + static { + addInitial(); + } + + protected String name; + + protected static void addInitial() { + map.put("UTF-8", "UTF-8"); + } + + public EncodingHandler(Ruby ruby, RubyClass klass, String value) { + super(ruby, klass); + name = value; + } + + @JRubyMethod(name="[]", meta=true) + public static IRubyObject get(ThreadContext context, + IRubyObject _klass, + IRubyObject keyObj) { + Ruby ruby = context.getRuntime(); + String key = keyObj.toString(); + String value = map.get(key); + if (value == null) + return ruby.getNil(); + + return new EncodingHandler( + ruby, + (RubyClass)ruby.getClassFromPath("Nokogiri::EncodingHandler"), + value); + } + + @JRubyMethod(meta=true) + public static IRubyObject delete(ThreadContext context, + IRubyObject _klass, + IRubyObject keyObj) { + String key = keyObj.toString(); + String value = map.remove(key); + if (value == null) + return context.getRuntime().getNil(); + return context.getRuntime().newString(value); + } + + @JRubyMethod(name="clear_aliases!", meta=true) + public static IRubyObject clear_aliases(ThreadContext context, + IRubyObject _klass) { + map.clear(); + addInitial(); + return context.getRuntime().getNil(); + } + + @JRubyMethod(meta=true) + public static IRubyObject alias(ThreadContext context, + IRubyObject _klass, + IRubyObject orig, + IRubyObject alias) { + String value = map.get(orig.toString()); + if (value != null) + map.put(alias.toString(), value); + + return context.getRuntime().getNil(); + } + + @JRubyMethod + public IRubyObject name(ThreadContext context) { + return context.getRuntime().newString(name); + } +} diff --git a/ext/java/nokogiri/HtmlDocument.java b/ext/java/nokogiri/HtmlDocument.java index 15717456930..76f1737b8d5 100644 --- a/ext/java/nokogiri/HtmlDocument.java +++ b/ext/java/nokogiri/HtmlDocument.java @@ -2,10 +2,8 @@ import java.io.IOException; import 
javax.xml.parsers.ParserConfigurationException; -import nokogiri.internals.HtmlDocumentImpl; -import nokogiri.internals.HtmlEmptyDocumentImpl; -import nokogiri.internals.HtmlParseOptions; -import nokogiri.internals.ParseOptions; +import nokogiri.internals.HtmlDomParserContext; +import nokogiri.internals.SaveContext; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.anno.JRubyMethod; @@ -14,35 +12,26 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; +import org.w3c.dom.DocumentType; import org.xml.sax.SAXException; public class HtmlDocument extends XmlDocument { public HtmlDocument(Ruby ruby, RubyClass klazz, Document doc) { super(ruby, klazz, doc); - this.document = doc; - this.internalNode = new HtmlDocumentImpl(ruby, doc); } @JRubyMethod(name="new", meta = true, rest = true, required=0) - public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args) { + public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, + IRubyObject[] args) { HtmlDocument doc = null; try { - - /* - * A little explanation: - * I'm using an XmlDocument instead of a HTMLDocumentImpl in order - * not to have capitalized node names. 
- */ - - Document docNode = (new ParseOptions(0)).getDocumentBuilder().newDocument(); - + Document docNode = createNewDocument(); doc = new HtmlDocument(context.getRuntime(), (RubyClass) cls, - docNode); - doc.internalNode = new HtmlEmptyDocumentImpl(context.getRuntime(), - docNode); + docNode); } catch (Exception ex) { - throw context.getRuntime().newRuntimeError("couldn't create document: "+ex.toString()); + throw context.getRuntime() + .newRuntimeError("couldn't create document: "+ex.toString()); } RuntimeHelpers.invoke(context, doc, "initialize", args); @@ -50,40 +39,61 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj return doc; } - @JRubyMethod(meta = true, rest = true) - public static IRubyObject read_io(ThreadContext context, IRubyObject cls, IRubyObject[] args) { + public static IRubyObject do_parse(ThreadContext context, + IRubyObject klass, + IRubyObject[] args) { Ruby ruby = context.getRuntime(); + Arity.checkArgumentCount(ruby, args, 4, 4); + HtmlDomParserContext ctx = + new HtmlDomParserContext(ruby, args[3]); + ctx.setInputSource(context, args[0]); + return ctx.parse(context, klass, args[1]); + } - IRubyObject content = RuntimeHelpers.invoke(context, args[0], "read"); - args[0] = content; - - return read_memory(context, cls, args); + @JRubyMethod(meta = true, rest = true) + public static IRubyObject read_io(ThreadContext context, + IRubyObject cls, + IRubyObject[] args) { + return do_parse(context, cls, args); } @JRubyMethod(meta = true, rest = true) - public static IRubyObject read_memory(ThreadContext context, IRubyObject cls, IRubyObject[] args) { - - Ruby ruby = context.getRuntime(); - Arity.checkArgumentCount(ruby, args, 4, 4); - ParseOptions options = new HtmlParseOptions(args[3]); - try { - Document document; - document = options.parse(args[0].convertToString().asJavaString()); - HtmlDocument doc = new HtmlDocument(ruby, (RubyClass)cls, document); - doc.setUrl(args[1]); - options.addErrorsIfNecessary(context, 
doc); - return doc; - } catch (ParserConfigurationException pce) { - return options.getDocumentWithErrorsOrRaiseException(context, pce); - } catch (SAXException saxe) { - return options.getDocumentWithErrorsOrRaiseException(context, saxe); - } catch (IOException ioe) { - return options.getDocumentWithErrorsOrRaiseException(context, ioe); - } + public static IRubyObject read_memory(ThreadContext context, + IRubyObject cls, + IRubyObject[] args) { + return do_parse(context, cls, args); } + @JRubyMethod public static IRubyObject serialize(ThreadContext context, IRubyObject htmlDoc) { throw context.getRuntime().newNotImplementedError("not implemented"); } -} \ No newline at end of file + + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + Document doc = getDocument(); + DocumentType dtd = doc.getDoctype(); + + if(dtd != null) { + ctx.append("\n"); + } + + this.saveNodeListContent(context, + (XmlNodeSet) this.children(context), ctx); + ctx.append("\n"); + } +} diff --git a/ext/java/nokogiri/HtmlElementDescription.java b/ext/java/nokogiri/HtmlElementDescription.java new file mode 100644 index 00000000000..eb6fbae1cf7 --- /dev/null +++ b/ext/java/nokogiri/HtmlElementDescription.java @@ -0,0 +1,113 @@ +package nokogiri; + +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyMethod; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.cyberneko.html.HTMLElements; +import org.cyberneko.html.HTMLElements.Element; + +import java.util.Map; +import java.util.List; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Collections; + +import static org.jruby.javasupport.util.RuntimeHelpers.invoke; + +/** + * @author Patrick Mahoney + */ +public class HtmlElementDescription extends RubyObject { + + /** + * Stores memoized hash of element -> list of valid subelements. 
+ */ + static protected Map> subElements; + static { + Map> _subElements = + new HashMap>(); + subElements = Collections.synchronizedMap(_subElements); + } + + protected HTMLElements.Element element; + + public HtmlElementDescription(Ruby runtime, RubyClass rubyClass) { + super(runtime, rubyClass); + } + + /** + * Lookup the list of sub elements of code. If not + * already stored, iterate through all elements to find valid + * subelements; save this list and return it. + */ + protected static List findSubElements(HTMLElements.Element elem) { + List subs = subElements.get(elem.code); + + if (subs == null) { + subs = new ArrayList(); + + /* + * A bit of a hack. NekoHtml source code shows that + * UNKNOWN is the highest value element. We cannot access + * the list of elements directly because it's protected. + */ + for (short c = 0; c < HTMLElements.UNKNOWN; c++) { + HTMLElements.Element maybe_sub = + HTMLElements.getElement(c); + if (maybe_sub.isParent(elem)) { + subs.add(maybe_sub.name); + } + } + + subElements.put(elem.code, subs); + } + + return subs; + } + + @JRubyMethod(name="[]", meta=true) + public static IRubyObject get(ThreadContext context, + IRubyObject klazz, IRubyObject name) { + + HTMLElements.Element elem = HTMLElements.getElement(name.toString()); + if (elem == HTMLElements.NO_SUCH_ELEMENT) + return context.getRuntime().getNil(); + + HtmlElementDescription desc = + new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz); + desc.element = elem; + return desc; + } + + @JRubyMethod() + public IRubyObject name(ThreadContext context) { + return context.getRuntime().newString(element.name.toLowerCase()); + } + + @JRubyMethod(name="inline?") + public IRubyObject inline_eh(ThreadContext context) { + return context.getRuntime().newBoolean(element.isInline()); + } + + @JRubyMethod(name="empty?") + public IRubyObject empty_eh(ThreadContext context) { + return context.getRuntime().newBoolean(element.isEmpty()); + } + + @JRubyMethod() + public 
IRubyObject sub_elements(ThreadContext context) { + Ruby ruby = context.getRuntime(); + List subs = findSubElements(element); + IRubyObject[] ary = new IRubyObject[subs.size()]; + for (int i = 0; i < subs.size(); ++i) { + ary[i] = ruby.newString(subs.get(i)); + } + + return ruby.newArray(ary); + } + +} diff --git a/ext/java/nokogiri/HtmlEntityLookup.java b/ext/java/nokogiri/HtmlEntityLookup.java new file mode 100644 index 00000000000..a3b4bd698a4 --- /dev/null +++ b/ext/java/nokogiri/HtmlEntityLookup.java @@ -0,0 +1,44 @@ +package nokogiri; + +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyMethod; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.cyberneko.html.HTMLEntities; + +import static org.jruby.javasupport.util.RuntimeHelpers.invoke; + +/** + * @author Patrick Mahoney + */ +public class HtmlEntityLookup extends RubyObject { + + public HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) { + super(runtime, rubyClass); + } + + /** + * Looks up an HTML entity key. + * + * The description is a bit lacking. 
+ */ + @JRubyMethod() + public IRubyObject get(ThreadContext context, IRubyObject key) { + Ruby ruby = context.getRuntime(); + String name = key.toString(); + int val = HTMLEntities.get(name); + if (val == -1) return ruby.getNil(); + + IRubyObject edClass = + ruby.getClassFromPath("Nokogiri::HTML::EntityDescription"); + IRubyObject edObj = invoke(context, edClass, "new", + ruby.newFixnum(val), ruby.newString(name), + ruby.newString(name + " entity")); + + return edObj; + } + +} diff --git a/ext/java/nokogiri/HtmlSaxParserContext.java b/ext/java/nokogiri/HtmlSaxParserContext.java index b9cce3723ae..12c1d486c29 100644 --- a/ext/java/nokogiri/HtmlSaxParserContext.java +++ b/ext/java/nokogiri/HtmlSaxParserContext.java @@ -1,94 +1,118 @@ package nokogiri; -import java.io.FileInputStream; -import java.io.FileNotFoundException; +import java.io.InputStream; import java.io.IOException; -import java.io.StringReader; + import nokogiri.internals.NokogiriHandler; +import org.apache.xerces.parsers.AbstractSAXParser; import org.cyberneko.html.parsers.SAXParser; import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.RubyModule; +import org.jruby.RubyObject; +import org.jruby.RubyObjectAdapter; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; +import org.jruby.javasupport.JavaEmbedUtils; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import org.xml.sax.InputSource; +import org.xml.sax.ContentHandler; +import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; -import org.xml.sax.ext.DefaultHandler2; +import org.xml.sax.SAXNotSupportedException; +import org.xml.sax.SAXNotRecognizedException; import static org.jruby.javasupport.util.RuntimeHelpers.invoke; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; public class HtmlSaxParserContext extends XmlSaxParserContext { private SAXParser parser; - private InputSource source; public HtmlSaxParserContext(Ruby ruby, RubyClass 
rubyClass) { super(ruby, rubyClass); + } - - this.parser = new SAXParser(); - - try{ - this.parser.setProperty("http://cyberneko.org/html/properties/names/elems", "match"); - this.parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change"); - } catch(Exception ex) { - System.out.println("Problem while creating HTML SAX Parser: "+ex.toString()); - } - + @Override + protected AbstractSAXParser createParser() throws SAXException { + SAXParser parser = new SAXParser(); + + try{ + parser.setProperty( + "http://cyberneko.org/html/properties/names/elems", "lower"); + parser.setProperty( + "http://cyberneko.org/html/properties/names/attrs", "lower"); + return parser; + } catch(SAXException ex) { + throw new SAXException( + "Problem while creating HTML SAX Parser: " + ex.toString()); + } } @JRubyMethod(name="memory", meta=true) - public static IRubyObject parse_memory(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding) { - String input = data.convertToString().asJavaString(); - - HtmlSaxParserContext ctx = new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz); - - ctx.source = new InputSource(new StringReader(input)); - - return ctx; + public static IRubyObject parse_memory(ThreadContext context, + IRubyObject klazz, + IRubyObject data, + IRubyObject encoding) { + HtmlSaxParserContext ctx = + new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz); + ctx.setInputSource(context, data); + return ctx; } @JRubyMethod(name="file", meta=true) - public static IRubyObject parse_file(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject encoding) { - String file = data.convertToString().asJavaString(); - - HtmlSaxParserContext ctx = new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz); - - try { - ctx.source = new InputSource(new FileInputStream(file)); - } catch (FileNotFoundException ex) {} - + public static IRubyObject parse_file(ThreadContext context, + IRubyObject klazz, + 
IRubyObject data, + IRubyObject encoding) { + HtmlSaxParserContext ctx = + new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz); + ctx.setInputSourceFile(context, data); return ctx; } - @JRubyMethod() - public IRubyObject parse_with(ThreadContext context, IRubyObject handlerRuby) { - Ruby ruby = context.getRuntime(); - - if(!invoke(context, handlerRuby, "kind_of?", - ruby.getClassFromPath("Nokogiri::XML::SAX::Parser")).isTrue()) { - throw ruby.newArgumentError("argument must be a Nokogiri::XML::SAX::Parser"); - } - - DefaultHandler2 handler = new NokogiriHandler(ruby, handlerRuby); - - this.parser.setContentHandler(handler); - this.parser.setErrorHandler(handler); + @JRubyMethod(name="io", meta=true) + public static IRubyObject parse_io(ThreadContext context, + IRubyObject klazz, + IRubyObject data, + IRubyObject enc) { + //int encoding = (int)enc.convertToInteger().getLongValue(); + HtmlSaxParserContext ctx = + new HtmlSaxParserContext(context.getRuntime(), (RubyClass) klazz); + ctx.setInputSource(context, data); + return ctx; + } - try{ - this.parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler); - } catch(Exception ex) { - System.out.println("Problem while creating HTML SAX Parser: "+ex.toString()); - } + /** + * Create a new parser context that will read from a raw input + * stream. Not a JRuby method. Meant to be run in a separate + * thread by XmlSaxPushParser. 
+ */ + public static IRubyObject parse_stream(ThreadContext context, + IRubyObject klazz, + InputStream stream) { + HtmlSaxParserContext ctx = + new HtmlSaxParserContext(context.getRuntime(), (RubyClass)klazz); + ctx.setInputSource(stream); + return ctx; + } - try{ - this.parser.parse(this.source); - } catch(SAXException se) { - throw RaiseException.createNativeRaiseException(ruby, se); - } catch(IOException ioe) { - throw ruby.newIOErrorFromException(ioe); - } + @Override + protected void preParse(ThreadContext context, + IRubyObject handlerRuby, + NokogiriHandler handler) { + final String path = "Nokogiri::XML::FragmentHandler"; + final String docFrag = + "http://cyberneko.org/html/features/balance-tags/document-fragment"; + RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter(); + IRubyObject doc = adapter.getInstanceVariable(handlerRuby, "@document"); + RubyModule mod = + context.getRuntime().getClassFromPath(path); + try { + if (doc != null && !doc.isNil() && adapter.isKindOf(doc, mod)) + parser.setFeature(docFrag, true); + } catch (Exception e) { + // ignore + } + } - return this; - } } diff --git a/ext/java/nokogiri/NokogiriService.java b/ext/java/nokogiri/NokogiriService.java index 59ee701785b..0dc5f34553d 100644 --- a/ext/java/nokogiri/NokogiriService.java +++ b/ext/java/nokogiri/NokogiriService.java @@ -25,12 +25,17 @@ public static void init(Ruby ruby) { RubyModule html = nokogiri.defineModuleUnder("HTML"); RubyClass node = xml.defineClassUnder("Node", ruby.getObject(), XML_NODE_ALLOCATOR); - + RubyClass char_data = xml.defineClassUnder("CharacterData", node, null); + + init_encoding_handler(ruby, nokogiri); init_xml_node(ruby, node); init_xml_attr(ruby, xml, node); init_xml_comment(ruby, xml, node); + init_xml_processing_instruction(ruby, xml, node); RubyClass document = init_xml_document(ruby, xml, node); init_html_document(ruby, html, document); + init_html_element_description(ruby, html); + init_html_entity_lookup(ruby, html); 
init_xml_document_fragment(ruby, xml, node); init_xml_dtd(ruby, xml, node); init_xml_element(ruby, xml, node); @@ -38,17 +43,28 @@ public static void init(Ruby ruby) { init_xml_namespace(ruby, xml); init_xml_node_set(ruby, xml); init_xml_reader(ruby, xml); + init_xml_attribute_decl(ruby, xml, node); + init_xml_element_decl(ruby, xml, node); + init_xml_entity_decl(ruby, xml, node); + init_xml_element_content(ruby, xml); RubyClass xmlSaxParser = init_xml_sax_parser(ruby, xml); + init_xml_sax_push_parser(ruby, xml); init_html_sax_parser(ruby, html, xmlSaxParser); RubyClass schema = init_xml_schema(ruby, xml); init_xml_relaxng(ruby, xml, schema); init_xml_syntax_error(ruby, xml, nokogiri); - RubyClass text = init_xml_text(ruby, xml, node); + RubyClass text = init_xml_text(ruby, xml, char_data, node); init_xml_cdata(ruby, xml, text); init_xml_xpath(ruby, xml); init_xml_xpath_context(ruby, xml); init_xslt_stylesheet(ruby, nokogiri); - init_xml_attribute_decl(ruby, node); + } + + public static void init_encoding_handler(Ruby ruby, RubyModule nokogiri) { + RubyModule encHandler = nokogiri.defineClassUnder("EncodingHandler", + ruby.getObject(), + ENCODING_HANDLER_ALLOCATOR); + encHandler.defineAnnotatedMethods(EncodingHandler.class); } public static void init_html_document(Ruby ruby, RubyModule html, RubyClass document) { @@ -64,6 +80,20 @@ public static void init_html_sax_parser(Ruby ruby, RubyModule html, RubyClass xm saxParser.defineAnnotatedMethods(HtmlSaxParserContext.class); } + public static void init_html_element_description(Ruby ruby, RubyModule html) { + RubyModule htmlElemDesc = + html.defineClassUnder("ElementDescription", ruby.getObject(), + HTML_ELEMENT_DESCRIPTION_ALLOCATOR); + htmlElemDesc.defineAnnotatedMethods(HtmlElementDescription.class); + } + + public static void init_html_entity_lookup(Ruby ruby, RubyModule html) { + RubyModule htmlEntityLookup = + html.defineClassUnder("EntityLookup", ruby.getObject(), + HTML_ENTITY_LOOKUP_ALLOCATOR); + 
htmlEntityLookup.defineAnnotatedMethods(HtmlEntityLookup.class); + } + public static void init_xml_attr(Ruby ruby, RubyModule xml, RubyClass node){ RubyClass attr = xml.defineClassUnder("Attr", node, XML_ATTR_ALLOCATOR); @@ -82,6 +112,14 @@ public static void init_xml_comment(Ruby ruby, RubyModule xml, RubyClass node) { comment.defineAnnotatedMethods(XmlComment.class); } + public static void init_xml_processing_instruction(Ruby ruby, + RubyModule xml, + RubyClass node) { + RubyModule pi = xml.defineClassUnder("ProcessingInstruction", node, + XML_PROCESSING_INSTRUCTION_ALLOCATOR); + pi.defineAnnotatedMethods(XmlProcessingInstruction.class); + } + public static RubyClass init_xml_document(Ruby ruby, RubyModule xml, RubyClass node) { RubyClass document = xml.defineClassUnder("Document", node, XML_DOCUMENT_ALLOCATOR); @@ -129,10 +167,6 @@ public static void init_xml_node_set(Ruby ruby, RubyModule xml) { nodeSet.defineAnnotatedMethods(XmlNodeSet.class); } - - public static void init_xml_attribute_decl(Ruby ruby, RubyClass node) { - node.defineAnnotatedMethods(XmlAttributeDecl.class); - } public static void init_xml_reader(Ruby ruby, RubyModule xml) { RubyClass reader = xml.defineClassUnder("Reader", ruby.getObject(), XML_READER_ALLOCATOR); @@ -140,6 +174,37 @@ public static void init_xml_reader(Ruby ruby, RubyModule xml) { reader.defineAnnotatedMethods(XmlReader.class); } + public static void init_xml_attribute_decl(Ruby ruby, RubyModule xml, + RubyClass node) { + RubyClass attrDecl = xml.defineClassUnder("AttributeDecl", node, + XML_ATTRIBUTE_DECL_ALLOCATOR); + + attrDecl.defineAnnotatedMethods(XmlAttributeDecl.class); + } + + public static void init_xml_element_decl(Ruby ruby, RubyModule xml, + RubyClass node) { + RubyClass attrDecl = xml.defineClassUnder("ElementDecl", node, + XML_ELEMENT_DECL_ALLOCATOR); + + attrDecl.defineAnnotatedMethods(XmlElementDecl.class); + } + + public static void init_xml_entity_decl(Ruby ruby, RubyModule xml, + RubyClass node) { + 
RubyClass attrDecl = xml.defineClassUnder("EntityDecl", node, + XML_ENTITY_DECL_ALLOCATOR); + + attrDecl.defineAnnotatedMethods(XmlEntityDecl.class); + } + + public static void init_xml_element_content(Ruby ruby, RubyModule xml) { + RubyClass ec = xml.defineClassUnder("ElementContent", + ruby.getObject(), + XML_ELEMENT_CONTENT_ALLOCATOR); + ec.defineAnnotatedMethods(XmlElementContent.class); + } + public static void init_xml_relaxng(Ruby ruby, RubyModule xml, RubyClass schema) { RubyClass relaxng = xml.defineClassUnder("RelaxNG", schema, XML_RELAXNG_ALLOCATOR); @@ -155,6 +220,16 @@ public static RubyClass init_xml_sax_parser(Ruby ruby, RubyModule xml) { return saxParser; } + public static void init_xml_sax_push_parser(Ruby ruby, RubyModule xml) { + RubyModule xmlSax = xml.defineModuleUnder("SAX"); + // Nokogiri::XML::SAX::PushParser is defined by nokogiri/xml/sax/pushparser.rb + RubyClass pushParser = + xmlSax.defineClassUnder("PushParser", + ruby.getObject(), + XML_SAXPUSHPARSER_ALLOCATOR); + pushParser.defineAnnotatedMethods(XmlSaxPushParser.class); + } + public static RubyClass init_xml_schema(Ruby ruby, RubyModule xml) { RubyClass schema = xml.defineClassUnder("Schema", ruby.getObject(), XML_SCHEMA_ALLOCATOR); @@ -171,9 +246,8 @@ public static void init_xml_syntax_error(Ruby ruby, RubyModule xml, RubyModule n syntaxError.defineAnnotatedMethods(XmlSyntaxError.class); } - public static RubyClass init_xml_text(Ruby ruby, RubyModule xml, RubyClass node) { - RubyClass character_data = xml.defineClassUnder("CharacterData", node, XML_TEXT_ALLOCATOR); - RubyClass text = xml.defineClassUnder("Text", character_data, XML_TEXT_ALLOCATOR); + public static RubyClass init_xml_text(Ruby ruby, RubyModule xml, RubyClass char_data, RubyClass node) { + RubyClass text = xml.defineClassUnder("Text", char_data, XML_TEXT_ALLOCATOR); text.defineAnnotatedMethods(XmlText.class); @@ -199,6 +273,12 @@ public static void init_xslt_stylesheet(Ruby ruby, RubyModule nokogiri) { 
stylesheet.defineAnnotatedMethods(XsltStylesheet.class); } + private static ObjectAllocator ENCODING_HANDLER_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new EncodingHandler(runtime, klazz, ""); + } + }; + private static ObjectAllocator HTML_DOCUMENT_ALLOCATOR = new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { throw runtime.newNotImplementedError("not implemented"); @@ -211,6 +291,20 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { } }; + private static ObjectAllocator HTML_ELEMENT_DESCRIPTION_ALLOCATOR = + new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new HtmlElementDescription(runtime, klazz); + } + }; + + private static ObjectAllocator HTML_ENTITY_LOOKUP_ALLOCATOR = + new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new HtmlEntityLookup(runtime, klazz); + } + }; + private static ObjectAllocator XML_ATTR_ALLOCATOR = new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz){ return new XmlAttr(runtime, klazz); @@ -229,6 +323,13 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { } }; + private static ObjectAllocator XML_PROCESSING_INSTRUCTION_ALLOCATOR = + new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + throw runtime.newNotImplementedError("not implemented"); + } + }; + private static ObjectAllocator XML_DOCUMENT_ALLOCATOR = new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { throw runtime.newNotImplementedError("not implemented"); @@ -281,7 +382,30 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { return new XmlReader(runtime, klazz); } + }; + + private static ObjectAllocator XML_ATTRIBUTE_DECL_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, 
RubyClass klazz) { + return new XmlAttributeDecl(runtime, klazz); + } + }; + + private static ObjectAllocator XML_ELEMENT_DECL_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlElementDecl(runtime, klazz); + } + }; + private static ObjectAllocator XML_ENTITY_DECL_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlEntityDecl(runtime, klazz); + } + }; + + private static ObjectAllocator XML_ELEMENT_CONTENT_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + throw runtime.newNotImplementedError("not implemented"); + } }; private static ObjectAllocator XML_RELAXNG_ALLOCATOR = new ObjectAllocator() { @@ -296,6 +420,12 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { } }; + private static ObjectAllocator XML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlSaxPushParser(runtime, klazz); + } + }; + private static ObjectAllocator XML_SCHEMA_ALLOCATOR = new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { return new XmlSchema(runtime, klazz); @@ -304,7 +434,7 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { private static ObjectAllocator XML_SYNTAXERROR_ALLOCATOR = new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - throw runtime.newNotImplementedError("not implemented"); + return new XmlSyntaxError(runtime, klazz); } }; diff --git a/ext/java/nokogiri/XmlAttr.java b/ext/java/nokogiri/XmlAttr.java index ce299a84712..06709e17082 100644 --- a/ext/java/nokogiri/XmlAttr.java +++ b/ext/java/nokogiri/XmlAttr.java @@ -1,5 +1,6 @@ package nokogiri; +import nokogiri.internals.SaveContext; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyModule; @@ -7,10 +8,19 @@ import org.jruby.runtime.ThreadContext; import 
org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Attr; +import org.w3c.dom.Element; import org.w3c.dom.Node; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; + public class XmlAttr extends XmlNode{ + public static final String[] HTML_BOOLEAN_ATTRS = { + "checked", "compact", "declare", "defer", "disabled", "ismap", + "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", + "selected" + }; + public XmlAttr(Ruby ruby, Node attr){ super(ruby, ((RubyModule) ruby.getModule("Nokogiri").getConstant("XML")).getClass("Attr"), attr); } @@ -30,16 +40,79 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj } XmlDocument xmlDoc = (XmlDocument)doc; + String str = rubyStringToString(content); + Node attr = xmlDoc.getDocument().createAttribute(str); return new XmlAttr(context.getRuntime(), - xmlDoc.getDocument().createAttribute(content.convertToString().asJavaString())); + (RubyClass) cls, + attr); + } + + public boolean isHtmlBooleanAttr() { + String name = node.getNodeName().toLowerCase(); + + for(String s : HTML_BOOLEAN_ATTRS) { + if(s.equals(name)) return true; + } + + return false; + } + + + private String serializeAttrTextContent(String s) { + if (s == null) return ""; + + char[] c = s.toCharArray(); + StringBuffer buffer = new StringBuffer(c.length); + + for(int i = 0; i < c.length; i++) { + switch(c[i]){ + case '\n': buffer.append(" "); break; + case '\r': buffer.append(" "); break; + case '\t': buffer.append(" "); break; + //case '"': buffer.append("""); break; + // TODO: is replacing '"' with '%22' always correct? 
+ case '"': buffer.append("%22"); break; + case '<': buffer.append("<"); break; + case '>': buffer.append(">"); break; + case '&': buffer.append("&"); break; + default: buffer.append(c[i]); + } + } + + return buffer.toString(); } @JRubyMethod(name="value=") - public IRubyObject value_set(ThreadContext context, IRubyObject content){ - Attr current = (Attr) node(); - current.setValue(this.encode_special_chars(context, content).convertToString().asJavaString()); - this.internalNode.setContent(content); + public IRubyObject value_set(ThreadContext context, IRubyObject content){ + Attr attr = (Attr) node; + attr.setValue(this.encode_special_chars(context, content).convertToString().asJavaString()); + setContent(content); return content; } + + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + Attr attr = (Attr) node; + + ctx.maybeSpace(); + ctx.append(rubyStringToString(getNodeName(context))); + + if (!ctx.asHtml() || !isHtmlBooleanAttr()) { + ctx.append("="); + ctx.append("\""); + ctx.append(serializeAttrTextContent(attr.getValue())); + ctx.append("\""); + } + } + + @Override + public IRubyObject unlink(ThreadContext context) { + Attr attr = (Attr) node; + Element parent = attr.getOwnerElement(); + parent.removeAttributeNode(attr); + + return this; + } + } diff --git a/ext/java/nokogiri/XmlAttributeDecl.java b/ext/java/nokogiri/XmlAttributeDecl.java index 8e3b954933c..e983774ecb1 100644 --- a/ext/java/nokogiri/XmlAttributeDecl.java +++ b/ext/java/nokogiri/XmlAttributeDecl.java @@ -1,41 +1,100 @@ package nokogiri; -import nokogiri.internals.XmlAttributeDeclImpl; +import java.util.ArrayList; + import org.jruby.Ruby; +import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Element; import org.w3c.dom.Node; /** - * ATTLIST declaration of DTD - * - * @author Yoko Harada + * DTD attribute declaration. 
+ * + * @author Patrick Mahoney */ -public class XmlAttributeDecl extends XmlNode implements XmlDtdDeclaration { - private Node parent; +public class XmlAttributeDecl extends XmlNode { + + public static RubyClass getRubyClass(Ruby ruby) { + return (RubyClass)ruby.getClassFromPath("Nokogiri::XML::AttributeDecl"); + } + + public XmlAttributeDecl(Ruby ruby, RubyClass klass) { + super(ruby, klass); + throw ruby.newRuntimeError("node required"); + } + + /** + * Initialize based on an attributeDecl node from a NekoDTD parsed + * DTD. + * + * Internally, XmlAttributeDecl combines these into a single node. + */ + public XmlAttributeDecl(Ruby ruby, RubyClass klass, Node attrDeclNode) { + super(ruby, klass, attrDeclNode); + } + + public static IRubyObject create(ThreadContext context, Node attrDeclNode) { + XmlAttributeDecl self = + new XmlAttributeDecl(context.getRuntime(), + getRubyClass(context.getRuntime()), + attrDeclNode); + return self; + } - public XmlAttributeDecl(Ruby runtime, RubyClass klazz) { - super(runtime, klazz); + @Override + @JRubyMethod + public IRubyObject node_name(ThreadContext context) { + return attribute_name(context); } - public XmlAttributeDecl(Ruby runtime, RubyClass klazz, Node attribute, Node parent) { - super(runtime, klazz, attribute); - this.parent = parent; - internalNode = new XmlAttributeDeclImpl(runtime, attribute); + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { + throw context.getRuntime() + .newRuntimeError("cannot change name of DTD decl"); } - - public Node getParent() { - return parent; + + public IRubyObject element_name(ThreadContext context) { + return getAttribute(context, "ename"); + } + + public IRubyObject attribute_name(ThreadContext context) { + return getAttribute(context, "aname"); + } + + @JRubyMethod + public IRubyObject attribute_type(ThreadContext context) { + return getAttribute(context, "atype"); } @JRubyMethod(name="default") - public 
IRubyObject op_default(ThreadContext context) { - return ((XmlAttributeDeclImpl)internalNode).getDefault(context); + public IRubyObject default_value(ThreadContext context) { + return getAttribute(context, "default"); } - - public void setDeclaration(String declaration) { - ((XmlAttributeDeclImpl)internalNode).setDeclaration(declaration); + + /** + * FIXME: will enumerations all be of the simple (val1|val2|val3) + * type string? + */ + @JRubyMethod + public IRubyObject enumeration(ThreadContext context) { + RubyArray enumVals = RubyArray.newArray(context.getRuntime()); + String atype = ((Element)node).getAttribute("atype"); + + if (atype != null && !atype.isEmpty() && atype.charAt(0) == '(') { + // removed enclosing parens + String valueStr = atype.substring(1, atype.length() - 1); + String[] values = valueStr.split("\\|"); + for (int i = 0; i < values.length; i++) { + enumVals.append(context.getRuntime().newString(values[i])); + } + } + + return enumVals; } + } diff --git a/ext/java/nokogiri/XmlCdata.java b/ext/java/nokogiri/XmlCdata.java index 14a65322c4b..7bea47c3299 100644 --- a/ext/java/nokogiri/XmlCdata.java +++ b/ext/java/nokogiri/XmlCdata.java @@ -1,11 +1,13 @@ package nokogiri; +import nokogiri.internals.SaveContext; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.javasupport.util.RuntimeHelpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.CDATASection; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -21,7 +23,7 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj XmlDocument xmlDoc =(XmlDocument) ((XmlNode) doc).document(context); Document document = xmlDoc.getDocument(); Node node = document.createCDATASection((text.isNil()) ? 
null : text.convertToString().asJavaString()); - XmlNode cdata = (XmlNode) XmlNode.constructNode(context.getRuntime(), node); + XmlNode cdata = new XmlCdata(context.getRuntime(), (RubyClass) cls, node); RuntimeHelpers.invoke(context, cdata, "initialize", args); @@ -29,4 +31,17 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj return cdata; } -} \ No newline at end of file + + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + CDATASection cdata = (CDATASection) node; + + if(cdata.getData().length() == 0) { + ctx.append(""); + } else { + ctx.append(""); + } + } +} diff --git a/ext/java/nokogiri/XmlComment.java b/ext/java/nokogiri/XmlComment.java index 962bab94cc6..98eb590b400 100644 --- a/ext/java/nokogiri/XmlComment.java +++ b/ext/java/nokogiri/XmlComment.java @@ -1,5 +1,6 @@ package nokogiri; +import nokogiri.internals.SaveContext; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.anno.JRubyMethod; @@ -18,6 +19,16 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj XmlDocument xmlDoc = (XmlDocument)doc; Document document = xmlDoc.getDocument(); Node node = document.createComment(text.convertToString().asJavaString()); - return XmlNode.constructNode(context.getRuntime(), node); + return new XmlComment(context.getRuntime(), (RubyClass) cls, node); + } + + @Override + public boolean isComment() { return true; } + + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + ctx.append(""); } } diff --git a/ext/java/nokogiri/XmlDocument.java b/ext/java/nokogiri/XmlDocument.java index 45a17c76b7a..2c76c3db237 100644 --- a/ext/java/nokogiri/XmlDocument.java +++ b/ext/java/nokogiri/XmlDocument.java @@ -2,83 +2,92 @@ import java.io.ByteArrayInputStream; import java.io.IOException; - +import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; + import nokogiri.internals.NokogiriUserDataHandler; 
-import nokogiri.internals.ParseOptions; -import nokogiri.internals.XmlDocumentImpl; -import nokogiri.internals.XmlEmptyDocumentImpl; +import nokogiri.internals.XmlDomParserContext; +import nokogiri.internals.SaveContext; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; -import org.jruby.javasupport.JavaUtil; import org.jruby.javasupport.util.RuntimeHelpers; import org.jruby.runtime.Arity; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import org.jruby.util.ByteList; import org.w3c.dom.Document; import org.w3c.dom.Node; -import org.xml.sax.SAXException; + +import static nokogiri.internals.NokogiriHelpers.stringOrNil; public class XmlDocument extends XmlNode { - protected Document document; + /* UserData keys for storing extra info in the document node. */ + public final static String DTD_RAW_DOCUMENT = "DTD_RAW_DOCUMENT"; + protected final static String DTD_INTERNAL_SUBSET = "DTD_INTERNAL_SUBSET"; + protected final static String DTD_EXTERNAL_SUBSET = "DTD_EXTERNAL_SUBSET"; + private static boolean substituteEntities = false; private static boolean loadExternalSubset = false; // TODO: Verify this. 
+ /** cache variables */ + protected IRubyObject encoding = null; + protected IRubyObject url = null; + public XmlDocument(Ruby ruby, Document document) { this(ruby, (RubyClass) ruby.getClassFromPath("Nokogiri::XML::Document"), document); } public XmlDocument(Ruby ruby, RubyClass klass, Document document) { super(ruby, klass, document); - this.document = document; // if(document == null) { // this.internalNode = new XmlEmptyDocumentImpl(ruby, document); // } else { - this.internalNode = new XmlDocumentImpl(ruby, document); - document.setUserData(NokogiriUserDataHandler.CACHED_NODE, this, - new NokogiriUserDataHandler(ruby)); // } setInstanceVariable("@decorators", ruby.getNil()); } - @Override - protected IRubyObject dup_implementation(ThreadContext context, boolean deep) { - return ((XmlDocumentImpl) this.internalNode).dup_impl(context, this, deep, this.getType()); - } +// @Override +// protected IRubyObject dup_implementation(ThreadContext context, boolean deep) { +// return ((XmlDocumentImpl) this.internalNode).dup_impl(context, this, deep, this.getType()); +// } public Document getDocument() { - return document; + return (Document) node; } - @Override - protected Node getNodeToCompare() { - return this.document; + public void setUrl(IRubyObject url) { + this.url = url; } - protected XmlDocumentImpl internals() { - return (XmlDocumentImpl) this.internalNode; + protected IRubyObject getUrl() { + return this.url; } - protected void setUrl(IRubyObject url) { - this.internals().url_set(url); + @JRubyMethod + public IRubyObject url(ThreadContext context) { + return getUrl(); + } + + protected static Document createNewDocument() { + try { + return DocumentBuilderFactory.newInstance().newDocumentBuilder() + .newDocument(); + } catch (ParserConfigurationException e) { + return null; // this will end in disaster...
+ } } @JRubyMethod(name="new", meta = true, rest = true, required=0) public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args) { XmlDocument doc = null; try { - - Document docNode = (new ParseOptions(0)).getDocumentBuilder().newDocument(); + Document docNode = createNewDocument(); doc = new XmlDocument(context.getRuntime(), (RubyClass) cls, - docNode); - doc.internalNode = new XmlEmptyDocumentImpl(context.getRuntime(), - docNode); + docNode); } catch (Exception ex) { throw context.getRuntime().newRuntimeError("couldn't create document: "+ex.toString()); } @@ -88,12 +97,6 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj return doc; } - @Override - @JRubyMethod - public IRubyObject children(ThreadContext context) { - return this.internals().children(context, this); - } - @Override @JRubyMethod public IRubyObject document(ThreadContext context) { @@ -102,13 +105,21 @@ public IRubyObject document(ThreadContext context) { @JRubyMethod(name="encoding=") public IRubyObject encoding_set(ThreadContext context, IRubyObject encoding) { - internals().encoding_set(context, this, encoding); - return encoding; + this.encoding = encoding; + return this; } @JRubyMethod public IRubyObject encoding(ThreadContext context) { - return internals().encoding(context, this); + if (this.encoding == null) { + if (getDocument().getXmlEncoding() == null) { + this.encoding = context.getRuntime().getNil(); + } else { + this.encoding = context.getRuntime().newString(getDocument().getXmlEncoding()); + } + } + + return this.encoding; } @JRubyMethod(meta = true) @@ -117,75 +128,83 @@ public static IRubyObject load_external_subsets_set(ThreadContext context, IRuby return context.getRuntime().getNil(); } - @JRubyMethod(meta = true, rest = true) - public static IRubyObject read_io(ThreadContext context, IRubyObject cls, IRubyObject[] args) { - + /** + * TODO: handle encoding? 
+ * + * @param args[0] a Ruby IO or StringIO + * @param args[1] url or nil + * @param args[2] encoding + * @param args[3] bitset of parser options + */ + public static IRubyObject newFromData(ThreadContext context, + IRubyObject klass, + IRubyObject[] args) { Ruby ruby = context.getRuntime(); - - IRubyObject content = RuntimeHelpers.invoke(context, args[0], "read"); - args[0] = content; - - return read_memory(context, cls, args); - - - -// Arity.checkArgumentCount(ruby, args, 4, 4); -// ParseOptions options = new ParseOptions(args[3]); -// try { -// Document document; -// if (args[0] instanceof RubyIO) { -// RubyIO io = (RubyIO)args[0]; -// document = options.parse(io.getInStream()); -// XmlDocument doc = new XmlDocument(ruby, (RubyClass)cls, document); -// doc.setUrl(args[1]); -// options.addErrorsIfNecessary(context, doc); -// return doc; -// } else { -// throw ruby.newTypeError("Only IO supported for Document.read_io currently"); -// } -// } catch (ParserConfigurationException pce) { -// return options.getDocumentWithErrorsOrRaiseException(context, pce); -// } catch (SAXException saxe) { -// return options.getDocumentWithErrorsOrRaiseException(context, saxe); -// } catch (IOException ioe) { -// return options.getDocumentWithErrorsOrRaiseException(context, ioe); -// } + Arity.checkArgumentCount(ruby, args, 4, 4); + XmlDomParserContext ctx = + new XmlDomParserContext(ruby, args[3]); + ctx.setInputSource(context, args[0]); + return ctx.parse(context, klass, args[1]); } @JRubyMethod(meta = true, rest = true) - public static IRubyObject read_memory(ThreadContext context, IRubyObject cls, IRubyObject[] args) { - - Ruby ruby = context.getRuntime(); - Arity.checkArgumentCount(ruby, args, 4, 4); - ParseOptions options = new ParseOptions(args[3]); - try { - Document document; - RubyString content = args[0].convertToString(); - ByteList byteList = content.getByteList(); - ByteArrayInputStream bais = new ByteArrayInputStream(byteList.unsafeBytes(), byteList.begin(), 
byteList.length()); - document = options.parse(bais); - XmlDocument doc = new XmlDocument(ruby, (RubyClass)cls, document); - doc.setUrl(args[1]); - options.addErrorsIfNecessary(context, doc); - return doc; - } catch (ParserConfigurationException pce) { - return options.getDocumentWithErrorsOrRaiseException(context, pce); - } catch (SAXException saxe) { - return options.getDocumentWithErrorsOrRaiseException(context, saxe); - } catch (IOException ioe) { - return options.getDocumentWithErrorsOrRaiseException(context, ioe); - } + public static IRubyObject read_io(ThreadContext context, + IRubyObject klass, + IRubyObject[] args) { + return newFromData(context, klass, args); + } + + @JRubyMethod(meta = true, rest = true) + public static IRubyObject read_memory(ThreadContext context, + IRubyObject klass, + IRubyObject[] args) { + return newFromData(context, klass, args); + } + + /** not a JRubyMethod */ + public static IRubyObject read_memory(ThreadContext context, + IRubyObject[] args) { + return read_memory(context, + context.getRuntime() + .getClassFromPath("Nokogiri::XML::Document"), + args); } @JRubyMethod public IRubyObject root(ThreadContext context) { - return internals().root(context, this); + Node rootNode = getDocument().getDocumentElement(); + if (rootNode == null) + return context.getRuntime().getNil(); + else + return XmlNode.fromNodeOrCreate(context, rootNode); } @JRubyMethod(name="root=") - public IRubyObject root_set(ThreadContext context, IRubyObject root) { - internals().root_set(context, this, root); - return root; + public IRubyObject root_set(ThreadContext context, IRubyObject newRoot_) { + XmlNode newRoot = asXmlNode(context, newRoot_); + + IRubyObject root = root(context); + if (root.isNil()) { + Node newRootNode; + if (getDocument() == newRoot.getOwnerDocument()) { + newRootNode = newRoot.getNode(); + } else { + // must copy otherwise newRoot may exist in two places + // with different owner document. 
+ newRootNode = getDocument().importNode(newRoot.getNode(), true); + } + add_child_node(context, fromNodeOrCreate(context, newRootNode)); + } else { + Node rootNode = asXmlNode(context, root).node; + fromNode(context, rootNode).replace_node(context, newRoot); + } + + return newRoot; + } + + @JRubyMethod + public IRubyObject version(ThreadContext context) { + return stringOrNil(context.getRuntime(), getDocument().getXmlVersion()); } @JRubyMethod(meta = true) @@ -194,16 +213,75 @@ public static IRubyObject substitute_entities_set(ThreadContext context, IRubyOb return context.getRuntime().getNil(); } - @JRubyMethod - public IRubyObject url(ThreadContext context) { - return this.internals().url(); + public IRubyObject getInternalSubset(ThreadContext context) { + IRubyObject dtd = + (IRubyObject) node.getUserData(DTD_INTERNAL_SUBSET); + + if (dtd == null) { + if (getDocument().getDoctype() == null) + dtd = context.getRuntime().getNil(); + else + dtd = XmlDtd.newFromInternalSubset(context.getRuntime(), + getDocument()); + + node.setUserData(DTD_INTERNAL_SUBSET, dtd, null); + } + + return dtd; } - - @JRubyMethod - public IRubyObject version(ThreadContext context) { - String version = document.getXmlVersion(); - if (version == null) return context.getRuntime().getNil(); - return JavaUtil.convertJavaToRuby(context.getRuntime(), version); + + public IRubyObject getExternalSubset(ThreadContext context) { + IRubyObject dtd = (IRubyObject) + node.getUserData(DTD_EXTERNAL_SUBSET); + + if (dtd == null) { + if (getDocument().getDoctype() == null) + dtd = context.getRuntime().getNil(); + else + dtd = XmlDtd.newFromExternalSubset(context.getRuntime(), + getDocument()); + + node.setUserData(DTD_EXTERNAL_SUBSET, dtd, null); + } + + return dtd; } -} \ No newline at end of file + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + if(!ctx.noDecl()) { + ctx.append("\n"); + } + + IRubyObject maybeRoot = root(context); + if (maybeRoot.isNil()) + throw 
context.getRuntime().newRuntimeError("no root document"); + + XmlNode root = (XmlNode) maybeRoot; + root.saveContent(context, ctx); + ctx.append("\n"); + } +} diff --git a/ext/java/nokogiri/XmlDocumentFragment.java b/ext/java/nokogiri/XmlDocumentFragment.java index e38250be179..94f1c0d4a44 100644 --- a/ext/java/nokogiri/XmlDocumentFragment.java +++ b/ext/java/nokogiri/XmlDocumentFragment.java @@ -5,7 +5,9 @@ package nokogiri; +import nokogiri.internals.SaveContext; import org.jruby.Ruby; +import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.javasupport.util.RuntimeHelpers; @@ -59,4 +61,33 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj return fragment; } + //@Override + public void add_child(ThreadContext context, XmlNode child) { + // Some magic for DocumentFragment + + Ruby ruby = context.getRuntime(); + XmlNodeSet children = (XmlNodeSet) child.children(context); + + long length = children.length(); + + RubyArray childrenArray = children.convertToArray(); + + if(length != 0) { + for(int i = 0; i < length; i++) { + XmlNode item = (XmlNode) ((XmlNode) childrenArray.aref(ruby.newFixnum(i))).dup(context); + add_child(context, item); + } + } + } + + @Override + public void relink_namespace(ThreadContext context) { + ((XmlNodeSet) children(context)).relink_namespace(context); + } + + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + saveNodeListContent(context, (XmlNodeSet) children(context), ctx); + } + } diff --git a/ext/java/nokogiri/XmlDtd.java b/ext/java/nokogiri/XmlDtd.java index db7b70bb474..15fa88ab5a3 100644 --- a/ext/java/nokogiri/XmlDtd.java +++ b/ext/java/nokogiri/XmlDtd.java @@ -1,51 +1,397 @@ package nokogiri; -import nokogiri.internals.XmlDocumentTypeImpl; - +import org.apache.xerces.xni.QName; +import org.cyberneko.dtd.DTDConfiguration; import org.jruby.Ruby; +import org.jruby.RubyArray; import org.jruby.RubyClass; +import 
org.jruby.RubyHash; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.Visibility; import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Document; +import org.w3c.dom.DocumentType; +import org.w3c.dom.Element; import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import static nokogiri.internals.NokogiriHelpers.stringOrNil; +import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil; +import static org.jruby.javasupport.util.RuntimeHelpers.invoke; public class XmlDtd extends XmlNode { + protected RubyArray allDecls = null; + + /** cache of children, Nokogiri::XML::NodeSet */ + protected IRubyObject children = null; + + /** cache of name => XmlAttributeDecl */ + protected RubyHash attributes = null; + + /** cache of name => XmlElementDecl */ + protected RubyHash elements = null; + + /** cache of name => XmlEntityDecl */ + protected RubyHash entities = null; + + /** cache of name => Nokogiri::XML::Notation */ + protected RubyHash notations = null; + protected RubyClass notationClass; + + /** temporary store of content models before they are added to + * their XmlElementDecl. 
*/ + protected RubyHash contentModels; + + /** node name */ + protected IRubyObject name; + + /** public ID (or external ID) */ + protected IRubyObject pubId; + + /** system ID */ + protected IRubyObject sysId; + + public static RubyClass getClass(Ruby ruby) { + return (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DTD"); + } + public XmlDtd(Ruby ruby, RubyClass rubyClass) { super(ruby, rubyClass); } - public XmlDtd(Ruby ruby, RubyClass rubyClass, Node node) { - super(ruby, rubyClass, node); + public XmlDtd(Ruby ruby) { + this(ruby, getClass(ruby), null); } - @Override + public XmlDtd(Ruby ruby, Node dtd) { + this(ruby, getClass(ruby), dtd); + } + + public XmlDtd(Ruby ruby, RubyClass rubyClass, Node dtd) { + super(ruby, rubyClass, dtd); + notationClass = (RubyClass) + ruby.getClassFromPath("Nokogiri::XML::Notation"); + + name = pubId = sysId = ruby.getNil(); + if (dtd == null) return; + + // This is the dtd declaration stored in the document; it + // contains the DTD name (root element) and public and system + // ids. The actual declarations are in the NekoDTD 'dtd' + // variable. I don't know of a way to consolidate the two. + + DocumentType otherDtd = dtd.getOwnerDocument().getDoctype(); + if (otherDtd != null) { + name = stringOrNil(ruby, otherDtd.getNodeName()); + pubId = nonEmptyStringOrNil(ruby, otherDtd.getPublicId()); + sysId = nonEmptyStringOrNil(ruby, otherDtd.getSystemId()); + } + } + + /** + * Create an unparented element that contains DTD declarations + * parsed from the internal subset attached as user data to + * doc. The attached dtd must be the tree from + * NekoDTD. The owner document of the returned tree will be + * doc. + * + * NekoDTD parser returns a new document node containing elements + * representing the dtd declarations. The plan is to get the root + * element and adopt it into the correct document, stripping the + * Document provided by NekoDTD.
+ * + */ + public static XmlDtd newFromInternalSubset(Ruby ruby, Document doc) { + Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); + if (dtdTree_ == null) + return new XmlDtd(ruby); + + Node dtdTree = (Node) dtdTree_; + Node dtd = getInternalSubset(dtdTree); + if (dtd == null) { + return new XmlDtd(ruby); + } else { + // Import the node into doc so it has the correct owner document. + dtd = doc.importNode(dtd, true); + return new XmlDtd(ruby, dtd); + } + } + + public static IRubyObject newFromExternalSubset(Ruby ruby, Document doc) { + Object dtdTree_ = doc.getUserData(XmlDocument.DTD_RAW_DOCUMENT); + if (dtdTree_ == null) { + return ruby.getNil(); + } + + Node dtdTree = (Node) dtdTree_; + Node dtd = getExternalSubset(dtdTree); + if (dtd == null) { + return ruby.getNil(); + } else if (!dtd.hasChildNodes()) { + return ruby.getNil(); + } else { + // Import the node into doc so it has the correct owner document. + dtd = doc.importNode(dtd, true); + return new XmlDtd(ruby, dtd); + } + } + + /* + * dtd is the document node of a NekoDTD tree. + * NekoDTD tree looks like this: + * + *
+     * [#document: null]
+     *   [#comment: ...]
+     *   [#comment: ...]
+     *   [dtd: null]   // a DocumentType; isDTD(node) => false
+     *   [dtd: null]   // root of dtd, an Element node; isDTD(node) => true
+     *     ... decls, content models, etc. ...
+     *     [externalSubset: null] pubid="the pubid" sysid="the sysid"
+     *       ... external subset decls, etc. ...
+     * 
+ */ + protected static Node getInternalSubset(Node dtdTree) { + Node root; + for (root = dtdTree.getFirstChild(); ; root = root.getNextSibling()) { + if (root == null) + return null; + else if (isDTD(root)) + return root; // we have second dtd which is root + } + } + + protected static Node getExternalSubset(Node dtdTree) { + Node dtd = getInternalSubset(dtdTree); + if (dtd == null) return null; + for (Node ext = dtd.getFirstChild(); ; ext = ext.getNextSibling()) { + if (ext == null) + return null; + else if (isExternalSubset(ext)) + return ext; + } + } + + /** + * This overrides the #attributes method defined in + * lib/nokogiri/xml/node.rb. + */ @JRubyMethod public IRubyObject attributes(ThreadContext context) { - return ((XmlDocumentTypeImpl)internalNode).getAttributes(context); + if (attributes == null) extractDecls(context); + + return attributes; } @JRubyMethod public IRubyObject elements(ThreadContext context) { - return ((XmlDocumentTypeImpl)internalNode).getElements(context); + if (elements == null) extractDecls(context); + + return elements; } @JRubyMethod public IRubyObject entities(ThreadContext context) { - return ((XmlDocumentTypeImpl)internalNode).getEntities(context); + if (entities == null) extractDecls(context); + + return entities; } @JRubyMethod public IRubyObject notations(ThreadContext context) { - return ((XmlDocumentTypeImpl)internalNode).getNotations(context); + if (notations == null) extractDecls(context); + + return notations; + } + + /** + * Our "node" object is as-returned by NekoDTD. The actual + * "children" that we're interested in (Attribute declarations, + * etc.) are a few layers deep. + */ + @Override + @JRubyMethod + public IRubyObject children(ThreadContext context) { + if (children == null) extractDecls(context); + + return children; + } + + /** + * Returns the name of the dtd. 
+ */ + @Override + @JRubyMethod + public IRubyObject node_name(ThreadContext context) { + return name; } - + + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { + throw context.getRuntime() + .newRuntimeError("cannot change name of DTD"); + } + @JRubyMethod public IRubyObject system_id(ThreadContext context) { - return ((XmlDocumentTypeImpl)internalNode).getSystemId(context); + return sysId; } - + @JRubyMethod public IRubyObject external_id(ThreadContext context) { - return ((XmlDocumentTypeImpl)internalNode).getPublicId(context); + return pubId; + } + + public static boolean nameEquals(Node node, QName name) { + return name.localpart.equals(node.getNodeName()); + } + + public static boolean isExternalSubset(Node node) { + return nameEquals(node, DTDConfiguration.E_EXTERNAL_SUBSET); + } + + /** + * Checks instanceof Element so we return false for a DocumentType + * node (NekoDTD uses Element for all its nodes). + */ + public static boolean isDTD(Node node) { + return (node instanceof Element && + nameEquals(node, DTDConfiguration.E_DTD)); + } + + public static boolean isAttributeDecl(Node node) { + return nameEquals(node, DTDConfiguration.E_ATTRIBUTE_DECL); + } + + public static boolean isElementDecl(Node node) { + return nameEquals(node, DTDConfiguration.E_ELEMENT_DECL); } -} \ No newline at end of file + + public static boolean isEntityDecl(Node node) { + return (nameEquals(node, DTDConfiguration.E_INTERNAL_ENTITY_DECL) || + nameEquals(node, DTDConfiguration.E_UNPARSED_ENTITY_DECL)); + } + + public static boolean isNotationDecl(Node node) { + return nameEquals(node, DTDConfiguration.E_NOTATION_DECL); + } + + public static boolean isContentModel(Node node) { + return nameEquals(node, DTDConfiguration.E_CONTENT_MODEL); + } + + /** + * Recursively extract various DTD declarations and store them in + * the various collections. 
+ */ + protected void extractDecls(ThreadContext context) { + Ruby runtime = context.getRuntime(); + + // initialize data structures + allDecls = RubyArray.newArray(runtime); + attributes = RubyHash.newHash(runtime); + elements = RubyHash.newHash(runtime); + entities = RubyHash.newHash(runtime); + notations = RubyHash.newHash(runtime); + contentModels = RubyHash.newHash(runtime); + children = runtime.getNil(); + + // recursively extract decls + if (getNode() == null) return; // leave all the decl hashes empty + extractDecls(context, getNode().getFirstChild()); + + // convert allDecls to a NodeSet + children = + new XmlNodeSet(context.getRuntime(), + (RubyClass) + runtime.getClassFromPath("Nokogiri::XML::NodeSet"), + allDecls); + + // add attribute decls as attributes to the matching element decl + RubyArray keys = attributes.keys(); + for (int i = 0; i < keys.getLength(); ++i) { + IRubyObject akey = keys.entry(i); + IRubyObject val; + + val = attributes.op_aref(context, akey); + if (val.isNil()) continue; + XmlAttributeDecl attrDecl = (XmlAttributeDecl) val; + IRubyObject ekey = attrDecl.element_name(context); + val = elements.op_aref(context, ekey); + if (val.isNil()) continue; + XmlElementDecl elemDecl = (XmlElementDecl) val; + + elemDecl.appendAttrDecl(attrDecl); + } + + // add content models to the matching element decl + keys = contentModels.keys(); + for (int i = 0; i < keys.getLength(); ++i) { + IRubyObject key = keys.entry(i); + IRubyObject cm = contentModels.op_aref(context, key); + + IRubyObject elem = elements.op_aref(context, key); + if (elem.isNil()) continue; + if (((XmlElementDecl)elem).isEmpty()) continue; + ((XmlElementDecl) elem).setContentModel(cm); + } + } + + /** + * The node is either the first child of the root dtd + * node (as returned by getInternalSubset()) or the first child of + * the external subset node (as returned by getExternalSubset()).
+ * + * This recursive function will not descend into an + * 'externalSubset' node, thus for an internal subset it only + * extracts nodes in the internal subset, and for an external + * subset it extracts everything and assumes node + * and all children are part of the external subset. + */ + protected void extractDecls(ThreadContext context, Node node) { + while (node != null) { + if (isExternalSubset(node)) { + return; + } else if (isAttributeDecl(node)) { + XmlAttributeDecl decl = (XmlAttributeDecl) + XmlAttributeDecl.create(context, node); + attributes.op_aset(context, decl.attribute_name(context), decl); + allDecls.append(decl); + } else if (isElementDecl(node)) { + XmlElementDecl decl = (XmlElementDecl) + XmlElementDecl.create(context, node); + elements.op_aset(context, decl.element_name(context), decl); + allDecls.append(decl); + } else if (isEntityDecl(node)) { + XmlEntityDecl decl = (XmlEntityDecl) + XmlEntityDecl.create(context, node); + entities.op_aset(context, decl.node_name(context), decl); + allDecls.append(decl); + } else if (isNotationDecl(node)) { + XmlNode tmp = (XmlNode) + XmlNode.constructNode(context.getRuntime(), node); + IRubyObject decl = invoke(context, notationClass, "new", + tmp.getAttribute(context, "name"), + tmp.getAttribute(context, "pubid"), + tmp.getAttribute(context, "sysid")); + notations.op_aset(context, + tmp.getAttribute(context, "name"), decl); + allDecls.append(decl); + } else if (isContentModel(node)) { + XmlElementContent cm = + new XmlElementContent(context.getRuntime(), + (XmlDocument) document(context), + node); + contentModels.op_aset(context, cm.element_name(context), cm); + } else { + // recurse + extractDecls(context, node.getFirstChild()); + } + + node = node.getNextSibling(); + } + } + +} diff --git a/ext/java/nokogiri/XmlDtdDeclaration.java b/ext/java/nokogiri/XmlDtdDeclaration.java deleted file mode 100644 index dd69322b25d..00000000000 --- a/ext/java/nokogiri/XmlDtdDeclaration.java +++ /dev/null @@ -1,5 +0,0
@@
-package nokogiri;
-
-public interface XmlDtdDeclaration {
-    public void setDeclaration(String declaration);
-}
diff --git a/ext/java/nokogiri/XmlElement.java b/ext/java/nokogiri/XmlElement.java
index 43ccc6f6fce..315134b5335 100644
--- a/ext/java/nokogiri/XmlElement.java
+++ b/ext/java/nokogiri/XmlElement.java
@@ -5,13 +5,20 @@
 package nokogiri;
+import nokogiri.internals.SaveContext;
 import org.jruby.Ruby;
+import org.jruby.RubyArray;
 import org.jruby.RubyClass;
 import org.jruby.anno.JRubyMethod;
 import org.jruby.runtime.ThreadContext;
 import org.jruby.runtime.builtin.IRubyObject;
+import org.w3c.dom.Attr;
 import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
 /**
  *
@@ -29,17 +36,137 @@ public XmlElement(Ruby runtime, RubyClass klazz, Node element) {
     @Override
     @JRubyMethod
-    public IRubyObject add_namespace_definition(ThreadContext context, IRubyObject prefix, IRubyObject href) {
-        Element e = (Element) this.node();
+    public IRubyObject add_namespace_definition(ThreadContext context,
+                                                IRubyObject prefix,
+                                                IRubyObject href) {
+        Element element = (Element) node;
+
+        // Declare the namespace on the DOM element itself: "xmlns" for the
+        // default namespace (nil prefix), "xmlns:<prefix>" otherwise.
+        final String uri = "http://www.w3.org/2000/xmlns/";
+        String qName =
+            prefix.isNil() ? "xmlns" : "xmlns:" + rubyStringToString(prefix);
+        element.setAttributeNS(uri, qName, rubyStringToString(href));
+
+        XmlNamespace ns = (XmlNamespace)
+            super.add_namespace_definition(context, prefix, href);
+        updateNodeNamespaceIfNecessary(context, ns);
+
+        return ns;
+    }
-        String pref = "xmlns";
-
-        if(!prefix.isNil()) {
-            pref += ":"+prefix.convertToString().asJavaString();
+    @Override
+    public boolean isElement() { return true; }
+
+    /**
+     * Returns the value of the attribute named rbkey as a Ruby
+     * string, or nil when the element has no such attribute. An
+     * attribute that is present with an empty value yields "".
+     */
+    @Override
+    public IRubyObject get(ThreadContext context, IRubyObject rbkey) {
+        String key = rubyStringToString(rbkey);
+        Element element = (Element) node;
+        String value = element.getAttribute(key);
+        // getAttribute() returns "" both for a missing attribute and for
+        // one whose value is empty, so test for presence explicitly (this
+        // also keeps get() consistent with key_p()).
+        if(element.hasAttribute(key)){
+            return context.getRuntime().newString(value);
         }
+        return context.getRuntime().getNil();
+    }
+
+    /**
+     * Returns a Ruby boolean telling whether the element has an
+     * attribute named rbkey.
+     */
+    @Override
+    public IRubyObject key_p(ThreadContext context, IRubyObject rbkey) {
+        String key = rubyStringToString(rbkey);
+        Element element = (Element) node;
+        return context.getRuntime().newBoolean(element.hasAttribute(key));
+    }
-        e.setAttribute(pref, href.convertToString().asJavaString());
+    /**
+     * Sets attribute rbkey to rbval on the underlying element and
+     * returns this node.
+     */
+    @Override
+    public IRubyObject op_aset(ThreadContext context,
+                               IRubyObject rbkey,
+                               IRubyObject rbval) {
+        String key = rubyStringToString(rbkey);
+        String val = rubyStringToString(rbval);
+        Element element = (Element) node;
+        element.setAttribute(key, val);
+        return this;
+    }
-        return super.add_namespace_definition(context, prefix, href);
+    /**
+     * Removes the attribute called name from the underlying element
+     * and returns this node.
+     */
+    @Override
+    public IRubyObject remove_attribute(ThreadContext context, IRubyObject name) {
+        String key = name.convertToString().asJavaString();
+        Element element = (Element) node;
+        element.removeAttribute(key);
+        return this;
     }
+
+    /**
+     * Re-binds this element, its attributes and (through the child
+     * node set) its descendants to the namespaces that are in scope
+     * at their current position, by renaming each node with the
+     * namespace URI looked up for its prefix.
+     */
+    @Override
+    public void relink_namespace(ThreadContext context) {
+        Element e = (Element) node;
+
+        e.getOwnerDocument().renameNode(e, e.lookupNamespaceURI(e.getPrefix()), e.getNodeName());
+
+        if(e.hasAttributes()) {
+            NamedNodeMap attrs = e.getAttributes();
+
+            for(int i = 0; i < attrs.getLength(); i++) {
+                Attr attr = (Attr) attrs.item(i);
+                String nsUri = "";
+                String prefix = attr.getPrefix();
+                String nodeName = attr.getNodeName();
+                if("xml".equals(prefix)) {
+                    nsUri = "http://www.w3.org/XML/1998/namespace";
+                } else if("xmlns".equals(prefix) || nodeName.equals("xmlns")) {
+                    nsUri = "http://www.w3.org/2000/xmlns/";
+                } else if(prefix != null) {
+                    // lookupNamespaceURI() takes a namespace prefix, not the
+                    // qualified attribute name.
+                    nsUri = attr.lookupNamespaceURI(prefix);
+                } else {
+                    // An unprefixed attribute is in no namespace; it does not
+                    // inherit the default namespace.
+                    nsUri = null;
+                }
+
+                e.getOwnerDocument().renameNode(attr, nsUri, nodeName);
+
+            }
+        }
+
+        if(e.hasChildNodes()) {
+            ((XmlNodeSet) children(context)).relink_namespace(context);
+        }
+    }
+
+    /**
+     * Serializes this element through ctx: the start tag, the
+     * attribute nodes, then the children and the end tag. An element
+     * without children is written as an empty tag; an element whose
+     * first child is a text, CDATA or entity reference node uses the
+     * inline variants of the tag calls.
+     *
+     * TODO: previous code handled elements with parent 'p' differently?.
+     */
+    @Override
+    public void saveContent(ThreadContext context, SaveContext ctx) {
+        Node firstChild = node.getFirstChild();
+        boolean empty = (firstChild == null);
+        short type = -1;
+        if (!empty) type = firstChild.getNodeType();
+        boolean inline = (!empty &&
+                          (type == Node.TEXT_NODE ||
+                           type == Node.CDATA_SECTION_NODE ||
+                           type == Node.ENTITY_REFERENCE_NODE));
+
+        if (empty) {
+            ctx.emptyTagStart(node.getNodeName());
+        } else if (inline) {
+            ctx.openTagInlineStart(node.getNodeName());
+        } else {
+            ctx.openTagStart(node.getNodeName());
+        }
+
+        saveNodeListContent(context, (RubyArray) attribute_nodes(context),
+                            ctx);
+
+        if (empty) {
+            ctx.emptyTagEnd(node.getNodeName());
+            return;
+        } else if (inline) {
+            ctx.openTagInlineEnd();
+        } else {
+            ctx.openTagEnd();
+        }
+
+        saveNodeListContent(context, (XmlNodeSet) children(context), ctx);
+
+        if (inline) {
+            ctx.closeTagInline(node.getNodeName());
+        } else {
+            ctx.closeTag(node.getNodeName());
+        }
+
+    }
+
 }
diff --git a/ext/java/nokogiri/XmlElementContent.java b/ext/java/nokogiri/XmlElementContent.java
new file mode 100644
index 00000000000..7794cf7a547
--- /dev/null
+++ b/ext/java/nokogiri/XmlElementContent.java
@@ -0,0 +1,353 @@
+package nokogiri;
+
+import org.cyberneko.dtd.DTDConfiguration;
+import org.jruby.Ruby;
+import org.jruby.RubyArray;
+import org.jruby.RubyClass;
+import org.jruby.RubyObject;
+import org.jruby.anno.JRubyMethod;
+import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Element; +import org.w3c.dom.Node; + +import static nokogiri.internals.NokogiriHelpers.getPrefix; +import static nokogiri.internals.NokogiriHelpers.getLocalPart; +import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil; + +/** + * DTD element content model. This converts the nice tree of content + * model declarations returned by NekoDTD into the convoluted binary + * tree used by libxml. + * + * @author Patrick Mahoney + */ +public class XmlElementContent extends RubyObject { + protected String element_name = null; + + protected String name; + protected Type type; + protected Occur occur; + protected IRubyObject left; + protected IRubyObject right; + + /** values hardcoded from nokogiri/xml/element_content.rb; this + * makes me uneasy, but it works */ + public enum Type { + PCDATA (1), + ELEMENT (2), + SEQ (3), + OR (4); + + private final int value; + Type(int value) { + this.value = value; + } + public IRubyObject value(Ruby runtime) { + return runtime.newFixnum(value); + } + } + + public enum Occur { + ONCE (1), + OPT (2), + MULT (3), + PLUS (4); + + private final int value; + Occur(int value) { + this.value = value; + } + public IRubyObject value(Ruby runtime) { + return runtime.newFixnum(value); + } + } + + public static RubyClass getRubyClass(Ruby ruby) { + return (RubyClass) ruby + .getClassFromPath("Nokogiri::XML::ElementContent"); + } + + public XmlElementContent(Ruby runtime, RubyClass klass, + XmlDocument document, Node node) { + this(runtime, klass, document, new NodeIter(node)); + element_name = ((Element)node).getAttribute("ename"); + + /* + * This is a bit of a hack to match libxml behavior. + * + * If the tree contains but a single group with a single + * element, we can simply return the bare element without the + * surrounding group. + * + * TODO: is SEQ/ONCE with a single child the only case for + * reduction? 
+ * + * - pmahoney + */ + if (!this.left.isNil()) { + XmlElementContent left = (XmlElementContent) this.left; + if (type == Type.SEQ && + occur == Occur.ONCE && + left.type == Type.ELEMENT && + right.isNil()) { + this.name = left.name; + this.type = left.type; + this.occur = left.occur; + this.left = this.right; // both nil + } + } + } + + public XmlElementContent(Ruby runtime, XmlDocument document, Node node) { + this(runtime, getRubyClass(runtime), document, node); + } + + public XmlElementContent(Ruby runtime, RubyClass klass, + XmlDocument doc, NodeIter iter) { + super(runtime, klass); + + setInstanceVariable("@document", doc); + + name = null; + type = Type.SEQ; + occur = Occur.ONCE; + left = runtime.getNil(); + right = runtime.getNil(); + + apply(runtime, klass, doc, iter); + } + + protected XmlElementContent(Ruby runtime, RubyClass klass, + Type type, XmlDocument doc, NodeIter iter, + XmlElementContent left) { + super(runtime, klass); + + setInstanceVariable("@document", doc); + + name = null; + this.type = type; + occur = Occur.ONCE; + this.left = left; + right = runtime.getNil(); + + switch (type) { + case SEQ: + case OR: + applyGroup(runtime, klass, doc, iter); + default: + // noop + } + } + + /** + * Applies the current node in iter to this content + * model. When finished, iter will point to the last + * processed node. 
+ */ + protected void apply(Ruby runtime, RubyClass klass, + XmlDocument doc, + NodeIter iter) { + if (iter.isNull()) return; + + Element elem = (Element) iter.current(); + + if (isGroup(elem) && iter.hasChildren()) { + iter.firstChild(); + applyGroup(runtime, klass, doc, iter); + iter.parent(); + } else if (isElement(elem)) { + name = elem.getAttribute("name"); + type = Type.ELEMENT; + } + + iter.nextSibling(); + if (iter.isNull()) return; + if (isOccurrence(iter.current())) { + setOccur(((Element)iter.current()).getAttribute("type")); + iter.nextSibling(); + } + } + + protected void applyGroup(Ruby runtime, RubyClass klass, + XmlDocument doc, NodeIter iter) { + // LEFT branch + + if (iter.isNull()) return; + + if (left.isNil()) { + left = new XmlElementContent(runtime, klass, doc, iter); + + if (iter.isNull()) return; + + if (isSeparator(iter.current())) { + setType(((Element)iter.current()).getAttribute("type")); + iter.nextSibling(); // skip separator + } + } + + // RIGHT branch + + if (iter.isNull()) return; + + right = new XmlElementContent(runtime, klass, doc, iter); + + if (iter.isNull()) return; + if (isSeparator(iter.current())) + iter.nextSibling(); // skip separator + if (iter.isNull()) return; + + // binary tree can only hold two children. If we have more, + // the right child is another tree with the same sequence + // "type". The "left" of the new tree is what we've + // currently consumed as our "right" branch of this tree. + right = new XmlElementContent(runtime, klass, type, doc, iter, + (XmlElementContent) right); + } + + /** + * Set the type based on the separator node type string. 
+     */
+    protected void setType(String type) {
+        if ("|".equals(type)) this.type = Type.OR;
+        else if (",".equals(type)) this.type = Type.SEQ;
+    }
+
+    /**
+     * Set the occurrence based on the occurrence node type string:
+     * "?" is optional, "*" is zero or more, "+" is one or more. Any
+     * other value leaves the current occurrence (ONCE by default)
+     * untouched.
+     */
+    protected void setOccur(String type) {
+        if ("?".equals(type)) this.occur = Occur.OPT;
+        else if ("*".equals(type)) this.occur = Occur.MULT;
+        else if ("+".equals(type)) this.occur = Occur.PLUS;
+    }
+
+    public static boolean isGroup(Node node) {
+        return XmlDtd.nameEquals(node, DTDConfiguration.E_GROUP);
+    }
+
+    // content model element, not Element node type
+    public static boolean isElement(Node node) {
+        return XmlDtd.nameEquals(node, DTDConfiguration.E_ELEMENT);
+    }
+
+    public static boolean isSeparator(Node node) {
+        return XmlDtd.nameEquals(node, DTDConfiguration.E_SEPARATOR);
+    }
+
+    public static boolean isOccurrence(Node node) {
+        return XmlDtd.nameEquals(node, DTDConfiguration.E_OCCURRENCE);
+    }
+
+    /**
+     * Return the name of the element to which this content model
+     * applies. Only works for the root of the tree.
+     */
+    public IRubyObject element_name(ThreadContext context) {
+        return nonEmptyStringOrNil(context.getRuntime(), element_name);
+    }
+
+    @JRubyMethod
+    public IRubyObject prefix(ThreadContext context) {
+        return nonEmptyStringOrNil(context.getRuntime(), getPrefix(name));
+    }
+
+    @JRubyMethod
+    public IRubyObject name(ThreadContext context) {
+        return nonEmptyStringOrNil(context.getRuntime(), getLocalPart(name));
+    }
+
+    @JRubyMethod
+    public IRubyObject type(ThreadContext context) {
+        return type.value(context.getRuntime());
+    }
+
+    @JRubyMethod
+    public IRubyObject occur(ThreadContext context) {
+        return occur.value(context.getRuntime());
+    }
+
+    @JRubyMethod
+    public IRubyObject c1(ThreadContext context) {
+        return left;
+    }
+
+    @JRubyMethod
+    public IRubyObject c2(ThreadContext context) {
+        return right;
+    }
+
+    /**
+     * Iterator for a tree of Nodes.  Has a current position that
+     * points to a given node.  Calling nextSibling() on the last
+     * sibling results in a current position of null.
This position
+     * is not fatal and can be escaped by calling parent() (which
+     * moves to the parent of previous sibling).  The null position is
+     * used to indicate the end of a list.
+     */
+    protected static class NodeIter {
+        // last non-null position, recorded when nextSibling() runs off
+        // the end of a sibling list
+        protected Node pre;
+        // current position; null means "past the last sibling"
+        protected Node cur;
+
+        /**
+         * Start positioned on the first child of node, skipping node
+         * itself. current() is therefore null right after
+         * construction only when node has no children; no initial
+         * call to nextSibling() is needed.
+         */
+        public NodeIter(Node node) {
+            pre = null;
+            cur = node.getFirstChild(); // skip root contentModel node
+        }
+
+        public Node current() {
+            return cur;
+        }
+
+        public boolean isNull() {
+            return (cur == null);
+        }
+
+        public boolean hasChildren() {
+            return (cur != null && cur.hasChildNodes());
+        }
+
+        /**
+         * Descend to the first child. Throws a RuntimeException when
+         * the current position is null or has no children.
+         */
+        public Node firstChild() {
+            if (cur == null) throw new RuntimeException("no children");
+            Node ch = cur.getFirstChild();
+            if (ch == null) throw new RuntimeException("no children");
+
+            cur = ch;
+            return cur;
+        }
+
+        /**
+         * Move to the next sibling. When there is none, the old
+         * position is remembered in pre and the current position
+         * becomes null. Throws a RuntimeException when the current
+         * position is already null.
+         */
+        public Node nextSibling() {
+            if (cur == null) {
+                throw new RuntimeException("no next sibling");
+            } else {
+                Node ns = cur.getNextSibling();
+                if (ns == null) {
+                    pre = cur;
+                    cur = null;
+                } else {
+                    cur = ns;
+                }
+                return cur;
+            }
+        }
+
+        /**
+         * Move to the parent.
+ */ + public Node parent() { + if (cur == null) cur = pre; + + Node p = cur.getParentNode(); + if (p == null) throw new RuntimeException("no parent"); + + cur = p; + return cur; + } + } +} diff --git a/ext/java/nokogiri/XmlElementDecl.java b/ext/java/nokogiri/XmlElementDecl.java index ea09a6ddee8..f2842fe4a61 100644 --- a/ext/java/nokogiri/XmlElementDecl.java +++ b/ext/java/nokogiri/XmlElementDecl.java @@ -1,28 +1,118 @@ package nokogiri; -import nokogiri.internals.XmlElementDeclImpl; +import java.util.ArrayList; import org.jruby.Ruby; +import org.jruby.RubyArray; import org.jruby.RubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Element; import org.w3c.dom.Node; +import static nokogiri.internals.NokogiriHelpers.getPrefix; +import static nokogiri.internals.NokogiriHelpers.getLocalPart; + /** - * ELEMENT declaration of DTD - * - * @author Yoko Harada + * DTD element declaration. + * + * @author Patrick Mahoney */ -public class XmlElementDecl extends XmlNode implements XmlDtdDeclaration { +public class XmlElementDecl extends XmlNode { + RubyArray attrDecls; + + IRubyObject contentModel; + + public static RubyClass getRubyClass(Ruby ruby) { + return (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementDecl"); + } + + public XmlElementDecl(Ruby ruby, RubyClass klass) { + super(ruby, klass); + throw ruby.newRuntimeError("node required"); + } + + /** + * Initialize based on an elementDecl node from a NekoDTD parsed + * DTD. 
+ */ + public XmlElementDecl(Ruby ruby, RubyClass klass, Node elemDeclNode) { + super(ruby, klass, elemDeclNode); + attrDecls = RubyArray.newArray(ruby); + contentModel = ruby.getNil(); + } + + public static IRubyObject create(ThreadContext context, Node elemDeclNode) { + XmlElementDecl self = + new XmlElementDecl(context.getRuntime(), + getRubyClass(context.getRuntime()), + elemDeclNode); + return self; + } + + public IRubyObject element_name(ThreadContext context) { + return getAttribute(context, "ename"); + } + + public void setContentModel(IRubyObject cm) { + contentModel = cm; + } + + @Override + @JRubyMethod + public IRubyObject content(ThreadContext context) { + return contentModel; + } - public XmlElementDecl(Ruby runtime, RubyClass klazz) { - super(runtime, klazz); + public boolean isEmpty() { + return "EMPTY".equals(getAttribute("model")); } - public XmlElementDecl(Ruby runtime, RubyClass klazz, Node entity) { - super(runtime, klazz, entity); - internalNode = new XmlElementDeclImpl(runtime, entity); + @JRubyMethod + public IRubyObject prefix(ThreadContext context) { + String enamePrefix = getPrefix(getAttribute("ename")); + if (enamePrefix == null) + return context.getRuntime().getNil(); + else + return context.getRuntime().newString(enamePrefix); } - - public void setDeclaration(String declaration) { - ((XmlElementDeclImpl)internalNode).setDeclaration(declaration); + + /** + * Returns the local part of the element name. 
+ */ + @Override + @JRubyMethod + public IRubyObject node_name(ThreadContext context) { + String ename = getLocalPart(getAttribute("ename")); + return context.getRuntime().newString(ename); } + + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { + throw context.getRuntime() + .newRuntimeError("cannot change name of DTD decl"); + } + + @Override + @JRubyMethod + public IRubyObject attribute_nodes(ThreadContext context) { + return attrDecls; + } + + @Override + @JRubyMethod + public IRubyObject attribute(ThreadContext context, IRubyObject name) { + throw context.getRuntime() + .newRuntimeError("attribute by name not implemented"); + } + + public void appendAttrDecl(XmlAttributeDecl decl) { + attrDecls.append(decl); + } + +// @JRubyMethod +// public element_type(ThreadContext context) { +// } } diff --git a/ext/java/nokogiri/XmlEntityDecl.java b/ext/java/nokogiri/XmlEntityDecl.java index 6359fa5a906..679cbd1a538 100644 --- a/ext/java/nokogiri/XmlEntityDecl.java +++ b/ext/java/nokogiri/XmlEntityDecl.java @@ -1,26 +1,80 @@ package nokogiri; -import nokogiri.internals.XmlEntityDeclImpl; import org.jruby.Ruby; +import org.jruby.RubyArray; import org.jruby.RubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Element; import org.w3c.dom.Node; /** - * ENTITY declaration of DTD - * @author Yoko Harada + * DTD entity declaration. 
+ * + * @author Patrick Mahoney */ -public class XmlEntityDecl extends XmlNode implements XmlDtdDeclaration { +public class XmlEntityDecl extends XmlNode { - public XmlEntityDecl(Ruby runtime, RubyClass klazz) { - super(runtime, klazz); + public static RubyClass getRubyClass(Ruby ruby) { + return (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityDecl"); } - public XmlEntityDecl(Ruby runtime, RubyClass klazz, Node entity) { - super(runtime, klazz, entity); - internalNode = new XmlEntityDeclImpl(runtime, entity); + public XmlEntityDecl(Ruby ruby, RubyClass klass) { + super(ruby, klass); + throw ruby.newRuntimeError("node required"); } - - public void setDeclaration(String declaration) { - ((XmlEntityDeclImpl)internalNode).setDeclaration(declaration); + + /** + * Initialize based on an entityDecl node from a NekoDTD parsed + * DTD. + */ + public XmlEntityDecl(Ruby ruby, RubyClass klass, Node entDeclNode) { + super(ruby, klass, entDeclNode); + } + + public static IRubyObject create(ThreadContext context, Node entDeclNode) { + XmlEntityDecl self = + new XmlEntityDecl(context.getRuntime(), + getRubyClass(context.getRuntime()), + entDeclNode); + return self; + } + + /** + * Returns the local part of the element name. + */ + @Override + @JRubyMethod + public IRubyObject node_name(ThreadContext context) { + return getAttribute(context, "name"); + } + + @Override + @JRubyMethod(name = "node_name=") + public IRubyObject node_name_set(ThreadContext context, IRubyObject name) { + throw context.getRuntime() + .newRuntimeError("cannot change name of DTD decl"); + } + + @JRubyMethod + public IRubyObject content(ThreadContext context) { + return getAttribute(context, "value"); + } + + // TODO: what is content vs. original_content? 
+ @JRubyMethod + public IRubyObject original_content(ThreadContext context) { + return getAttribute(context, "value"); + } + + @JRubyMethod + public IRubyObject system_id(ThreadContext context) { + return getAttribute(context, "sysid"); + } + + @JRubyMethod + public IRubyObject external_id(ThreadContext context) { + return getAttribute(context, "pubid"); } } diff --git a/ext/java/nokogiri/XmlEntityReference.java b/ext/java/nokogiri/XmlEntityReference.java index 0fc80f1dda9..7bb914064d5 100644 --- a/ext/java/nokogiri/XmlEntityReference.java +++ b/ext/java/nokogiri/XmlEntityReference.java @@ -2,10 +2,19 @@ import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.javasupport.util.RuntimeHelpers; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Document; +import org.w3c.dom.Node; + +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; /** * * @author sergio + * @author Patrick Mahoney */ public class XmlEntityReference extends XmlNode{ @@ -13,4 +22,26 @@ public XmlEntityReference(Ruby ruby, RubyClass klazz) { super(ruby, klazz); } + public XmlEntityReference(Ruby ruby, RubyClass klass, Node node) { + super(ruby, klass, node); + } + + @JRubyMethod(name="new", meta=true) + public static IRubyObject rbNew(ThreadContext context, + IRubyObject klass, + IRubyObject doc, + IRubyObject name) { + Document document = ((XmlNode) doc).getOwnerDocument(); + Node node = document.createEntityReference(rubyStringToString(name)); + XmlEntityReference self = new XmlEntityReference(context.getRuntime(), + (RubyClass) klass, + node); + + RuntimeHelpers.invoke(context, self, "initialize", doc, name); + + // TODO: if_block_given. 
+ + return self; + } + } diff --git a/ext/java/nokogiri/XmlNamespace.java b/ext/java/nokogiri/XmlNamespace.java index 4cf381033da..51befaa2e42 100644 --- a/ext/java/nokogiri/XmlNamespace.java +++ b/ext/java/nokogiri/XmlNamespace.java @@ -48,9 +48,9 @@ public XmlNamespace(Ruby ruby, RubyClass klazz, IRubyObject prefix, IRubyObject public static XmlNamespace fromNode(Ruby ruby, Node node) { String localName = getLocalNameForNamespace(node.getNodeName()); - return new XmlNamespace(ruby, - (RubyClass) ruby.getClassFromPath("Nokogiri::XML::Namespace"), - localName, node.getNodeValue()); + RubyClass klass = (RubyClass) + ruby.getClassFromPath("Nokogiri::XML::Namespace"); + return new XmlNamespace(ruby, klass, localName, node.getNodeValue()); } public boolean isEmpty() { diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 4f9ddac5209..1dacde6e99c 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -1,27 +1,21 @@ package nokogiri; -import static java.lang.Math.max; - +import nokogiri.internals.NokogiriHelpers; import java.io.ByteArrayInputStream; import java.io.IOException; - import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; - -import nokogiri.internals.NokogiriHelpers; import nokogiri.internals.NokogiriNamespaceCache; import nokogiri.internals.NokogiriUserDataHandler; -import nokogiri.internals.ParseOptions; +import nokogiri.internals.XmlDomParserContext; import nokogiri.internals.SaveContext; -import nokogiri.internals.XmlDocumentFragmentImpl; -import nokogiri.internals.XmlNodeImpl; - import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyFixnum; import org.jruby.RubyHash; +import org.jruby.RubyNil; import org.jruby.RubyObject; import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; @@ -35,6 +29,7 @@ import org.jruby.util.ByteList; import 
org.w3c.dom.DOMException; import org.w3c.dom.Document; +import org.w3c.dom.DocumentType; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; @@ -44,11 +39,27 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import static java.lang.Math.max; +import static nokogiri.internals.NokogiriHelpers.getCachedNodeOrCreate; +import static nokogiri.internals.NokogiriHelpers.isNamespace; +import static nokogiri.internals.NokogiriHelpers.isNonDefaultNamespace; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; +import static nokogiri.internals.NokogiriHelpers.nonEmptyStringOrNil; +import static nokogiri.internals.NokogiriHelpers.stringOrNil; + public class XmlNode extends RubyObject { - protected XmlNodeImpl internalNode; + /** The underlying Node object. */ + protected Node node; protected NokogiriNamespaceCache nsCache; + /* Cached objects */ + protected IRubyObject content = null; + protected IRubyObject doc = null; + protected IRubyObject name = null; + protected IRubyObject namespace = null; + protected IRubyObject namespace_definitions = null; + /* * Taken from http://ejohn.org/blog/comparing-document-position/ * Used for compareDocumentPosition. @@ -61,38 +72,74 @@ public class XmlNode extends RubyObject { protected static final int FIRST_PRECEDES_SECOND = 4; protected static final int SECOND_CONTAINS_FIRST = 8; protected static final int FIRST_CONTAINS_SECOND = 16; - - public XmlNode(Ruby ruby, RubyClass cls){ - this(ruby,cls,null); + + /** + * Cast node to an XmlNode or raise a type error + * in context. 
+ */ + protected static XmlNode asXmlNode(ThreadContext context, IRubyObject node) { + return _asXmlNode(context, node, false); } - public XmlNode(Ruby ruby, RubyClass cls, Node node) { - super(ruby, cls); - this.nsCache = new NokogiriNamespaceCache(); - this.internalNode = XmlNodeImpl.getImplForNode(ruby, node); - if(node != null && node.getNodeType() != Node.DOCUMENT_NODE) { - XmlDocument ownerXml = (XmlDocument) this.document(ruby.getCurrentContext()); - - // Don't touch this. Some nodes has no document - if(ownerXml == null) return; - RuntimeHelpers.invoke(ruby.getCurrentContext(), ownerXml, "decorate", this); - node.setUserData(NokogiriUserDataHandler.CACHED_NODE, this, - new NokogiriUserDataHandler(ruby)); - } + /** + * Cast node to an XmlNode, or null if RubyNil, or + * raise a type error in context. + */ + protected static XmlNode asXmlNodeOrNull(ThreadContext context, IRubyObject node) { + return _asXmlNode(context, node, true); } - protected void assimilateXmlNode(ThreadContext context, IRubyObject otherNode) { - XmlNode toAssimilate = asXmlNode(context, otherNode); + /** + * Get the XmlNode associated with the underlying + * node. Throws an exception if there is no XmlNode. + */ + public static XmlNode fromNode(ThreadContext context, Node node) { + if (node == null) + throw context.getRuntime().newRuntimeError("node is null"); - this.internalNode = toAssimilate.internalNode; + XmlNode xnode = (XmlNode) node.getUserData(NokogiriUserDataHandler.CACHED_NODE); + if (xnode == null) + throw context.getRuntime().newRuntimeError("no cached XmlNode"); + + return xnode; } - private static XmlNode asXmlNode(ThreadContext context, IRubyObject node) { - if (!(node instanceof XmlNode)) { - throw context.getRuntime().newTypeError(node, (RubyClass) context.getRuntime().getClassFromPath("Nokogiri::XML::Node")); + /** + * Get the XmlNode associated with the underlying + * node. 
Creates a new XmlNode (or appropriate + * subclass) wrapping node if there is no cached + * value. + */ + public static IRubyObject fromNodeOrCreate(ThreadContext context, + Node node) { + Ruby ruby = context.getRuntime(); + if (node == null) return ruby.getNil(); + XmlNode xmlNode = + (XmlNode) node.getUserData(NokogiriUserDataHandler.CACHED_NODE); + if (xmlNode == null) { + xmlNode = (XmlNode) XmlNode.constructNode(ruby, node); + node.setUserData(NokogiriUserDataHandler.CACHED_NODE, xmlNode, + new NokogiriUserDataHandler(ruby)); } + return xmlNode; + } - return (XmlNode) node; + /** + * Cast node to an XmlNode if possible. If + * allowNil is true and node is Ruby nil, returns + * null. Otherwise, raise a type error in context. + */ + private static XmlNode _asXmlNode(ThreadContext context, + IRubyObject node, + boolean allowNil) { + if (allowNil && (node == null || node.isNil())) { + return null; + } else if (!(node instanceof XmlNode)) { + Ruby ruby = context.getRuntime(); + throw ruby.newTypeError(node,(RubyClass) ruby.getClassFromPath("Nokogiri::XML::Node")); + } else { + return (XmlNode) node; + } } /** @@ -105,41 +152,60 @@ public static void coalesceTextNodes(ThreadContext context, IRubyObject prev, IR XmlNode p = asXmlNode(context, prev); XmlNode c = asXmlNode(context, cur); - Node pNode = p.node(); - Node cNode = c.node(); + Node pNode = p.node; + Node cNode = c.node; pNode.setNodeValue(pNode.getNodeValue()+cNode.getNodeValue()); - p.internalNode.resetContent(); + p.content = null; // clear cached content c.assimilateXmlNode(context, p); } /** - * Given three nodes such that firstNode is previousSibling of secondNode - * and secondNode is previousSibling of third node, this method coalesces - * two subsequent TextNodes. + * Coalesce text nodes around anchorNode. If + * anchorNode has siblings (previous or next) that + * are text nodes, the content will be merged into + * anchorNode and the redundant nodes will be removed + * from the DOM. 
+ * + * To match libxml behavior (?) the final content of + * anchorNode and any removed nodes will be + * identical. + * * @param context - * @param firstNode - * @param secondNode - * @param thirdNode + * @param anchorNode */ - protected static void coalesceTextNodesInteligently(ThreadContext context, IRubyObject firstNode, - IRubyObject secondNode, IRubyObject thirdNode) { - - Node first = (firstNode.isNil()) ? null : asXmlNode(context, firstNode).node(); - Node second = asXmlNode(context, secondNode).node(); - Node third = (thirdNode.isNil()) ? null : asXmlNode(context, thirdNode).node(); - - if(second.getNodeType() == Node.TEXT_NODE) { - if(first != null && first.getNodeType() == Node.TEXT_NODE) { - coalesceTextNodes(context, firstNode, secondNode); - } else if(third != null && third.getNodeType() == Node.TEXT_NODE) { - coalesceTextNodes(context, secondNode, thirdNode); + protected static void coalesceTextNodes(ThreadContext context, + IRubyObject anchorNode) { + XmlNode xa = asXmlNode(context, anchorNode); + + XmlNode xp = asXmlNodeOrNull(context, xa.previous_sibling(context)); + XmlNode xn = asXmlNodeOrNull(context, xa.next_sibling(context)); + + Node p = xp == null ? null : xp.node; + Node a = xa.node; + Node n = xn == null ? null : xn.node; + + Node parent = a.getParentNode(); + + if(a.getNodeType() == Node.TEXT_NODE) { + if(p != null && p.getNodeType() == Node.TEXT_NODE) { + xa.setContent(p.getNodeValue() + a.getNodeValue()); + parent.removeChild(p); + xp.assimilateXmlNode(context, xa); + } else if(n != null && n.getNodeType() == Node.TEXT_NODE) { + xa.setContent(a.getNodeValue() + n.getNodeValue()); + parent.removeChild(n); + xn.assimilateXmlNode(context, xa); } } - } + /** + * Construct a new XmlNode wrapping node. The proper + * subclass of XmlNode is chosen based on the type of + * node. 
+ */ public static IRubyObject constructNode(Ruby ruby, Node node) { if (node == null) return ruby.getNil(); // this is slow; need a way to cache nokogiri classes/modules somewhere @@ -153,33 +219,71 @@ public static IRubyObject constructNode(Ruby ruby, Node node) { case Node.ELEMENT_NODE: return new XmlElement(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"), node); case Node.ENTITY_NODE: - return new XmlNode(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityDecl"), node); + return new XmlNode(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityDeclaration"), node); case Node.CDATA_SECTION_NODE: return new XmlCdata(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::CDATA"), node); case Node.DOCUMENT_NODE: return new XmlDocument(ruby, (Document) node); - case Node.DOCUMENT_TYPE_NODE: - return new XmlDtd(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DTD"), node); default: return new XmlNode(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Node"), node); } } - protected RubyArray getNsDefinitions(Ruby ruby) { - return this.internalNode.getNsDefinitions(ruby); + public XmlNode(Ruby ruby, RubyClass cls){ + this(ruby, cls, null); } - public Node getNode() { - return this.internalNode.getNode(); + public XmlNode(Ruby ruby, RubyClass cls, Node node) { + super(ruby, cls); + this.nsCache = new NokogiriNamespaceCache(); + this.node = node; + + if (node != null) { + resetCache(ruby); + + if (node.getNodeType() != Node.DOCUMENT_NODE) { + XmlNode owner = (XmlNode) this.document(ruby.getCurrentContext()); + + if (owner != null && owner instanceof XmlDocument) { + RuntimeHelpers.invoke(ruby.getCurrentContext(), + owner, "decorate", this); + } + } + } } - public static Node getNodeFromXmlNode(ThreadContext context, IRubyObject xmlNode) { - - return asXmlNode(context, xmlNode).node(); + public void resetCache(Ruby ruby) { + node.setUserData(NokogiriUserDataHandler.CACHED_NODE, this, + new NokogiriUserDataHandler(ruby)); + } + + 
+ /** + * Set the underlying node of this node to the underlying node of + * otherNode. + * + * FIXME: also update the cached node? + */ + protected void assimilateXmlNode(ThreadContext context, IRubyObject otherNode) { + XmlNode toAssimilate = asXmlNode(context, otherNode); + + this.node = toAssimilate.node; + content = null; // clear cache + } + + /** + * See org.w3.dom.Node#normalize. + */ + public void normalize() { + node.normalize(); } - protected Node getNodeToCompare() { - return this.getNode(); + public Node getNode() { + return node; + } + + public static Node getNodeFromXmlNode(ThreadContext context, IRubyObject xmlNode) { + return asXmlNode(context, xmlNode).node; } protected String indentString(IRubyObject indentStringObject, String xml) { @@ -231,19 +335,11 @@ public IRubyObject internal_node(ThreadContext context) { return context.getRuntime().newData(this.getType(), this.getNode()); } - public boolean isComment() { return this.internalNode.methods().isComment(); } + public boolean isComment() { return false; } - public boolean isElement() { return this.internalNode.methods().isElement(); } + public boolean isElement() { return false; } - public boolean isProcessingInstruction() { return this.internalNode.methods().isProcessingInstruction(); } - - /* - * A more rubyist way to get the internal node. - */ - - public Node node() { - return this.getNode(); - } + public boolean isProcessingInstruction() { return false; } protected IRubyObject parseRubyString(Ruby ruby, RubyString content) { try { @@ -270,50 +366,60 @@ public InputSource resolveEntity(String arg0, String arg1) throws SAXException, } } - public void post_add_child(ThreadContext context, XmlNode current, XmlNode child) { - this.internalNode.post_add_child(context, current, child); + /** + * Return the string value of the attribute key or + * nil. 
+ * + * Only applies where the underlying Node is an Element node, but + * implemented here in XmlNode because not all nodes with + * underlying Element nodes subclass XmlElement, such as the DTD + * declarations like XmlElementDecl. + */ + protected IRubyObject getAttribute(ThreadContext context, String key) { + return getAttribute(context.getRuntime(), key); } - public void relink_namespace(ThreadContext context) { - this.internalNode.methods().relink_namespace(context, this); - - ((XmlNodeSet) this.children(context)).relink_namespace(context); + protected IRubyObject getAttribute(Ruby runtime, String key) { + String value = getAttribute(key); + return nonEmptyStringOrNil(runtime, value); } - public void resetDocumentCache() { - this.internalNode.resetDocument(); - } + protected String getAttribute(String key) { + if (!(node instanceof Element)) return null; - public void resetDueToRenaming() { - this.internalNode.resetName(); + String value = ((Element)node).getAttribute(key); + return value.isEmpty() ? 
null : value; } - public void saveContent(ThreadContext context, SaveContext ctx) { - this.internalNode.methods().saveContent(context, this, ctx); + + public void post_add_child(ThreadContext context, XmlNode current, XmlNode child) { } - public void saveContentAsHtml(ThreadContext context, SaveContext ctx) { - this.internalNode.saveContentAsHtml(context, this, ctx); + public void setNamespaceDefinitions(IRubyObject namespace_definitions) { + this.namespace_definitions = namespace_definitions; } - public void setDocument(IRubyObject doc) { - this.internalNode.setDocument(doc); + public void relink_namespace(ThreadContext context) { + ((XmlNodeSet) this.children(context)).relink_namespace(context); } - public void setInternalNode(XmlNodeImpl impl) { - this.internalNode = impl; + public void saveContent(ThreadContext context, SaveContext ctx) { } public void setName(IRubyObject name) { - this.internalNode.setName(name); + this.name = name; + } + + public void setDocument(IRubyObject doc) { + this.doc = doc; } protected void setNode(Ruby ruby, Node node) { - this.internalNode = XmlNodeImpl.getImplForNode(ruby, node); + this.node = node; } public void updateNodeNamespaceIfNecessary(ThreadContext context, XmlNamespace ns) { - String oldPrefix = this.node().getPrefix(); + String oldPrefix = this.node.getPrefix(); String uri = ns.href(context).convertToString().asJavaString(); /* @@ -324,22 +430,44 @@ public void updateNodeNamespaceIfNecessary(ThreadContext context, XmlNamespace n && oldPrefix.equals(ns.prefix(context).convertToString().asJavaString())); if(update) { - this.node().getOwnerDocument().renameNode(this.node(), uri, this.node().getNodeName()); - this.internalNode.setNamespace(ns); + this.node.getOwnerDocument().renameNode(this.node, uri, this.node.getNodeName()); + this.namespace = ns; } } + public RubyString getNodeName(ThreadContext context) { + String str = null; + + if (this.name == null && node != null) { + str = node.getNodeName(); + if (str == null) { 
str = ""; } + + if(str.equals("#document")) { + str = "document"; + } else if(str.equals("#text")) { + str = "text"; + } else { + str = NokogiriHelpers.getLocalPart(str); + } + + if (str == null) str = ""; + this.name = context.getRuntime().newString(str); + } + + return (RubyString) this.name; + } + @JRubyMethod(name = "new", meta = true) public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject name, IRubyObject doc, Block block) { Ruby ruby = context.getRuntime(); - if(!(doc instanceof XmlDocument)) { - throw ruby.newArgumentError("document must be an instance of Nokogiri::XML::Document"); + Document document = asXmlNode(context, doc).getOwnerDocument(); + if (document == null) { + throw ruby.newArgumentError("node must have owner document"); } - - XmlDocument xmlDoc = (XmlDocument)doc; - Document document = xmlDoc.getDocument(); + XmlDocument xmlDoc = + (XmlDocument) getCachedNodeOrCreate(ruby, document); Element element = document.createElementNS(null, name.convertToString().asJavaString()); @@ -350,95 +478,72 @@ public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObj } XmlElement node = new XmlElement(ruby, - klazz, - element); - node.internalNode.setDocument(doc); - + klazz, + element); + node.setDocument(xmlDoc); + RuntimeHelpers.invoke(context, xmlDoc, "decorate", node); element.setUserData(NokogiriUserDataHandler.CACHED_NODE, - node, new NokogiriUserDataHandler(ruby)); + node, new NokogiriUserDataHandler(ruby)); if(block.isGiven()) block.call(context, node); return node; } - @JRubyMethod - public IRubyObject add_child(ThreadContext context, IRubyObject child) { - XmlNode childNode = asXmlNode(context, child); - childNode.internalNode.methods().add_child(context, this, childNode); - - return child; + protected void saveNodeListContent(ThreadContext context, XmlNodeSet list, SaveContext ctx) { + saveNodeListContent(context, (RubyArray) list.to_a(context), ctx); } - @JRubyMethod - public IRubyObject 
add_child_node(ThreadContext context, IRubyObject child) { - XmlNode childNode = asXmlNode(context, child); - if (this instanceof XmlDocumentFragment) { - ((XmlDocumentFragmentImpl)internalNode).use_super_add_child(context, this, childNode); - } - internalNode.methods().add_child(context, this, childNode); - return this; - } + protected void saveNodeListContent(ThreadContext context, RubyArray array, SaveContext ctx) { + int length = array.getLength(); - @JRubyMethod - public IRubyObject add_namespace_definition(ThreadContext context, IRubyObject prefix, IRubyObject href) { - String prefixString = prefix.isNil() ? "" : prefix.convertToString().asJavaString(); - String hrefString = href.convertToString().asJavaString(); - XmlNamespace ns = this.nsCache.get(context, this, prefixString, hrefString); + boolean formatIndentation = ctx.format() && ctx.indentString()!=null; - this.internalNode.methods().add_namespace_definitions(context, this, ns, - (prefix.isNil()) ? "xmlns" : "xmlns:"+prefixString, hrefString); + for(int i = 0; i < length; i++) { + XmlNode cur = (XmlNode) array.get(i); - this.internalNode.resetNamespaceDefinitions(); - return ns; - } + // if(formatIndentation && + // (cur.isElement() || cur.isComment() || cur.isProcessingInstruction())) { + // ctx.append(ctx.getCurrentIndentString()); + // } - @JRubyMethod - public IRubyObject add_next_sibling(ThreadContext context, IRubyObject appendNode) { - IRubyObject nextSibling = this.next_sibling(context); + cur.saveContent(context, ctx); - XmlNode otherNode = asXmlNode(context, appendNode); - Node next = this.node().getNextSibling(); - if (next != null) { - this.node().getParentNode().insertBefore(otherNode.node(), next); - } else { - this.node().getParentNode().appendChild(otherNode.node()); + // if(ctx.format()) ctx.append("\n"); } - RuntimeHelpers.invoke(context, otherNode, "decorate!"); - - coalesceTextNodesInteligently(context, this, appendNode, nextSibling); - - return otherNode; } - + @JRubyMethod - 
public IRubyObject add_next_sibling_node(ThreadContext context, IRubyObject appendNode) { - internalNode.add_next_sibling(context, (XmlNode)appendNode, (XmlNode)this); - return this; + public IRubyObject add_child_node(ThreadContext context, IRubyObject child) { + adoptAs(context, AdoptScheme.CHILD, child); + return child; } + /** + * Add a namespace definition to this node. To the underlying + * node, add an attribute of the form + * xmlns:prefix="uri". + */ @JRubyMethod - public IRubyObject add_previous_sibling(ThreadContext context, IRubyObject node) { - IRubyObject previousSibling = this.previous_sibling(context); - XmlNode otherNode = asXmlNode(context, node); + public IRubyObject add_namespace_definition(ThreadContext context, + IRubyObject prefix, + IRubyObject href) { + String prefixString = prefix.isNil() ? "" : rubyStringToString(prefix); + String hrefString = rubyStringToString(href); + XmlNamespace ns = this.nsCache.get(context, this, prefixString, hrefString); - try{ - this.node().getParentNode().insertBefore(otherNode.node(), this.node()); - } catch (DOMException ex) { - throw context.getRuntime().newRuntimeError("This should not happen: "+ex.getMessage()); + if (node instanceof Element) { } - RuntimeHelpers.invoke(context , otherNode, "decorate!"); - - coalesceTextNodesInteligently(context, previousSibling, otherNode, this); - return node; + namespace_definitions = null; // clear cache + return ns; } @JRubyMethod public IRubyObject attribute(ThreadContext context, IRubyObject name){ - NamedNodeMap attrs = this.node().getAttributes(); + NamedNodeMap attrs = this.node.getAttributes(); Node attr = attrs.getNamedItem(name.convertToString().asJavaString()); if(attr == null) { return context.getRuntime().newString(ERR_INSECURE_SET_INST_VAR); @@ -446,9 +551,9 @@ public IRubyObject attribute(ThreadContext context, IRubyObject name){ return constructNode(context.getRuntime(), attr); } - @JRubyMethod + @JRubyMethod() public IRubyObject 
attribute_nodes(ThreadContext context) { - NamedNodeMap nodeMap = this.node().getAttributes(); + NamedNodeMap nodeMap = this.node.getAttributes(); Ruby ruby = context.getRuntime(); if(nodeMap == null){ @@ -458,9 +563,7 @@ public IRubyObject attribute_nodes(ThreadContext context) { RubyArray attr = ruby.newArray(); for(int i = 0; i < nodeMap.getLength(); i++) { - if (!NokogiriHelpers.isNamespace(nodeMap.item(i))) { - attr.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, nodeMap.item(i))); - } + attr.append(fromNodeOrCreate(context, nodeMap.item(i))); } return attr; @@ -471,7 +574,7 @@ public IRubyObject attribute_with_ns(ThreadContext context, IRubyObject name, IR String namej = name.convertToString().asJavaString(); String nsj = (namespace.isNil()) ? null : namespace.convertToString().asJavaString(); - Node el = this.node().getAttributes().getNamedItemNS(nsj, namej); + Node el = this.node.getAttributes().getNamedItemNS(nsj, namej); if(el == null) { return context.getRuntime().getNil(); @@ -479,50 +582,40 @@ public IRubyObject attribute_with_ns(ThreadContext context, IRubyObject name, IR return NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), el); } - @JRubyMethod - public IRubyObject attributes(ThreadContext context) { - Ruby ruby = context.getRuntime(); - RubyHash hash = RubyHash.newHash(ruby); - NamedNodeMap attrs = node().getAttributes(); - for (int i = 0; i < attrs.getLength(); i++) { - Node attr = attrs.item(i); - hash.op_aset(context, RubyString.newString(ruby, attr.getNodeName()), RubyString.newString(ruby, attr.getNodeValue())); - } - return hash; - } - @JRubyMethod(name = "blank?") public IRubyObject blank_p(ThreadContext context) { - return this.internalNode.methods().blank_p(context, this); + return context.getRuntime().getFalse(); } @JRubyMethod public IRubyObject child(ThreadContext context) { - return constructNode(context.getRuntime(), node().getFirstChild()); + return fromNodeOrCreate(context, node.getFirstChild()); } @JRubyMethod 
public IRubyObject children(ThreadContext context) { - return this.internalNode.children(context, this); - + XmlNodeSet result = new XmlNodeSet(context.getRuntime(), + node.getChildNodes()); + result.setDocument((XmlDocument) fromNode(context, this.getOwnerDocument())); + return result; } @JRubyMethod - public IRubyObject compare(ThreadContext context, IRubyObject otherNode) { - if(!(otherNode instanceof XmlNode)) { + public IRubyObject compare(ThreadContext context, IRubyObject other) { + if(!(other instanceof XmlNode)) { return context.getRuntime().newFixnum(-2); } - Node on = ((XmlNode) otherNode).getNodeToCompare(); + Node otherNode = asXmlNode(context, other).node; // Do not touch this if, if it's not for a good reason. - if(getNodeToCompare().getNodeType() == Node.DOCUMENT_NODE || - on.getNodeType() == Node.DOCUMENT_NODE) { + if(node.getNodeType() == Node.DOCUMENT_NODE || + otherNode.getNodeType() == Node.DOCUMENT_NODE) { return context.getRuntime().newFixnum(-1); } try{ - int res = getNodeToCompare().compareDocumentPosition(on); + int res = node.compareDocumentPosition(otherNode); if( (res & FIRST_PRECEDES_SECOND) == FIRST_PRECEDES_SECOND) { return context.getRuntime().newFixnum(-1); } else if ( (res & SECOND_PRECEDES_FIRST) == SECOND_PRECEDES_FIRST) { @@ -539,12 +632,22 @@ public IRubyObject compare(ThreadContext context, IRubyObject otherNode) { @JRubyMethod public IRubyObject content(ThreadContext context) { - return this.internalNode.getContent(context); + if(this.content == null) { + String textContent = this.node.getTextContent(); + content = stringOrNil(context.getRuntime(), textContent); + } + + return this.content; } @JRubyMethod public IRubyObject document(ThreadContext context) { - return this.internalNode.getDocument(context); + if(this.doc == null) { + this.doc = fromNodeOrCreate(context, + this.node.getOwnerDocument()); + } + + return this.doc; } @JRubyMethod @@ -560,52 +663,171 @@ public IRubyObject dup(ThreadContext context, IRubyObject 
depth) { } protected IRubyObject dup_implementation(ThreadContext context, boolean deep) { - Node newNode = this.internalNode.cloneNode(context, this, deep); + XmlNode clone; + try { + clone = (XmlNode) clone(); + } catch (CloneNotSupportedException e) { + throw context.getRuntime().newRuntimeError(e.toString()); + } + Node newNode = node.cloneNode(deep); + clone.node = newNode; + return clone; + } - return new XmlNode(context.getRuntime(), this.getType(), newNode); + public static IRubyObject encode_special_chars(ThreadContext context, + IRubyObject string) { + String s = rubyStringToString(string); + String enc = NokogiriHelpers.encodeJavaString(s); + return context.getRuntime().newString(enc); } - @JRubyMethod - public IRubyObject encode_special_chars(ThreadContext context, IRubyObject string) { - String s = string.convertToString().asJavaString(); - return RubyString.newString(context.getRuntime(), - NokogiriHelpers.encodeJavaString(s)); + /** + * Instance method version of the above static method. + */ + @JRubyMethod(name="encode_special_chars") + public IRubyObject i_encode_special_chars(ThreadContext context, + IRubyObject string) { + return encode_special_chars(context, string); } + /** + * Get the attribute at the given key, rbkey. + * Assumes that this node has attributes (i.e. that key? returned + * true). Overridden in XmlElement. + */ @JRubyMethod(visibility = Visibility.PRIVATE) - public IRubyObject get(ThreadContext context, IRubyObject attribute) { - return this.internalNode.methods().get(context, this, attribute); + public IRubyObject get(ThreadContext context, IRubyObject rbkey) { + return context.getRuntime().getNil(); + } + + /** + * Returns the owner document, checking if this node is the + * document, or returns null if there is no owner. 
+ */ + protected Document getOwnerDocument() { + if (node.getNodeType() == Node.DOCUMENT_NODE) { + return (Document) node; + } else { + return node.getOwnerDocument(); + } } @JRubyMethod public IRubyObject internal_subset(ThreadContext context) { - if (node().getNodeType() == Node.DOCUMENT_NODE) { - Document doc = (Document)node(); - return XmlNode.constructNode(context.getRuntime(), doc.getDoctype()); - } else { + Document document = getOwnerDocument(); + + if(document == null) { return context.getRuntime().getNil(); } - /* - if(this.node().getOwnerDocument() == null) { + + XmlDocument xdoc = + (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); + IRubyObject xdtd = xdoc.getInternalSubset(context); + return xdtd; + } + + @JRubyMethod + public IRubyObject create_internal_subset(ThreadContext context, + IRubyObject name, + IRubyObject external_id, + IRubyObject system_id) { + IRubyObject subset = internal_subset(context); + if (!subset.isNil()) { + throw context.getRuntime() + .newRuntimeError("Document already has internal subset"); + } + + throw context.getRuntime().newNotImplementedError("not implemented"); + } + + @JRubyMethod + public IRubyObject external_subset(ThreadContext context) { + Document document = getOwnerDocument(); + + if(document == null) { return context.getRuntime().getNil(); } - return XmlNode.constructNode(context.getRuntime(), this.node().getOwnerDocument().getDoctype()); - */ + + XmlDocument xdoc = + (XmlDocument) getCachedNodeOrCreate(context.getRuntime(), document); + IRubyObject xdtd = xdoc.getExternalSubset(context); + return xdtd; + } + + @JRubyMethod + public IRubyObject create_external_subset(ThreadContext context, + IRubyObject name, + IRubyObject external_id, + IRubyObject system_id) { + IRubyObject subset = external_subset(context); + if (!subset.isNil()) { + throw context.getRuntime() + .newRuntimeError("Document already has external subset"); + } + + throw context.getRuntime().newNotImplementedError("not 
implemented"); } + /** + * Test if this node has an attribute named rbkey. + * Overridden in XmlElement. + */ @JRubyMethod(name = "key?") - public IRubyObject key_p(ThreadContext context, IRubyObject k) { - return this.internalNode.methods().key_p(context, this, k); + public IRubyObject key_p(ThreadContext context, IRubyObject rbkey) { + return context.getRuntime().getNil(); } @JRubyMethod public IRubyObject namespace(ThreadContext context){ - return this.internalNode.getNamespace(context); + if(namespace == null) { + String prefix = node.getPrefix(); + namespace = nsCache.get(context, this, + prefix == null ? "" : prefix, + node.lookupNamespaceURI(prefix)); + if (namespace == null) { + namespace = + new XmlNamespace(context.getRuntime(), + node.getPrefix(), + node.lookupNamespaceURI(node.getPrefix())); + } + + if(((XmlNamespace) namespace).isEmpty()) { + namespace = context.getRuntime().getNil(); + } + } + + return namespace; } + /** + * Return an array of XmlNamespace nodes based on the attributes + * of this node. 
+ */ @JRubyMethod public IRubyObject namespace_definitions(ThreadContext context) { - return this.getNsDefinitions(context.getRuntime()); + if (this.namespace_definitions == null) { + Ruby ruby = context.getRuntime(); + RubyArray arr = ruby.newArray(); + NamedNodeMap nodes = node.getAttributes(); + + if(nodes == null) { + return ruby.newEmptyArray(); + } + + IRubyObject document = document(context); + for(int i = 0; i < nodes.getLength(); i++) { + Node n = nodes.item(i); + if(isNamespace(n)) { + XmlNamespace ns = XmlNamespace.fromNode(ruby, n); + ns.setDocument(document); + arr.append(ns); + } + } + + this.namespace_definitions = arr; + } + + return (RubyArray) this.namespace_definitions; } @JRubyMethod(name="namespaced_key?") @@ -614,28 +836,32 @@ public IRubyObject namespaced_key_p(ThreadContext context, IRubyObject elementLN context.getRuntime().getFalse() : context.getRuntime().getTrue(); } - @JRubyMethod - public IRubyObject namespaces(ThreadContext context) { - Ruby ruby = context.getRuntime(); - RubyHash hash = RubyHash.newHash(ruby); - NamedNodeMap attrs = node().getAttributes(); - for (int i = 0; i < attrs.getLength(); i++) { - Node attr = attrs.item(i); - hash.op_aset(context, RubyString.newString(ruby, attr.getNodeName()), RubyString.newString(ruby, attr.getNodeValue())); - } - return hash; + protected void setContent(IRubyObject content) { + this.content = content; + this.node.setTextContent(rubyStringToString(content)); + } + + protected void setContent(String content) { + getNode().setTextContent(content); + this.content = null; // clear cache } @JRubyMethod(name = "native_content=", visibility = Visibility.PRIVATE) public IRubyObject native_content_set(ThreadContext context, IRubyObject content) { - RubyString newContent = content.convertToString(); - this.internalNode.setContent(newContent); - this.node().setTextContent(newContent.asJavaString()); + setContent(content); return content; } + /** + * @param args {IRubyObject io, + * IRubyObject 
encoding, + * IRubyObject indentString, + * IRubyObject options} + */ @JRubyMethod(required=4, visibility=Visibility.PRIVATE) - public IRubyObject native_write_to(ThreadContext context, IRubyObject[] args) {//IRubyObject io, IRubyObject encoding, IRubyObject indentString, IRubyObject options) { + public IRubyObject native_write_to(ThreadContext context, + IRubyObject[] args) { + IRubyObject io = args[0]; IRubyObject encoding = args[1]; IRubyObject indentString = args[2]; @@ -649,58 +875,47 @@ public IRubyObject native_write_to(ThreadContext context, IRubyObject[] args) {/ indentString.convertToString().asJavaString(), encString); - if(ctx.asHtml()){ - this.saveContentAsHtml(context, ctx); - } else { - this.saveContent(context, ctx); - } + saveContent(context, ctx); - RuntimeHelpers.invoke(context, io, "write", context.getRuntime().newString(ctx.toString())); + RuntimeHelpers.invoke(context, io, "write", + ctx.toRubyString(context.getRuntime())); return io; } @JRubyMethod public IRubyObject next_sibling(ThreadContext context) { - return constructNode(context.getRuntime(), node().getNextSibling()); + return fromNodeOrCreate(context, node.getNextSibling()); + } + + @JRubyMethod + public IRubyObject previous_sibling(ThreadContext context) { + return fromNodeOrCreate(context, node.getPreviousSibling()); } @JRubyMethod(meta = true, rest = true) - public static IRubyObject new_from_str(ThreadContext context, IRubyObject cls, IRubyObject[] args) { - // TODO: duplicating code from Document.read_memory - Ruby ruby = context.getRuntime(); - Arity.checkArgumentCount(ruby, args, 4, 4); - ParseOptions options = new ParseOptions(args[3]); - try { - Document document; - RubyString content = args[0].convertToString(); - ByteList byteList = content.getByteList(); - ByteArrayInputStream bais = new ByteArrayInputStream(byteList.unsafeBytes(), byteList.begin(), byteList.length()); - document = options.getDocumentBuilder().parse(bais); - return constructNode(ruby, 
document.getFirstChild()); - } catch (ParserConfigurationException pce) { - throw RaiseException.createNativeRaiseException(ruby, pce); - } catch (SAXException saxe) { - throw RaiseException.createNativeRaiseException(ruby, saxe); - } catch (IOException ioe) { - throw RaiseException.createNativeRaiseException(ruby, ioe); - } + public static IRubyObject new_from_str(ThreadContext context, + IRubyObject cls, + IRubyObject[] args) { + XmlDocument doc = (XmlDocument) XmlDocument.read_memory(context, args); + return doc.root(context); } @JRubyMethod public IRubyObject node_name(ThreadContext context) { - return this.internalNode.getNodeName(context); + return getNodeName(context); } @JRubyMethod(name = "node_name=") public IRubyObject node_name_set(ThreadContext context, IRubyObject nodeName) { - this.internalNode.methods().node_name_set(context, this, nodeName); - return nodeName; + String newName = nodeName.convertToString().asJavaString(); + getOwnerDocument().renameNode(node, null, newName); + setName(nodeName); + return this; } @JRubyMethod(name = "[]=") public IRubyObject op_aset(ThreadContext context, IRubyObject index, IRubyObject val) { - this.internalNode.methods().op_aset(context, this, index, val); return val; } @@ -710,90 +925,111 @@ public IRubyObject parent(ThreadContext context) { * Check if this node is the root node of the document. * If so, parent is the document. 
*/ - if(node().getOwnerDocument().getDocumentElement() == node()) { + if(node.getOwnerDocument().getDocumentElement() == node) { return document(context); } else { - return NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), node().getParentNode()); + return fromNodeOrCreate(context, node.getParentNode()); } } - @JRubyMethod(name = "parent=") - public IRubyObject parent_set(ThreadContext context, IRubyObject parent) { - Node otherNode = getNodeFromXmlNode(context, parent); - otherNode.appendChild(node()); - return parent; - } - @JRubyMethod public IRubyObject path(ThreadContext context) { - return RubyString.newString(context.getRuntime(), NokogiriHelpers.getNodeCompletePath(this.node())); + return RubyString.newString(context.getRuntime(), NokogiriHelpers.getNodeCompletePath(this.node)); } @JRubyMethod public IRubyObject pointer_id(ThreadContext context) { - return RubyFixnum.newFixnum(context.getRuntime(), this.node().hashCode()); - } - - @JRubyMethod - public IRubyObject previous_sibling(ThreadContext context) { - return constructNode(context.getRuntime(), node().getPreviousSibling()); + return RubyFixnum.newFixnum(context.getRuntime(), this.node.hashCode()); } @JRubyMethod public IRubyObject remove_attribute(ThreadContext context, IRubyObject name) { - this.internalNode.methods().remove_attribute(context, this, name); - return context.getRuntime().getNil(); - } - - @JRubyMethod(name="replace_with_node", visibility=Visibility.PROTECTED) - public IRubyObject replace(ThreadContext context, IRubyObject newNode) { - Node otherNode = getNodeFromXmlNode(context, newNode); - - if(!otherNode.getOwnerDocument().equals(node().getOwnerDocument())) { - node().getOwnerDocument().adoptNode(otherNode); - } - - node().getParentNode().replaceChild(otherNode, node()); - - ((XmlNode) newNode).relink_namespace(context); - return this; } @JRubyMethod(visibility=Visibility.PRIVATE) public IRubyObject set_namespace(ThreadContext context, IRubyObject namespace) { - 
this.internalNode.setNamespace(namespace); - this.internalNode.resetNamespaceDefinitions(); + //setNamespace(namespace); XmlNamespace ns = (XmlNamespace) namespace; String prefix = ns.prefix(context).convertToString().asJavaString(); String href = ns.href(context).convertToString().asJavaString(); - this.node().getOwnerDocument().renameNode(node(), href, NokogiriHelpers.newQName(prefix, node())); + // Assigning node = ...renameNode() or not seems to make no + // difference. Why not? -pmahoney + node = node.getOwnerDocument() + .renameNode(node, href, NokogiriHelpers.newQName(prefix, node)); + + this.namespace = null; // clear cache return this; } @JRubyMethod public IRubyObject unlink(ThreadContext context) { - this.internalNode.methods().unlink(context, this); + if(node.getParentNode() == null) { + throw context.getRuntime().newRuntimeError("TYPE: " + node.getNodeType()+ " PARENT NULL"); + } else { + node.getParentNode().removeChild(node); + } + return this; } - @JRubyMethod(name = "node_type") - public IRubyObject xmlType(ThreadContext context) { - return this.internalNode.methods().getNokogiriNodeType(context); + /** + * The C-library simply returns libxml2 magic numbers. Here we + * convert Java Xml nodes to the appropriate constant defined in + * xml/node.rb. 
+ */ + @JRubyMethod + public IRubyObject node_type(ThreadContext context) { + + String type; + switch (node.getNodeType()) { + case Node.ELEMENT_NODE: + if (this instanceof XmlElementDecl) + type = "ELEMENT_DECL"; + else if (this instanceof XmlAttributeDecl) + type = "ATTRIBUTE_DECL"; + else if (this instanceof XmlEntityDecl) + type = "ENTITY_DECL"; + else + type = "ELEMENT_NODE"; + break; + case Node.ATTRIBUTE_NODE: type = "ATTRIBUTE_NODE"; break; + case Node.TEXT_NODE: type = "TEXT_NODE"; break; + case Node.CDATA_SECTION_NODE: type = "CDATA_SECTION_NODE"; break; + case Node.ENTITY_REFERENCE_NODE: type = "ENTITY_REF_NODE"; break; + case Node.ENTITY_NODE: type = "ENTITY_NODE"; break; + case Node.PROCESSING_INSTRUCTION_NODE: type = "PI_NODE"; break; + case Node.COMMENT_NODE: type = "COMMENT_NODE"; break; + case Node.DOCUMENT_NODE: + if (this instanceof HtmlDocument) + type = "HTML_DOCUMENT_NODE"; + else + type = "DOCUMENT_NODE"; + break; + case Node.DOCUMENT_TYPE_NODE: type = "DOCUMENT_TYPE_NODE"; break; + case Node.DOCUMENT_FRAGMENT_NODE: type = "DOCUMENT_FRAG_NODE"; break; + case Node.NOTATION_NODE: type = "NOTATION_NODE"; break; + default: + return context.getRuntime().newFixnum(0); + } + + return context.getRuntime() + .getClassFromPath("Nokogiri::XML::Node") + .getConstant(type); } - + @JRubyMethod public IRubyObject line(ThreadContext context) { - Node root = internalNode.getDocument(context).getDocument(); + Node root = getOwnerDocument(); int[] counter = new int[1]; count(root, counter); return RubyFixnum.newFixnum(context.getRuntime(), counter[0]+1); } - + private boolean count(Node node, int[] counter) { - if (node == this.getNode()) { + if (node == this.node) { return true; } NodeList list = node.getChildNodes(); @@ -807,30 +1043,183 @@ private boolean count(Node node, int[] counter) { } return false; } - + @JRubyMethod public IRubyObject next_element(ThreadContext context) { - Node node = this.getNode().getNextSibling(); + Node nextNode = 
node.getNextSibling(); Ruby ruby = context.getRuntime(); - if (node == null) return ruby.getNil(); - if (node instanceof Element) { - return new XmlElement(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"), node); + if (nextNode == null) return ruby.getNil(); + if (nextNode instanceof Element) { + return new XmlElement(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"), nextNode); } - Node deeper = node.getNextSibling(); + Node deeper = nextNode.getNextSibling(); if (deeper == null) return ruby.getNil(); return new XmlElement(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"), deeper); } @JRubyMethod public IRubyObject previous_element(ThreadContext context) { - Node node = this.getNode().getPreviousSibling(); + Node prevNode = node.getPreviousSibling(); Ruby ruby = context.getRuntime(); - if (node == null) return ruby.getNil(); - if (node instanceof Element) { - return new XmlElement(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"), node); + if (prevNode == null) return ruby.getNil(); + if (prevNode instanceof Element) { + return new XmlElement(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"), prevNode); } - Node shallower = node.getPreviousSibling(); + Node shallower = prevNode.getPreviousSibling(); if (shallower == null) return ruby.getNil(); return new XmlElement(ruby, (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element"), shallower); } + + protected enum AdoptScheme { + CHILD, PREV_SIBLING, NEXT_SIBLING, REPLACEMENT; + } + + /** + * Adopt XmlNode other into the document of + * this using the specified scheme. 
+ */ + protected IRubyObject adoptAs(ThreadContext context, AdoptScheme scheme, + IRubyObject other_) { + XmlNode other = asXmlNode(context, other_); + Node thisNode = this.getNode(); + Node otherNode = other.getNode(); + + try { + Document doc = thisNode.getOwnerDocument(); + + if (doc != null && doc != otherNode.getOwnerDocument()) { + Node ret = doc.adoptNode(otherNode); + if (ret == null) { + throw context.getRuntime() + .newRuntimeError("Failed to take ownership of node"); + } + } + + Node parent = thisNode.getParentNode(); + + switch (scheme) { + case CHILD: + adoptAsChild(context, thisNode, otherNode); + break; + case PREV_SIBLING: + adoptAsPrevSibling(context, parent, thisNode, otherNode); + break; + case NEXT_SIBLING: + adoptAsNextSibling(context, parent, thisNode, otherNode); + break; + case REPLACEMENT: + adoptAsReplacement(context, parent, thisNode, otherNode); + break; + } + } catch (Exception e) { + throw context.getRuntime().newRuntimeError(e.toString()); + } + + if (otherNode.getNodeType() == Node.TEXT_NODE) { + coalesceTextNodes(context, other); + } + + //other.relink_namespace(context); + // post_add_child(context, this, other); + + return this; + } + + protected void adoptAsChild(ThreadContext context, Node parent, + Node otherNode) { + /* + * This is a bit of a hack. C-Nokogiri allows adding a bare + * text node as the root element. Java (and XML spec?) does + * not. So we wrap the text node in an element. + */ + if (parent.getNodeType() == Node.DOCUMENT_NODE && + otherNode.getNodeType() == Node.TEXT_NODE) { + Element e = ((Document)parent).createElement("text"); + e.appendChild(otherNode); + otherNode = e; + } + + parent.appendChild(otherNode); + } + + + protected void adoptAsPrevSibling(ThreadContext context, + Node parent, + Node thisNode, Node otherNode) { + if (parent == null) { + /* I'm not sure what do do here... A node with no + * parent can't exactly have a 'sibling', so we make + * otherNode parentless also. 
*/ + if (otherNode.getParentNode() != null) + otherNode.getParentNode().removeChild(otherNode); + + return; + } + + parent.insertBefore(otherNode, thisNode); + } + + protected void adoptAsNextSibling(ThreadContext context, + Node parent, + Node thisNode, Node otherNode) { + if (parent == null) { + /* I'm not sure what do do here... A node with no + * parent can't exactly have a 'sibling', so we make + * otherNode parentless also. */ + if (otherNode.getParentNode() != null) + otherNode.getParentNode().removeChild(otherNode); + + return; + } + + Node nextSib = thisNode.getNextSibling(); + if (nextSib != null) { + parent.insertBefore(otherNode, nextSib); + } else { + parent.appendChild(otherNode); + } + } + + protected void adoptAsReplacement(ThreadContext context, + Node parentNode, + Node thisNode, Node otherNode) { + if (parentNode == null) { + /* nothing to replace? */ + return; + } + + try { + parentNode.replaceChild(otherNode, thisNode); + } catch (Exception e) { + String prefix = "could not replace child: "; + throw context.getRuntime().newRuntimeError(prefix + e.toString()); + } + } + + /** + * Replace this with other. + */ + @JRubyMethod + public IRubyObject replace_node(ThreadContext context, + IRubyObject other) { + return adoptAs(context, AdoptScheme.REPLACEMENT, other); + } + + /** + * Add other as a sibling before this. + */ + @JRubyMethod + public IRubyObject add_previous_sibling_node(ThreadContext context, + IRubyObject other) { + return adoptAs(context, AdoptScheme.PREV_SIBLING, other); + } + + /** + * Add other as a sibling after this. 
+ */ + @JRubyMethod + public IRubyObject add_next_sibling_node(ThreadContext context, + IRubyObject other) { + return adoptAs(context, AdoptScheme.NEXT_SIBLING, other); + } } diff --git a/ext/java/nokogiri/XmlNodeSet.java b/ext/java/nokogiri/XmlNodeSet.java index 960549920af..6561037b007 100644 --- a/ext/java/nokogiri/XmlNodeSet.java +++ b/ext/java/nokogiri/XmlNodeSet.java @@ -184,4 +184,4 @@ public int getLength() { } } -} \ No newline at end of file +} diff --git a/ext/java/nokogiri/XmlNotation.java b/ext/java/nokogiri/XmlNotation.java deleted file mode 100644 index 41a534e1c21..00000000000 --- a/ext/java/nokogiri/XmlNotation.java +++ /dev/null @@ -1,44 +0,0 @@ -package nokogiri; - -import org.apache.xerces.dom.DeferredNotationImpl; -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.RubyStruct; -import org.jruby.anno.JRubyMethod; -import org.jruby.javasupport.JavaUtil; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Node; - -/** - * NOTATION declaration of DTD - * - * @author Yoko Harada - */ -public class XmlNotation extends RubyStruct { - private DeferredNotationImpl internalNode; - - public XmlNotation(Ruby runtime, RubyClass klazz, Node node) { - super(runtime, klazz); - internalNode = (DeferredNotationImpl) node; - } - - public Node getNode() { - return internalNode; - } - - @JRubyMethod - public IRubyObject name(ThreadContext context) { - return JavaUtil.convertJavaToRuby(context.getRuntime(), internalNode.getNodeName()); - } - - @JRubyMethod - public IRubyObject system_id(ThreadContext context) { - return JavaUtil.convertJavaToRuby(context.getRuntime(), internalNode.getSystemId()); - } - - @JRubyMethod - public IRubyObject public_id(ThreadContext context) { - return JavaUtil.convertJavaToRuby(context.getRuntime(), internalNode.getPublicId()); - } -} diff --git a/ext/java/nokogiri/XmlProcessingInstruction.java b/ext/java/nokogiri/XmlProcessingInstruction.java new file mode 
100644 index 00000000000..82de99d5dd4 --- /dev/null +++ b/ext/java/nokogiri/XmlProcessingInstruction.java @@ -0,0 +1,69 @@ +package nokogiri; + +import nokogiri.internals.SaveContext; +import nokogiri.XmlNode; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.javasupport.util.RuntimeHelpers; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Node; +import org.w3c.dom.Document; + +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; + +/** + * + * @author sergio + */ +public class XmlProcessingInstruction extends XmlNode { + + public XmlProcessingInstruction(Ruby ruby, RubyClass klass, Node node) { + super(ruby, klass, node); + } + + @JRubyMethod(name="new", meta=true, rest=true, required=3) + public static IRubyObject rbNew(ThreadContext context, + IRubyObject klass, + IRubyObject[] args) { + + IRubyObject doc = args[0]; + IRubyObject target = args[1]; + IRubyObject data = args[2]; + + Document document = ((XmlNode) doc).getOwnerDocument(); + Node node = + document.createProcessingInstruction(rubyStringToString(target), + rubyStringToString(data)); + XmlProcessingInstruction self = + new XmlProcessingInstruction(context.getRuntime(), + (RubyClass) klass, + node); + + RuntimeHelpers.invoke(context, self, "initialize", args); + + // TODO: if_block_given. 
+ + return self; + } + + @Override + public boolean isProcessingInstruction() { return true; } + + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + ctx.append(""); + else + ctx.append("?>"); + } + +} diff --git a/ext/java/nokogiri/XmlSaxParserContext.java b/ext/java/nokogiri/XmlSaxParserContext.java index 384576cf23c..78ea628d963 100644 --- a/ext/java/nokogiri/XmlSaxParserContext.java +++ b/ext/java/nokogiri/XmlSaxParserContext.java @@ -1,142 +1,303 @@ package nokogiri; -import java.io.ByteArrayInputStream; -import java.io.FileInputStream; +import java.io.InputStream; import java.io.IOException; -import java.io.StringReader; import nokogiri.internals.NokogiriHandler; -import org.jruby.*; +import nokogiri.internals.ParserContext; +import nokogiri.internals.XmlSaxParser; +import org.apache.xerces.parsers.AbstractSAXParser; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyIO; +import org.jruby.RubyModule; +import org.jruby.RubyObject; +import org.jruby.RubyObjectAdapter; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; +import org.jruby.javasupport.JavaEmbedUtils; import org.jruby.javasupport.util.RuntimeHelpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.Visibility; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; import org.jruby.util.TypeConverter; +import org.xml.sax.ContentHandler; +import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; -import org.xml.sax.XMLReader; -import org.xml.sax.ext.DefaultHandler2; -import org.xml.sax.helpers.XMLReaderFactory; import static org.jruby.javasupport.util.RuntimeHelpers.invoke; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; -public class XmlSaxParserContext extends RubyObject { - private 
InputSource source; - private XMLReader reader; +/** + * Base class for the SAX parsers. + * + * @author Patrick Mahoney + */ +public class XmlSaxParserContext extends ParserContext { + protected static final String FEATURE_NAMESPACES = + "http://xml.org/sax/features/namespaces"; + protected static final String FEATURE_NAMESPACE_PREFIXES = + "http://xml.org/sax/features/namespace-prefixes"; + + protected AbstractSAXParser parser; public XmlSaxParserContext(final Ruby ruby, RubyClass rubyClass) { super(ruby, rubyClass); - try { - reader = XMLReaderFactory.createXMLReader(); - } catch (SAXException se) { + parser = createParser(); + } catch (SAXException se) { throw RaiseException.createNativeRaiseException(ruby, se); } } + protected AbstractSAXParser createParser() throws SAXException { + XmlSaxParser parser = new XmlSaxParser(); + parser.setFeature(FEATURE_NAMESPACE_PREFIXES, true); + return parser; + } + + /** + * Create a new parser context that will parse the string + * data. + */ @JRubyMethod(name="memory", meta=true) - public static IRubyObject parse_memory(ThreadContext context, IRubyObject klazz, IRubyObject data) { - ByteList byteList = data.convertToString().getByteList(); - ByteArrayInputStream bais = new ByteArrayInputStream(byteList.unsafeBytes(), byteList.begin(), byteList.length()); + public static IRubyObject parse_memory(ThreadContext context, + IRubyObject klazz, + IRubyObject data) { + XmlSaxParserContext ctx = new XmlSaxParserContext(context.getRuntime(), + (RubyClass) klazz); + ctx.setInputSource(context, data); + return ctx; + } - XmlSaxParserContext ctx = new XmlSaxParserContext(context.getRuntime(), (RubyClass) klazz); + /** + * Create a new parser context that will read from the file + * data and parse. 
+ */ + @JRubyMethod(name="file", meta=true) + public static IRubyObject parse_file(ThreadContext context, + IRubyObject klazz, + IRubyObject data) { + XmlSaxParserContext ctx = new XmlSaxParserContext(context.getRuntime(), + (RubyClass) klazz); + ctx.setInputSourceFile(context, data); + return ctx; + } - ctx.source = new InputSource(bais); + /** + * Create a new parser context that will read from the IO or + * StringIO data and parse. + * + * TODO: Currently ignores encoding enc. + */ + @JRubyMethod(name="io", meta=true) + public static IRubyObject parse_io(ThreadContext context, + IRubyObject klazz, + IRubyObject data, + IRubyObject enc) { + //int encoding = (int)enc.convertToInteger().getLongValue(); + XmlSaxParserContext ctx = new XmlSaxParserContext(context.getRuntime(), + (RubyClass) klazz); + ctx.setInputSource(context, data); + return ctx; + } - return ctx; + /** + * Create a new parser context that will read from a raw input + * stream. Not a JRuby method. Meant to be run in a separate + * thread by XmlSaxPushParser. + */ + public static IRubyObject parse_stream(ThreadContext context, + IRubyObject klazz, + InputStream stream) { + XmlSaxParserContext ctx = + new XmlSaxParserContext(context.getRuntime(), (RubyClass)klazz); + ctx.setInputSource(stream); + return ctx; } - @JRubyMethod(name="file", meta=true) - public static IRubyObject parse_file(ThreadContext context, IRubyObject klazz, IRubyObject data) { - String filename = data.convertToString().asJavaString(); + /** + * Set a property of the underlying parser. 
+ */ + protected void setProperty(String key, Object val) + throws SAXNotRecognizedException, SAXNotSupportedException { + parser.setProperty(key, val); + } - XmlSaxParserContext ctx = new XmlSaxParserContext(context.getRuntime(), (RubyClass) klazz); + protected void setContentHandler(ContentHandler handler) { + parser.setContentHandler(handler); + } - try{ - ctx.source = new InputSource(new FileInputStream(filename)); - } catch (Exception ex) { - throw RaiseException.createNativeRaiseException(context.getRuntime(), ex); - } + protected void setErrorHandler(ErrorHandler handler) { + parser.setErrorHandler(handler); + } - return ctx; + /** + * Perform any initialization prior to parsing with the handler + * handlerRuby. Convenience hook for subclasses. + */ + protected void preParse(ThreadContext context, + IRubyObject handlerRuby, + NokogiriHandler handler) { + ((XmlSaxParser) parser).setXmlDeclHandler(handler); } - @JRubyMethod(name="io", meta=true) - public static IRubyObject native_parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, IRubyObject enc) { - Ruby ruby = context.getRuntime(); - int encoding = (int)enc.convertToInteger().getLongValue(); - - // The FFI version has some special logic to account for the case where we get a StringIO - // object: - - boolean isStringIO = invoke(context, data, "is_a?", ruby.getClassFromPath("StringIO")).isTrue(); - XmlSaxParserContext ctx = new XmlSaxParserContext(ruby, (RubyClass) klazz); - if (isStringIO) { - IRubyObject ioString = invoke(context, data, "string"); - ctx.source = new InputSource(new StringReader(ioString.asJavaString())); - } else { - RubyIO io = (RubyIO) TypeConverter.convertToType(data, ruby.getIO(), "to_io"); - ctx.source = new InputSource(io.getInStream()); - } + protected void postParse(ThreadContext context, + IRubyObject handlerRuby, + NokogiriHandler handler) { + // noop + } - return ctx; + protected void do_parse() throws SAXException, IOException { + 
parser.parse(getInputSource()); } - @JRubyMethod() - public IRubyObject parse_with(ThreadContext context, IRubyObject handlerRuby) { - Ruby ruby = context.getRuntime(); + @JRubyMethod + public IRubyObject parse_with(ThreadContext context, + IRubyObject handlerRuby) { + Ruby ruby = context.getRuntime(); - if(!invoke(context, handlerRuby, "kind_of?", - ruby.getClassFromPath("Nokogiri::XML::SAX::Parser")).isTrue()) { - throw ruby.newArgumentError("argument must be a Nokogiri::XML::SAX::Parser"); - } + if(!invoke(context, handlerRuby, "respond_to?", + ruby.newSymbol("document")).isTrue()) { + String msg = "argument must respond_to document"; + throw ruby.newArgumentError(msg); + } - DefaultHandler2 handler = new NokogiriHandler(ruby, handlerRuby); + NokogiriHandler handler = new NokogiriHandler(ruby, handlerRuby); + preParse(context, handlerRuby, handler); - this.reader.setContentHandler(handler); - this.reader.setErrorHandler(handler); + setContentHandler(handler); + setErrorHandler(handler); - try{ - this.reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler); - } catch(Exception ex) { - System.out.println("Problem while creating XML SAX Parser: "+ex.toString()); - } + try{ + setProperty("http://xml.org/sax/properties/lexical-handler", + handler); + } catch(Exception ex) { + throw ruby.newRuntimeError( + "Problem while creating XML SAX Parser: " + ex.toString()); + } - try{ + try{ try { - this.reader.parse(this.source); + do_parse(); } catch(SAXParseException spe) { - // A bad document () should call the error handler instead of raising a - // SAX exception. + // A bad document () should call the + // error handler instead of raising a SAX exception. - // However, an EMPTY document should raise a RuntimeError. This is a bit kludgy, but - // AFAIK SAX doesn't distinguish between empty and bad whereas Nokogiri does. + // However, an EMPTY document should raise a + // RuntimeError. 
This is a bit kludgy, but AFAIK SAX + // doesn't distinguish between empty and bad whereas + // Nokogiri does. String message = spe.getMessage(); if ("Premature end of file.".matches(message)) { - throw context.getRuntime().newRuntimeError("couldn't parse document: "+ message); + throw ruby.newRuntimeError( + "couldn't parse document: " + message); } else { handler.error(spe); } } - } catch(SAXException se) { - throw RaiseException.createNativeRaiseException(ruby, se); - } catch(IOException ioe) { - throw ruby.newIOErrorFromException(ioe); - } + } catch(SAXException se) { + throw RaiseException.createNativeRaiseException(ruby, se); + } catch(IOException ioe) { + throw ruby.newIOErrorFromException(ioe); + } - return this; - } + postParse(context, handlerRuby, handler); + maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby); + + return ruby.getNil(); + } + + /** + * Can take a boolean assignment. + * + * @param context + * @param value + * @return + */ @JRubyMethod(name = "replace_entities=") - public IRubyObject set_replace_entities(ThreadContext context, IRubyObject value) { - System.out.println("replace entities called with " + value.toString()); - + public IRubyObject set_replace_entities(ThreadContext context, + IRubyObject value) { + if (!value.isTrue()) { + throw context.getRuntime() + .newRuntimeError("Not replacing entities is unsupported"); + } + return this; } -} \ No newline at end of file + @JRubyMethod(name="replace_entities") + public IRubyObject get_replace_entities(ThreadContext context, + IRubyObject value) { + return context.getRuntime().getTrue(); + } + + + /** + * If the handler's document is a FragmentHandler, attempt to trim + * leading and trailing whitespace. + * + * This is a bit hackish and depends heavily on the internals of + * FragmentHandler. 
+ */ + protected void maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, + IRubyObject parser) { + final String path = "Nokogiri::XML::FragmentHandler"; + RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter(); + RubyModule mod = + context.getRuntime().getClassFromPath(path); + + IRubyObject handler = adapter.getInstanceVariable(parser, "@document"); + if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod)) + return; + IRubyObject stack = adapter.getInstanceVariable(handler, "@stack"); + if (stack == null || stack.isNil()) + return; + // doc is finally a DocumentFragment whose nodes we can check + IRubyObject doc = adapter.callMethod(stack, "first"); + if (doc == null || doc.isNil()) + return; + + IRubyObject children; + + for (;;) { + children = adapter.callMethod(doc, "children"); + IRubyObject first = adapter.callMethod(children, "first"); + if (isWhitespaceText(context, first)) + adapter.callMethod(first, "unlink"); + else + break; + } + + for (;;) { + children = adapter.callMethod(doc, "children"); + IRubyObject last = adapter.callMethod(children, "last"); + if (isWhitespaceText(context, last)) + adapter.callMethod(last, "unlink"); + else + break; + } + + // While we have a document, normalize it. 
+ ((XmlNode) doc).normalize(); + } + + protected boolean isWhitespaceText(ThreadContext context, IRubyObject obj) { + if (obj == null || obj.isNil()) return false; + + XmlNode node = (XmlNode) obj; + if (!(node instanceof XmlText)) + return false; + + String content = rubyStringToString(node.content(context)); + return content.trim().isEmpty(); + } + +} diff --git a/ext/java/nokogiri/XmlSaxPushParser.java b/ext/java/nokogiri/XmlSaxPushParser.java new file mode 100644 index 00000000000..91fb1564e60 --- /dev/null +++ b/ext/java/nokogiri/XmlSaxPushParser.java @@ -0,0 +1,114 @@ +package nokogiri; + +import java.io.InputStream; +import java.io.IOException; +import java.nio.channels.ClosedChannelException; +import java.lang.InterruptedException; +import java.lang.Runnable; +import java.lang.Thread; +import nokogiri.internals.PushInputStream; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyIO; +import org.jruby.RubyModule; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyMethod; +import org.jruby.exceptions.RaiseException; +import org.jruby.javasupport.util.RuntimeHelpers; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.xml.sax.InputSource; + +import static org.jruby.javasupport.util.RuntimeHelpers.invoke; + +public class XmlSaxPushParser extends RubyObject { + IRubyObject options; + PushInputStream stream; + Thread reader; + + public XmlSaxPushParser(Ruby ruby, RubyClass rubyClass) { + super(ruby, rubyClass); + } + + @JRubyMethod + public IRubyObject initialize_native(final ThreadContext context, + IRubyObject _saxParser, + IRubyObject fileName) { + options = invoke(context, context.getRuntime() + .getClassFromPath("Nokogiri::XML::ParseOptions"), + "new"); + stream = new PushInputStream(); + + Runner runner = new Runner(context, this, stream); + + reader = new Thread(runner); + reader.start(); + + return this; + } + + @JRubyMethod(name="options") + public IRubyObject 
getOptions(ThreadContext context) { + return invoke(context, options, "options"); + } + + @JRubyMethod(name="options=") + public IRubyObject setOptions(ThreadContext context, IRubyObject val) { + invoke(context, options, "options=", val); + return getOptions(context); + } + + @JRubyMethod + public IRubyObject native_write(ThreadContext context, IRubyObject chunk, + IRubyObject isLast) { + byte[] data = chunk.toString().getBytes(); + + try { + stream.writeAndWaitForRead(data); + } catch (ClosedChannelException e) { + // ignore + } catch (IOException e) { + throw context.getRuntime().newRuntimeError(e.toString()); + } + + if (isLast.isTrue()) { + try { + stream.close(); + } catch (IOException e) { + // ignore + } + + for (;;) { + try { + reader.join(); + break; + } catch (InterruptedException e) { + // continue loop + } + } + } + return this; + } + + protected static class Runner implements Runnable { + protected ThreadContext context; + protected IRubyObject handler; + protected XmlSaxParserContext parser; + + public Runner(ThreadContext context, + IRubyObject handler, + InputStream stream) { + RubyClass klazz = (RubyClass) context.getRuntime() + .getClassFromPath("Nokogiri::XML::SAX::ParserContext"); + + this.context = context; + this.handler = handler; + this.parser = (XmlSaxParserContext) + XmlSaxParserContext.parse_stream(context, klazz, stream); + } + + public void run() { + parser.parse_with(context, handler); + } + } +} diff --git a/ext/java/nokogiri/XmlSyntaxError.java b/ext/java/nokogiri/XmlSyntaxError.java index d7c10f31406..a7dc827afe2 100644 --- a/ext/java/nokogiri/XmlSyntaxError.java +++ b/ext/java/nokogiri/XmlSyntaxError.java @@ -44,15 +44,15 @@ public static RubyException getXPathSyntaxError(ThreadContext context, Exception //TODO: Return correct message, domain, etc. 
- @JRubyMethod - @Override - public IRubyObject message(ThreadContext context) { - if(this.exception != null) { - return context.getRuntime().newString(this.exception.toString()); - } else { - return context.getRuntime().newString("no message"); - } - } +// @JRubyMethod +// @Override +// public IRubyObject message(ThreadContext context) { +// if(this.exception != null) { +// return context.getRuntime().newString(this.exception.toString()); +// } else { +// return context.getRuntime().newString("no message"); +// } +// } @JRubyMethod public IRubyObject domain(ThreadContext context) { @@ -103,4 +103,4 @@ public IRubyObject int1(ThreadContext context) { public IRubyObject column(ThreadContext context) { return context.getRuntime().getNil(); } -} \ No newline at end of file +} diff --git a/ext/java/nokogiri/XmlText.java b/ext/java/nokogiri/XmlText.java index 3bbcd456298..e3fff8c2636 100644 --- a/ext/java/nokogiri/XmlText.java +++ b/ext/java/nokogiri/XmlText.java @@ -1,25 +1,37 @@ package nokogiri; +import java.lang.RuntimeException; +import nokogiri.internals.NokogiriHelpers; +import nokogiri.internals.SaveContext; import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.w3c.dom.Node; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; + public class XmlText extends XmlNode { public XmlText(Ruby ruby, RubyClass rubyClass, Node node) { super(ruby, rubyClass, node); } @JRubyMethod(name = "new", meta = true) - public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject text, IRubyObject xNode) { - XmlNode xmlNode = (XmlNode)xNode; - XmlDocument xmlDoc = (XmlDocument)xmlNode.document(context); - Document document = xmlDoc.getDocument(); - Node node = document.createTextNode(text.convertToString().asJavaString()); - return 
XmlNode.constructNode(context.getRuntime(), node); - } + public static IRubyObject rbNew(ThreadContext context, IRubyObject cls, IRubyObject text, IRubyObject xNode) { + XmlNode xmlNode = asXmlNode(context, xNode); + XmlDocument xmlDoc = (XmlDocument)xmlNode.document(context); + Document document = xmlDoc.getDocument(); + String content = rubyStringToString(encode_special_chars(context, text)); + Node node = document.createTextNode(content); + return new XmlText(context.getRuntime(), (RubyClass) cls, node); + } + + @Override + public void saveContent(ThreadContext context, SaveContext ctx) { + ctx.append(rubyStringToString(content(context))); + } } diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index 225c8a6dc74..10229d2e3cb 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -1,7 +1,5 @@ package nokogiri; -import java.util.Set; - import nokogiri.internals.NokogiriNamespaceContext; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpression; @@ -38,12 +36,6 @@ public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject String src = expr.convertToString().asJavaString(); try { if(!handler.isNil()) { - if (!isContainsPrefix(src)) { - Set methodNames = handler.getMetaClass().getMethods().keySet(); - for (String name : methodNames) { - src = src.replaceAll(name, NokogiriNamespaceContext.NOKOGIRI_PREFIX+":"+name); - } - } xpath.setXPathFunctionResolver(new NokogiriXPathFunctionResolver(handler)); } XPathExpression xpathExpression = xpath.compile(src); @@ -52,16 +44,6 @@ public IRubyObject evaluate(ThreadContext context, IRubyObject expr, IRubyObject throw new RaiseException(XmlSyntaxError.getXPathSyntaxError(context, xpee)); } } - - private boolean isContainsPrefix(String str) { - Set prefixes = ((NokogiriNamespaceContext)xpath.getNamespaceContext()).getAllPrefixes(); - for (String prefix : prefixes) { - if (str.contains(prefix + ":")) { - return 
true; - } - } - return false; - } @JRubyMethod public IRubyObject evaluate(ThreadContext context, IRubyObject expr) { @@ -73,4 +55,4 @@ public IRubyObject register_ns(ThreadContext context, IRubyObject prefix, IRubyO ((NokogiriNamespaceContext) this.xpath.getNamespaceContext()).registerNamespace(prefix.convertToString().asJavaString(), uri.convertToString().asJavaString()); return this; } -} \ No newline at end of file +} diff --git a/ext/java/nokogiri/internals/HtmlDocumentImpl.java b/ext/java/nokogiri/internals/HtmlDocumentImpl.java deleted file mode 100644 index 136f5c8e640..00000000000 --- a/ext/java/nokogiri/internals/HtmlDocumentImpl.java +++ /dev/null @@ -1,66 +0,0 @@ -package nokogiri.internals; - -import nokogiri.HtmlDocument; -import nokogiri.XmlDocument; -import nokogiri.XmlNode; -import nokogiri.XmlNodeSet; -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.runtime.ThreadContext; -import org.w3c.dom.Document; -import org.w3c.dom.DocumentType; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class HtmlDocumentImpl extends XmlDocumentImpl { - - public HtmlDocumentImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - public XmlNode dup_impl(ThreadContext context, XmlDocument current, boolean deep, RubyClass klazz) { - Document newDoc = (Document) current.getDocument().cloneNode(deep); - - return new HtmlDocument(context.getRuntime(), klazz, newDoc); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 13; } - - @Override - public void saveContent(ThreadContext context, XmlNode document, SaveContext ctx) { - Document doc = (Document) document.getNode(); - DocumentType dtd = doc.getDoctype(); - - if(dtd != null) { - ctx.append("\n"); - } - - this.saveNodeListContentAsHtml(context, - (XmlNodeSet) this.children(context, document), ctx); - - ctx.append("\n"); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode node, SaveContext ctx) { - 
this.saveContent(context, node, ctx); - } -} diff --git a/ext/java/nokogiri/internals/HtmlDomParserContext.java b/ext/java/nokogiri/internals/HtmlDomParserContext.java new file mode 100644 index 00000000000..e79a73d6fda --- /dev/null +++ b/ext/java/nokogiri/internals/HtmlDomParserContext.java @@ -0,0 +1,96 @@ +package nokogiri.internals; + +import java.io.IOException; +import java.io.InputStream; +import javax.xml.parsers.ParserConfigurationException; +import nokogiri.HtmlDocument; +import nokogiri.XmlDocument; +import org.apache.xerces.parsers.DOMParser; +import org.apache.xerces.xni.Augmentations; +import org.apache.xerces.xni.QName; +import org.apache.xerces.xni.XMLAttributes; +import org.apache.xerces.xni.XNIException; +import org.apache.xerces.xni.parser.XMLDocumentFilter; +import org.apache.xerces.xni.parser.XMLParserConfiguration; +import org.cyberneko.html.HTMLConfiguration; +import org.cyberneko.html.filters.DefaultFilter; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import static nokogiri.internals.NokogiriHelpers.isNamespace; + +/** + * + * @author sergio + */ +public class HtmlDomParserContext extends XmlDomParserContext { + protected static final String PROPERTY_FILTERS = + "http://cyberneko.org/html/properties/filters"; + + public HtmlDomParserContext(Ruby runtime, IRubyObject options) { + super(runtime, options); + } + + public HtmlDomParserContext(Ruby runtime, long options) { + super(runtime, options); + } + + @Override + protected void initParser() { + XMLParserConfiguration config = new HTMLConfiguration(); + config.setProperty("http://cyberneko.org/html/properties/names/elems", + "lower"); + config.setProperty("http://cyberneko.org/html/properties/names/attrs", + "lower"); + + XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter(); + 
XMLDocumentFilter[] filters = { removeNSAttrsFilter }; + + parser = new DOMParser(config); + setFeature("http://xml.org/sax/features/namespaces", false); + setProperty(PROPERTY_FILTERS, filters); + } + + @Override + protected XmlDocument getNewEmptyDocument(ThreadContext context) { + IRubyObject[] args = new IRubyObject[0]; + return (XmlDocument) XmlDocument.rbNew(context, + context.getRuntime().getClassFromPath("Nokogiri::XML::Document"), + args); + } + + @Override + protected XmlDocument wrapDocument(ThreadContext context, + RubyClass klass, + Document doc) { + return new HtmlDocument(context.getRuntime(), klass, doc); + } + + /** + * Filter to strip out attributes that pertain to XML namespaces. + * + * @author sergio + * @author Patrick Mahoney + */ + public static class RemoveNSAttrsFilter extends DefaultFilter { + @Override + public void startElement(QName element, XMLAttributes attrs, + Augmentations augs) throws XNIException { + int i; + for (i = 0; i < attrs.getLength(); ++i) { + if (isNamespace(attrs.getQName(i))) { + attrs.removeAttributeAt(i); + --i; + } + } + + element.uri = null; + super.startElement(element, attrs, augs); + } + } +} diff --git a/ext/java/nokogiri/internals/HtmlEmptyDocumentImpl.java b/ext/java/nokogiri/internals/HtmlEmptyDocumentImpl.java deleted file mode 100644 index 1aa01d1b8e0..00000000000 --- a/ext/java/nokogiri/internals/HtmlEmptyDocumentImpl.java +++ /dev/null @@ -1,31 +0,0 @@ -package nokogiri.internals; - -import nokogiri.HtmlDocument; -import nokogiri.XmlDocument; -import nokogiri.XmlNode; -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class HtmlEmptyDocumentImpl extends XmlEmptyDocumentImpl{ - - public HtmlEmptyDocumentImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - protected void changeInternalNode(ThreadContext context, XmlDocument doc) { 
- doc.setInternalNode(new HtmlDocumentImpl(context.getRuntime(), doc.getDocument())); - } - - @Override - public XmlNode dup_impl(ThreadContext context, XmlDocument current, boolean deep, RubyClass klazz) { - return (XmlNode) HtmlDocument.rbNew(context, klazz, new IRubyObject[0]); - } -} diff --git a/ext/java/nokogiri/internals/HtmlParseOptions.java b/ext/java/nokogiri/internals/HtmlParseOptions.java deleted file mode 100644 index 000efc41831..00000000000 --- a/ext/java/nokogiri/internals/HtmlParseOptions.java +++ /dev/null @@ -1,74 +0,0 @@ -package nokogiri.internals; - -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; -import javax.xml.parsers.ParserConfigurationException; -import nokogiri.XmlDocument; -import org.apache.xerces.parsers.DOMParser; -import org.apache.xerces.xni.parser.XMLParserConfiguration; -import org.cyberneko.html.HTMLConfiguration; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Document; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -/** - * - * @author sergio - */ -public class HtmlParseOptions extends ParseOptions{ - - public HtmlParseOptions(IRubyObject options) { - super(options); - } - - public HtmlParseOptions(long options) { - super(options); - } - - @Override - protected XmlDocument getNewEmptyDocument(ThreadContext context) { - IRubyObject[] args = new IRubyObject[0]; - return (XmlDocument) XmlDocument.rbNew(context, - context.getRuntime().getClassFromPath("Nokogiri::XML::Document"), - args); - } - - @Override - public Document parse(InputSource input) - throws ParserConfigurationException, SAXException, IOException { - XMLParserConfiguration config = new HTMLConfiguration(); - config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); - config.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower"); - - DOMParser parser = new DOMParser(config); - 
parser.setFeature("http://xml.org/sax/features/namespaces", false); -// parser.setProperty("http://cyberneko.org/html/properties/filters", -// new XMLDocumentFilter[] { new DefaultFilter() { -// @Override -// public void startElement(QName element, XMLAttributes attrs, -// Augmentations augs) throws XNIException { -// element.uri = null; -// super.startElement(element, attrs, augs); -// } -// }}); - - parser.parse(input); - return parser.getDocument(); - } - - @Override - public Document parse(InputStream input) - throws ParserConfigurationException, SAXException, IOException { - return this.parse(new InputSource(input)); - } - - @Override - public Document parse(String input) - throws ParserConfigurationException, SAXException, IOException { - StringReader sr = new StringReader(input); - return this.parse(new InputSource(sr)); - } -} diff --git a/ext/java/nokogiri/internals/NokogiriHandler.java b/ext/java/nokogiri/internals/NokogiriHandler.java index 732482e5b66..89008f154d6 100644 --- a/ext/java/nokogiri/internals/NokogiriHandler.java +++ b/ext/java/nokogiri/internals/NokogiriHandler.java @@ -1,8 +1,10 @@ package nokogiri.internals; import nokogiri.XmlAttr; +import nokogiri.internals.XmlDeclHandler; import org.jruby.Ruby; import org.jruby.RubyArray; +import org.jruby.RubyClass; import org.jruby.RubyString; import org.jruby.javasupport.util.RuntimeHelpers; import org.jruby.runtime.ThreadContext; @@ -14,22 +16,31 @@ import java.util.logging.Logger; +import static nokogiri.internals.NokogiriHelpers.isNamespace; +import static nokogiri.internals.NokogiriHelpers.getPrefix; +import static nokogiri.internals.NokogiriHelpers.getLocalPart; +import static nokogiri.internals.NokogiriHelpers.stringOrNil; + /** * * @author sergio */ -public class NokogiriHandler extends DefaultHandler2 { +public class NokogiriHandler extends DefaultHandler2 + implements XmlDeclHandler { private static Logger LOGGER = Logger.getLogger(NokogiriHandler.class.getName()); boolean inCDATA = false; 
private Ruby ruby; + private RubyClass attrClass; private IRubyObject object; private boolean namespaceDefined = false; public NokogiriHandler(Ruby ruby, IRubyObject object) { this.ruby = ruby; + this.attrClass = (RubyClass) ruby.getClassFromPath( + "Nokogiri::XML::SAX::Parser::Attribute"); this.object = object; } @@ -38,69 +49,81 @@ public void startDocument() throws SAXException { call("start_document"); } + public void xmlDecl(String version, String encoding, String standalone) { + call("xmldecl", stringOrNil(ruby, version), + stringOrNil(ruby, encoding), + stringOrNil(ruby, standalone)); + } + @Override public void endDocument() throws SAXException { call("end_document"); } - /** - * @return true if an XML namespace has been defined in the document, false otherwise. + /* + * This has to call either "start_element" or + * "start_element_namespace" depending on whether there are any + * namespace attributes. + * + * Attributes that define namespaces are passed in a separate + * array of of [:prefix, :uri] arrays and are not + * passed with the other attributes. */ - private boolean isNamespaceDefined() { - // Determining the namespace is important because we only want - // start_element_namespace to be called if we have an 'xmlns' somewhere in the - // document, even if the attribute or element is defined with foo:bar. 
- return namespaceDefined; - } + @Override + public void startElement(String uri, String localName, String qName, + Attributes attrs) throws SAXException { + // for attributes other than namespace attrs + RubyArray rubyAttr = RubyArray.newArray(ruby); + // for namespace defining attributes + RubyArray rubyNSAttr = RubyArray.newArray(ruby); - private void inspectElementForNamespace(String qName, Attributes attrs) { - LOGGER.fine("inspectElementForNamespace: qName = " + qName + ", attrs = " + attrs.toString()); - if (qName.equals("xmlns") || qName.startsWith("xmlns:")) { - namespaceDefined = true; - } + ThreadContext context = ruby.getCurrentContext(); for (int i = 0; i < attrs.getLength(); i++) { - if (attrs.getQName(i).startsWith("xmlns")) { - namespaceDefined = true; - break; + String u = attrs.getURI(i); + String qn = attrs.getQName(i); + String ln = attrs.getLocalName(i); + String val = attrs.getValue(i); + String pre; + + pre = getPrefix(qn); + if (ln == null || ln.equals("")) ln = getLocalPart(qn); + + if (isNamespace(qn)) { + RubyArray ns = RubyArray.newArray(ruby, 2); + if (ln.equals("xmlns")) ln = null; + ns.add(stringOrNil(ruby, ln)); + ns.add(ruby.newString(val)); + rubyNSAttr.add(ns); + } else { + IRubyObject[] args = new IRubyObject[4]; + args[0] = stringOrNil(ruby, ln); + args[1] = stringOrNil(ruby, pre); + args[2] = stringOrNil(ruby, u); + args[3] = stringOrNil(ruby, val); + + IRubyObject attr = + RuntimeHelpers.invoke(context, attrClass, "new", args); + rubyAttr.add(attr); } } - } - - /* - * This has to call either "start_element" or "start_element_namespace" depending on whether there - * are any namespace attributes. 
- */ - @Override - public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException { - int attributeLength = attrs.getLength(); - RubyArray rubyAttributes = RubyArray.newArray(ruby, attributeLength); - - inspectElementForNamespace(qName, attrs); - - if (attributeLength > 0) { - // We expect attr to have "attr.prefix" - // We expect attr to have "attr.localname" - // We expect attr to have "attr.uri" - for (int i = 0; i < attributeLength; i++) { - String u = attrs.getURI(i); - String q = attrs.getQName(i); - String n = attrs.getLocalName(i); - String v = attrs.getValue(i); - //System.out.println("qName = " + q + ", localName = " + n + ", uri = " + u + "other uri = " + uri); - XmlSaxAttribute attr = new XmlSaxAttribute(ruby, u, q, n, v); - rubyAttributes.add(attr); - } - call("start_element_namespace", ruby.newString(qName), rubyAttributes); - } else { - call("start_element", ruby.newString(qName), rubyAttributes); - } + if (localName == null || localName.equals("")) + localName = getLocalPart(qName); + call("start_element_namespace", + stringOrNil(ruby, localName), + rubyAttr, + stringOrNil(ruby, getPrefix(qName)), + stringOrNil(ruby, uri), + rubyNSAttr); } - + @Override public void endElement(String uri, String localName, String qName) throws SAXException { - call("end_element", ruby.newString(qName)); + call("end_element_namespace", + stringOrNil(ruby, localName), + stringOrNil(ruby, getPrefix(qName)), + stringOrNil(ruby, uri)); } @Override @@ -155,56 +178,31 @@ private void call(String methodName, IRubyObject arg1, IRubyObject arg2) { RuntimeHelpers.invoke(context, document(context), methodName, arg1, arg2); } - private IRubyObject document(ThreadContext context){ - return RuntimeHelpers.invoke(context, this.object, "document"); + private void call(String methodName, IRubyObject arg1, IRubyObject arg2, + IRubyObject arg3) { + ThreadContext context = ruby.getCurrentContext(); + RuntimeHelpers.invoke(context, 
document(context), methodName, + arg1, arg2, arg3); + } + + private void call(String methodName, + IRubyObject arg0, + IRubyObject arg1, + IRubyObject arg2, + IRubyObject arg3, + IRubyObject arg4) { + IRubyObject[] args = new IRubyObject[5]; + args[0] = arg0; + args[1] = arg1; + args[2] = arg2; + args[3] = arg3; + args[4] = arg4; + ThreadContext context = ruby.getCurrentContext(); + RuntimeHelpers.invoke(context, document(context), methodName, args); } - /* - * This is a "temporary" class to fix the test in test_parser.rb which expect attributes - * to have attr.prefix and attr.localname defined. - * - * TODO: Review to see if this class can be eliminated or refactored. - */ - public static final class XmlSaxAttribute { - private Ruby ruby; - private String uri; - private String qName; - private String localName; - private String value; - - public XmlSaxAttribute(Ruby ruby, String uri, String qName, String localName, String value) { - this.ruby = ruby; - this.uri = uri; - this.qName = qName; - this.localName = localName; - this.value = value; - } - - public RubyString getQName() { - return ruby.newString(this.qName); - } - - public RubyString getPrefix() { - int pos = this.qName.indexOf(':'); - String prefix; - if (pos > 0) { - prefix = this.qName.substring(0, pos); - } else { - prefix = this.qName; - } - return ruby.newString(prefix); - } - - public RubyString getLocalname() { - return ruby.newString(this.localName); - } - - public RubyString getValue() { - return ruby.newString(this.value); - } - - public RubyString getUri() { - return ruby.newString(this.uri); - } + private IRubyObject document(ThreadContext context){ + return RuntimeHelpers.invoke(context, this.object, "document"); } + } diff --git a/ext/java/nokogiri/internals/NokogiriHelpers.java b/ext/java/nokogiri/internals/NokogiriHelpers.java index 962684abe0f..a4330a79b1b 100644 --- a/ext/java/nokogiri/internals/NokogiriHelpers.java +++ b/ext/java/nokogiri/internals/NokogiriHelpers.java @@ -5,12 +5,16 @@ 
package nokogiri.internals; +import java.nio.charset.Charset; +import java.nio.ByteBuffer; + import nokogiri.XmlNode; import org.jruby.Ruby; import org.jruby.RubyArray; +import org.jruby.RubyString; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.ByteList; import org.w3c.dom.Attr; -import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -35,20 +39,96 @@ public static IRubyObject getCachedNodeOrCreate(Ruby ruby, Node node) { return xmlNode; } - public static String getLocalName(String name) { - int index = name.indexOf(':'); - if(index == -1) { - return name; - } else { - return name.substring(index+1); - } + public static IRubyObject stringOrNil(Ruby ruby, String s) { + if (s == null) + return ruby.getNil(); + + return ruby.newString(s); + } + + /** + * Convert s to a RubyString, or if s is null or + * empty return RubyNil. + */ + public static IRubyObject nonEmptyStringOrNil(Ruby ruby, String s) { + if (s == null || s.isEmpty()) + return ruby.getNil(); + + return ruby.newString(s); + } + + /** + * Return the prefix of a qualified name like "prefix:local". + * Returns null if there is no prefix. + */ + public static String getPrefix(String qName) { + if (qName == null) return null; + + int pos = qName.indexOf(':'); + if (pos > 0) + return qName.substring(0, pos); + else + return null; + } + + /** + * Return the local part of a qualified name like "prefix:local". + * Returns qName if there is no prefix. + */ + public static String getLocalPart(String qName) { + if (qName == null) return null; + + int pos = qName.indexOf(':'); + if (pos > 0) + return qName.substring(pos + 1); + else + return qName; } public static String getLocalNameForNamespace(String name) { - String localName = getLocalName(name); + String localName = getLocalPart(name); return ("xmlns".equals(localName)) ? 
null : localName; } + protected static Charset utf8 = null; + protected static Charset getCharsetUTF8() { + if (utf8 == null) { + utf8 = Charset.forName("UTF-8"); + } + + return utf8; + } + + /** + * Converts a RubyString in to a Java String. Assumes the + * RubyString is encoded as UTF-8. This is generally the case for + * RubyStrings created with getRuntime().newString("java string"). + * It also seems to be the case for strings created within Ruby + * where $KCODE has not been set. + * + * Note that RubyString#toString() decodes the string data as + * ISO-8859-1 (See org.jruby.util.ByteList.java). This is not + * what you want if you have any multibyte characters in your + * UTF-8 string. + * + * FIXME: This really needs to be more robust in terms of + * detecting the encoding and properly converting to a Java + * String. It's unfortunate that RubyString#toString() doesn't do + * this for us. + */ + public static String rubyStringToString(IRubyObject str) { + return rubyStringToString(str.convertToString()); + } + + public static String rubyStringToString(RubyString str) { + ByteList byteList = str.getByteList(); + byte[] data = byteList.unsafeBytes(); + int offset = byteList.begin(); + int len = byteList.length(); + ByteBuffer buf = ByteBuffer.wrap(data, offset, len); + return getCharsetUTF8().decode(buf).toString(); + } + public static String getNodeCompletePath(Node node) { Node cur, tmp, next; @@ -58,7 +138,8 @@ public static String getNodeCompletePath(Node node) { String sep; String name; - int occur = 0, generic; + int occur = 0; + boolean generic; cur = node; @@ -66,7 +147,7 @@ public static String getNodeCompletePath(Node node) { name = ""; sep = "?"; occur = 0; - generic = 0; + generic = false; if(cur.getNodeType() == Node.DOCUMENT_NODE) { if(buffer.startsWith("/")) break; @@ -74,15 +155,16 @@ public static String getNodeCompletePath(Node node) { sep = "/"; next = null; } else if(cur.getNodeType() == Node.ELEMENT_NODE) { - generic = 0; + generic = 
false; sep = "/"; name = cur.getLocalName(); + if (name == null) name = cur.getNodeName(); if(cur.getNamespaceURI() != null) { if(cur.getPrefix() != null) { name = cur.getPrefix() + ":" + name; } else { - generic = 1; + generic = true; name = "*"; } } @@ -97,7 +179,7 @@ public static String getNodeCompletePath(Node node) { while(tmp != null) { if((tmp.getNodeType() == Node.ELEMENT_NODE) && - (generic != 0 || compareTwoNodes(tmp,cur))) { + (generic || fullNamesMatch(tmp, cur))) { occur++; } tmp = tmp.getPreviousSibling(); @@ -108,7 +190,7 @@ public static String getNodeCompletePath(Node node) { while(tmp != null && occur == 0) { if((tmp.getNodeType() == Node.ELEMENT_NODE) && - (generic != 0 || compareTwoNodes(tmp,cur))) { + (generic || fullNamesMatch(tmp,cur))) { occur++; } tmp = tmp.getNextSibling(); @@ -258,6 +340,24 @@ protected static boolean compareTwoNodes(Node m, Node n) { nodesAreEqual(m.getPrefix(), n.getPrefix()); } + protected static boolean fullNamesMatch(Node a, Node b) { + return a.getNodeName().equals(b.getNodeName()); + //return getFullName(a).equals(getFullName(b)); + } + + protected static String getFullName(Node n) { + String lname = n.getLocalName(); + String prefix = n.getPrefix(); + if (lname != null) { + if (prefix != null) + return prefix + ":" + lname; + else + return lname; + } else { + return n.getNodeName(); + } + } + private static boolean nodesAreEqual(Object a, Object b) { return (((a == null) && (a == null)) || (a != null) && (b != null) && @@ -283,17 +383,24 @@ public static String getNodeName(Node node) { } else if(name.equals("#text")) { return "text"; } else { - name = getLocalName(name); + name = getLocalPart(name); return (name == null) ? 
"" : name; } } + public static final String XMLNS_URI = + "http://www.w3.org/2000/xmlns/"; public static boolean isNamespace(Node node) { - return isNamespace(node.getNodeName()); + return (XMLNS_URI.equals(node.getNamespaceURI()) || + isNamespace(node.getNodeName())); + } + + public static boolean isNamespace(String nodeName) { + return (nodeName.equals("xmlns") || nodeName.startsWith("xmlns:")); } - public static boolean isNamespace(String string) { - return string.equals("xmlns") || string.startsWith("xmlns:"); + public static boolean isNonDefaultNamespace(Node node) { + return (isNamespace(node) && ! "xmlns".equals(node.getNodeName())); } public static String newQName(String newPrefix, Node node) { @@ -311,12 +418,4 @@ public static RubyArray nodeListToRubyArray(Ruby ruby, NodeList nodes) { } return n; } - - public static RubyArray namedNodeMapToRubyArray(Ruby ruby, NamedNodeMap map) { - RubyArray n = RubyArray.newArray(ruby, map.getLength()); - for(int i = 0; i < map.getLength(); i++) { - n.append(NokogiriHelpers.getCachedNodeOrCreate(ruby, map.item(i))); - } - return n; - } } diff --git a/ext/java/nokogiri/internals/NokogiriNamespaceCache.java b/ext/java/nokogiri/internals/NokogiriNamespaceCache.java index 366977caedc..b7401245f6c 100644 --- a/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +++ b/ext/java/nokogiri/internals/NokogiriNamespaceCache.java @@ -29,6 +29,7 @@ public XmlNamespace get(ThreadContext context, XmlNode node, String prefix, Stri this.cache.put(prefix, secondCache); } + if (href == null) return null; XmlNamespace ns = secondCache.get(href); if( ns == null) { diff --git a/ext/java/nokogiri/internals/NokogiriNamespaceContext.java b/ext/java/nokogiri/internals/NokogiriNamespaceContext.java index 57069b86d7d..32333841ade 100644 --- a/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +++ b/ext/java/nokogiri/internals/NokogiriNamespaceContext.java @@ -1,35 +1,31 @@ package nokogiri.internals; -import java.util.ArrayList; import 
java.util.Hashtable; import java.util.Iterator; -import java.util.List; -import java.util.Set; -import java.util.Map.Entry; - import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; -public class NokogiriNamespaceContext implements NamespaceContext { - public static final String NOKOGIRI_PREFIX = "nokogiri"; - public static final String NOKOGIRI_URI = "http://www.nokogiri.org/default_ns/ruby/extensions_functions"; +public class NokogiriNamespaceContext implements NamespaceContext{ + Hashtable register; public NokogiriNamespaceContext(){ this.register = new Hashtable(); - register.put(NOKOGIRI_PREFIX, NOKOGIRI_URI); } public String getNamespaceURI(String prefix) { - if (prefix == null) { - throw new IllegalArgumentException(); - } +// System.out.println("Asked for " + prefix); String uri = this.register.get(prefix); - if (uri != null) { + if(uri != null) { +// System.out.println("Returned "+uri); return uri; } - if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) { +// System.out.println("Returned another url"); + + if(prefix == null) { + throw new IllegalArgumentException(); + } else if(prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) { uri = this.register.get(XMLConstants.XMLNS_ATTRIBUTE); return (uri == null) ? 
XMLConstants.XMLNS_ATTRIBUTE_NS_URI : uri; } @@ -38,33 +34,16 @@ public String getNamespaceURI(String prefix) { } public String getPrefix(String uri){ - if (uri == null) { - throw new IllegalArgumentException("uri is null"); - } else if (uri.equals(NOKOGIRI_URI)) { - return NOKOGIRI_PREFIX; - } return null; } - public Iterator getPrefixes(String uri){ - if (register == null) return null; - Set> entries = register.entrySet(); - List list = new ArrayList(); - for (Entry entry : entries) { - if (uri.equals(entry.getValue())) { - list.add(entry.getKey()); - } - } - return list.iterator(); - } - - public Set getAllPrefixes() { - if (register == null) return null; - return register.keySet(); + public Iterator getPrefixes(String uri){ + return null; } public void registerNamespace(String prefix, String uri){ - if ("xmlns".equals(prefix)) prefix = ""; + if("xmlns".equals(prefix)) prefix = ""; +// System.out.println("Registered prefix "+prefix+" with uri " + uri); this.register.put(prefix, uri); } -} \ No newline at end of file +} diff --git a/ext/java/nokogiri/internals/NokogiriUserDataHandler.java b/ext/java/nokogiri/internals/NokogiriUserDataHandler.java index b6348de8c8a..b12bf47a830 100644 --- a/ext/java/nokogiri/internals/NokogiriUserDataHandler.java +++ b/ext/java/nokogiri/internals/NokogiriUserDataHandler.java @@ -41,7 +41,7 @@ public void handle(short opCode, String key, Object data, Node src, Node dst) { private void nodeAdopted(String key, Object data, Node src, Node dst) { if(CACHED_NODE.equals(key)) { - ((XmlNode) data).resetDocumentCache(); + ((XmlNode) data).resetCache(ruby); } } @@ -67,7 +67,7 @@ private void nodeImported(String key, Object data, Node src, Node dst) { private void nodeRenamed(String key, Object data, Node src, Node dst) { if(CACHED_NODE.equals(key)) { - ((XmlNode) data).resetDueToRenaming(); + ((XmlNode) data).resetCache(ruby); } } diff --git a/ext/java/nokogiri/internals/ParserContext.java b/ext/java/nokogiri/internals/ParserContext.java 
new file mode 100644 index 00000000000..e131d72a34b --- /dev/null +++ b/ext/java/nokogiri/internals/ParserContext.java @@ -0,0 +1,107 @@ +package nokogiri.internals; + +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.StringReader; + +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyIO; +import org.jruby.RubyObject; +import org.jruby.RubyString; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.TypeConverter; +import org.xml.sax.InputSource; + +import static org.jruby.javasupport.util.RuntimeHelpers.invoke; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; + +/** + * Base class for the various parser contexts. Handles converting + * Ruby objects to InputSource objects. + * + * @author Patrick Mahoney + */ +public class ParserContext extends RubyObject { + protected InputSource source = null; + + public ParserContext(Ruby runtime) { + // default to class 'Object' because this class isn't exposed to Ruby + super(runtime, runtime.getObject()); + } + + public ParserContext(Ruby runtime, RubyClass klass) { + super(runtime, klass); + } + + protected InputSource getInputSource() { + return source; + } + + /** + * Set the InputSource from data which may be an IO + * object, a String, or a StringIO. 
+ */ + public void setInputSource(ThreadContext context, + IRubyObject data) { + Ruby ruby = context.getRuntime(); + + if (invoke(context, data, "respond_to?", + ruby.newSymbol("to_io").to_sym()).isTrue()) { + /* IO or other object that responds to :to_io */ + RubyIO io = + (RubyIO) TypeConverter.convertToType(data, + ruby.getIO(), + "to_io"); + source = new InputSource(io.getInStream()); + } else { + RubyString str; + if (invoke(context, data, "respond_to?", + ruby.newSymbol("string").to_sym()).isTrue()) { + /* StringIO or other object that responds to :string */ + str = invoke(context, data, "string").convertToString(); + } else if (data instanceof RubyString) { + str = (RubyString) data; + } else { + throw ruby.newArgumentError( + "must be kind_of String or respond to :to_io or :string"); + } + + // I don't know why ByteArrayInputStream doesn't + // work... It's a similar problem to that + // rubyStringToString is supposed to solve (treating Ruby + // string data as UTF-8). But StringReader seems to work, + // so going with it. -- Patrick + + //byte[] bytes = rubyStringToString(str).getBytes(); + //source = new InputSource(new ByteArrayInputStream(bytes)); + source = new InputSource(new StringReader(rubyStringToString(str))); + } + } + + /** + * Set the InputSource to read from file, a String filename. + */ + public void setInputSourceFile(ThreadContext context, IRubyObject file) { + String filename = rubyStringToString(file); + + try{ + source = new InputSource(new FileInputStream(filename)); + } catch (Exception e) { + throw RaiseException + .createNativeRaiseException(context.getRuntime(), e); + } + + } + + /** + * Set the InputSource from stream. 
+ */ + public void setInputSource(InputStream stream) { + source = new InputSource(stream); + } + +} diff --git a/ext/java/nokogiri/internals/PushInputStream.java b/ext/java/nokogiri/internals/PushInputStream.java new file mode 100644 index 00000000000..33e2f9601a5 --- /dev/null +++ b/ext/java/nokogiri/internals/PushInputStream.java @@ -0,0 +1,381 @@ +package nokogiri.internals; + +import java.io.InputStream; +import java.io.IOException; +import java.nio.channels.ClosedChannelException; +import java.lang.Math; +import java.lang.Thread; +import java.util.ArrayList; + + +/** + * Implements a "push" InputStream. An owner thread create an + * InputStream and passes it to a second thread. The owner thread + * calls PushInputStream.write() to write data to the stream. The + * second thread calls PushInputStream.read() and other InputStream + * methods. + * + * You should ensure that only one thread write to, and only one + * thread reads to, this stream, though nothing enforces this + * strictly. + */ +public class PushInputStream extends InputStream { + /** + * Current position in the stream relative to the start of the + * buffer. + */ + protected int pos; + + /** + * Current mark position, or -1 if there is no mark. + */ + protected int mark; + + protected int readlimit; + + /** + * State is open or closed. + */ + protected boolean isOpen; + + protected Buffer buffer; + + public PushInputStream() { + pos = 0; + mark = -1; + readlimit = -1; + isOpen = true; + + buffer = new Buffer(512); + } + + protected synchronized void ensureOpen() throws IOException { + if (!isOpen) { + throw new ClosedChannelException(); + } + } + + /** + * Write data that can be read from the stream. 
+ */ + public synchronized void write(byte[] b) { + if (buffer == null) System.out.println("BUFFER IS NULL"); + if (b == null) System.out.println("BYTE ARRAY IS NILL"); + buffer.put(b); + notifyAll(); // notify readers waiting + } + + /** + * Write data and then wait until all the data has been read + * (waits until the thread reading from this stream is blocked in + * a read()). + */ + public synchronized void writeAndWaitForRead(byte[] b) throws IOException { + ensureOpen(); + write(b); + for (;;) { + try { + wait(); + break; + } catch (InterruptedException e) { + // continue waiting + } + } + } + + /* + *------------------------------------------------------------ + * InputStream methods + *------------------------------------------------------------ + */ + + /** + * @see InputStream.available() + */ + @Override + public synchronized int available() throws IOException { + ensureOpen(); + return buffer.size() - pos; + } + + int nClose = 0; + /** + * @see InputStream.close() + */ + @Override + public synchronized void close() throws IOException { + if (!isOpen) return; + isOpen = false; + buffer = null; + notifyAll(); + } + + /** + * @see InputStream.mark() + */ + @Override + public synchronized void mark(int readlimit) { + this.mark = pos; + this.readlimit = readlimit; + } + + /** + * Mark the current position in this stream. Supported by + * PushInputStream. 
+ * + * @see InputStream.markSupported() + */ + @Override + public synchronized boolean markSupported() { + return true; + } + + /** + * @see InputStream.read() + */ + @Override + public synchronized int read() throws IOException { + ensureOpen(); + byte[] b = new byte[1]; + read(b, 0, 1); + return (int) b[0]; + } + + /** + * @see InputStream.read(byte[]) + */ + @Override + public synchronized int read(byte[] b) throws IOException { + ensureOpen(); + return read(b, 0, b.length); + } + + protected synchronized boolean markIsValid() { + return (mark >= 0 && pos < mark+readlimit); + } + + /** + * @see InputStream.read(byte[], int, int) + */ + @Override + public synchronized int read(byte[] b, int off, int len) throws IOException { + while (isOpen && available() == 0) { + /* block until data available */ + try { + notifyAll(); // notify writers waiting + wait(); + } catch (InterruptedException e) { + // continue waiting + } + } + + if (!isOpen) { + return -1; + } + + int readLen = Math.min(available(), len); + + buffer.get(pos, readLen, b, off); + pos += readLen; + + int reduce; + + if (markIsValid()) { + reduce = mark; + } else { + reduce = pos; + } + + buffer.truncateFromStart(buffer.size - reduce); + pos -= reduce; + mark -= reduce; + if (mark < 0) mark = -1; // don't wrap mark around? 
+ + return readLen; + } + + /** + * @see InputStream.reset() + */ + @Override + public synchronized void reset() throws IOException { + ensureOpen(); + if (markIsValid()) + pos = mark; + } + + /** + * @see InputStream.skip() + */ + @Override + public synchronized long skip(long n) throws IOException { + ensureOpen(); + pos += n; + return n; + } + + /* + *------------------------------------------------------------ + * Data Buffer + *------------------------------------------------------------ + */ + + public static class Block { + protected byte[] data; + + public Block(int size) { + data = new byte[size]; + } + + public void copyIn(byte[] src, int srcPos, int destPos, int length) { + System.arraycopy(src, srcPos, data, destPos, length); + } + + public void copyOut(int srcPos, byte[] dest, int destPos, int length) { + System.arraycopy(data, srcPos, dest, destPos, length); + } + } + + public static class BlockList extends ArrayList { + public BlockList() { + super(); + } + + @Override + public void removeRange(int fromIndex, int toIndex) { + super.removeRange(fromIndex, toIndex); + } + } + + public static class Buffer { + protected int blockSize; + protected BlockList blocks; + + /** + * Offset (position) to the first logical byte in the buffer. + */ + protected int offset; + + /** + * Logical size of the buffer. + */ + protected int size; + + public Buffer(int blockSize) { + this.blockSize = blockSize; + this.blocks = new BlockList(); + this.offset = 0; + this.size = 0; + } + + public int size() { + return size; + } + + protected class Segment { + /** + * Block index. + */ + protected int block; + + /** + * Offset into the block. + */ + protected int off; + + /** + * Length of segment. + */ + protected int len; + + /** + * Calculate the block number and block offset given a position. 
+ */ + protected Segment(int pos) { + int absPos = offset + pos; + block = (int) (absPos / blockSize); + off = (int) (absPos % blockSize); + len = -1; + } + } + + protected Segment[] accessList(int pos, int size) { + Segment start = new Segment(pos); + Segment end = new Segment(pos + size); + int nBlocks = end.block - start.block + 1; + Segment[] segs = new Segment[nBlocks]; + + start.len = Math.min(size, blockSize - start.off); + segs[0] = start; + int currPos = pos + start.len; + int currSize = start.len; + for (int i = 1; i < nBlocks; i++) { + Segment seg = new Segment(currPos); + seg.len = Math.min(blockSize, size - currSize); + segs[i] = seg; + currPos += seg.len; + currSize += seg.len; + } + + return segs; + } + + protected void ensureCapacity(int pos) { + Segment seg = new Segment(pos-1); + + while (blocks.size() < (seg.block + 1)) + blocks.add(new Block(blockSize)); + } + + public void put(byte b) { + byte[] buf = new byte[1]; + buf[0] = b; + put(buf); + } + + public void put(byte[] b) { + ensureCapacity(size + b.length); + Segment[] segs = accessList(size, b.length); + + int off = 0; + for (int i = 0; i < segs.length; i++) { + Block block = blocks.get(segs[i].block); + block.copyIn(b, off, segs[i].off, segs[i].len); + } + + size += b.length; + } + + public byte[] get(int pos, int len) { + byte[] b = new byte[len]; + get(pos, len, b, 0); + return b; + } + + /** + * Throws IndexOutOfBoundsException. + */ + public void get(int pos, int len, byte[] b, int off) { + Segment[] segs = accessList(pos, len); + for (int i = 0; i < segs.length; i++) { + Block block = blocks.get(segs[i].block); + block.copyOut(segs[i].off, b, off, segs[i].len); + } + } + + /** + * Truncate the buffer to newSize by removing + * data from the start of the buffer. 
+ */ + public void truncateFromStart(int newSize) { + if (newSize > size || newSize < 0) + throw new RuntimeException("invalid size"); + + Segment newStart = new Segment(size - newSize); + blocks.removeRange(0, newStart.block); + + size = newSize; + offset = newStart.off; + } + } +} diff --git a/ext/java/nokogiri/internals/SaveContext.java b/ext/java/nokogiri/internals/SaveContext.java index e2988291da4..5ea4b6dc564 100644 --- a/ext/java/nokogiri/internals/SaveContext.java +++ b/ext/java/nokogiri/internals/SaveContext.java @@ -1,5 +1,9 @@ package nokogiri.internals; +import java.lang.Character; +import org.jruby.Ruby; +import org.jruby.RubyString; + /** * * @author sergio @@ -46,6 +50,10 @@ public void append(String s) { this.buffer.append(s); } + public void append(char c) { + buffer.append(c); + } + public void append(StringBuffer sb) { this.buffer.append(sb); } @@ -62,6 +70,113 @@ public void appendQuoted(StringBuffer sb) { this.append("\""); } + public void emptyTag(String name) { + emptyTagStart(name); + emptyTagEnd(name); + } + + public void emptyTagStart(String name) { + openTagInlineStart(name); + } + + public void emptyTagEnd(String name) { + if (asHtml) { + if (noEmpty()) { + append(">"); + } else { + openTagInlineEnd(); + closeTagInline(name); + } + } else if (xhtml) { + append(" />"); + } else { + append("/>"); + } + } + + public void openTag(String name) { + openTagStart(name); + openTagEnd(); + } + + public void openTagStart(String name) { + maybeBreak(); + indent(); + append("<"); + append(name); + } + + public void openTagEnd() { + append(">"); + maybeBreak(); + increaseLevel(); + } + + public void closeTag(String name) { + decreaseLevel(); + maybeBreak(); + indent(); + append(""); + maybeBreak(); + } + + public void openTagInline(String name) { + openTagInlineStart(name); + openTagInlineEnd(); + } + + public void openTagInlineStart(String name) { + maybeIndent(); + append("<"); + append(name); + } + + public void openTagInlineEnd() { + append(">"); 
+ } + + public void closeTagInline(String name) { + append(""); + } + + public void maybeBreak() { + if (format && !endsInNewline()) append('\n'); + } + + public void maybeSpace() { + if (format && !endsInWhitespace()) append(' '); + } + + /** + * Indent if this is the start of a fresh line. + */ + public void maybeIndent() { + if (format && endsInNewline()) indent(); + } + + public void indent() { + if (format) append(getCurrentIndentString()); + } + + public boolean endsInWhitespace() { + return (Character.isWhitespace(lastChar())); + } + + public boolean endsInNewline() { + return (lastChar() == '\n'); + } + + public char lastChar() { + if (buffer.length() == 0) return '\n'; // logically, the char + // *before* a text file + // is a newline + return buffer.charAt(buffer.length() - 1); + } + public boolean asHtml() { return this.asHtml; } public boolean asXml() { return this.asXml; } @@ -103,5 +218,9 @@ public void increaseLevel() { @Override public String toString() { return this.buffer.toString(); } + public RubyString toRubyString(Ruby runtime) { + return new RubyString(runtime, runtime.getString(), buffer); + } + public boolean Xhtml() { return this.xhtml; } } diff --git a/ext/java/nokogiri/internals/XmlAttrImpl.java b/ext/java/nokogiri/internals/XmlAttrImpl.java deleted file mode 100644 index eed9ea7b537..00000000000 --- a/ext/java/nokogiri/internals/XmlAttrImpl.java +++ /dev/null @@ -1,103 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlNode; -import org.jruby.Ruby; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Attr; -import org.w3c.dom.Element; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlAttrImpl extends XmlNodeImpl{ - - public static final String[] HTML_BOOLEAN_ATTRS = { - "checked", "compact", "declare", "defer", "disabled", "ismap", - "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", - "selected" - }; - - public XmlAttrImpl(Ruby 
ruby, Node node) { - super(ruby, node); - } - - public boolean isHtmlBooleanAttr() { - String name = this.getNode().getNodeName().toLowerCase(); - - for(String s : HTML_BOOLEAN_ATTRS) { - if(s.equals(name)) return true; - } - - return false; - } - - private String serializeAttrTextContent(String s) { - char[] c = s.toCharArray(); - StringBuffer buffer = new StringBuffer(c.length); - - for(int i = 0; i < c.length; i++) { - switch(c[i]){ - case '\n': buffer.append(" "); break; - case '\r': buffer.append(" "); break; - case '\t': buffer.append(" "); break; - case '"': buffer.append("""); break; - case '<': buffer.append("<"); break; - case '>': buffer.append(">"); break; - case '&': buffer.append("&"); break; - default: buffer.append(c[i]); - } - } - - return buffer.toString(); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 2; } - - @Override - public void node_name_set(ThreadContext context, XmlNode current, IRubyObject nodeName) { - String newName = nodeName.convertToString().asJavaString(); - current.getNode().getOwnerDocument().renameNode(current.getNode(), null, newName); - current.setName(nodeName); - } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - Attr attr = (Attr) current.getNode(); - ctx.append(" "); - ctx.append(attr.getNodeName()); - ctx.append("=\""); - ctx.append(serializeAttrTextContent(attr.getValue())); - ctx.append("\""); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - Attr attr = (Attr) current.getNode(); - ctx.append(" "); - - ctx.append(attr.getNodeName()); - - if(!this.isHtmlBooleanAttr()) { - String value = attr.getValue(); - if(value != null) { - ctx.append("="); - ctx.append("\""); - ctx.append(serializeAttrTextContent(attr.getValue())); - ctx.append("\""); - } else { - ctx.append("=\"\""); - } - } - } - - @Override - public void unlink(ThreadContext context, XmlNode current) { - Attr attr = (Attr) 
current.getNode(); - Element parent = attr.getOwnerElement(); - parent.removeAttributeNode(attr); - } -} diff --git a/ext/java/nokogiri/internals/XmlAttributeDeclImpl.java b/ext/java/nokogiri/internals/XmlAttributeDeclImpl.java deleted file mode 100644 index 2f5e95fa4c5..00000000000 --- a/ext/java/nokogiri/internals/XmlAttributeDeclImpl.java +++ /dev/null @@ -1,45 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlAttributeDecl; -import nokogiri.XmlNode; - -import org.apache.xerces.dom.AttrImpl; -import org.jruby.Ruby; -import org.jruby.javasupport.JavaUtil; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Node; - -/** - * Implementation for ATTLIST declaration of DTD - * - * @author Yoko Harada - */ -public class XmlAttributeDeclImpl extends XmlNodeImpl { - private String declaration = null; - - public XmlAttributeDeclImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - public IRubyObject getDefault(ThreadContext context) { - return JavaUtil.convertJavaToRuby(context.getRuntime(), ((AttrImpl)getNode()).getTextContent()); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 16; } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(declaration); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - saveContent(context, current,ctx); - } - - public void setDeclaration(String declaration) { - this.declaration = declaration; - } -} diff --git a/ext/java/nokogiri/internals/XmlCdataImpl.java b/ext/java/nokogiri/internals/XmlCdataImpl.java deleted file mode 100644 index cb4cf828687..00000000000 --- a/ext/java/nokogiri/internals/XmlCdataImpl.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ - -package nokogiri.internals; - -import nokogiri.XmlNode; -import org.jruby.Ruby; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.CDATASection; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlCdataImpl extends XmlNodeImpl { - - public XmlCdataImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - public IRubyObject blank_p(ThreadContext context, XmlNode node) { - return context.getRuntime().newBoolean(this.isBlankNode(context, node)); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 4; } - - @Override - public IRubyObject getNullContent(ThreadContext context) { - return context.getRuntime().getNil(); - } - - @Override - public void saveContent(ThreadContext context, XmlNode cur, SaveContext ctx) { - CDATASection cdata = (CDATASection) cur.getNode(); - - if(cdata.getData().length() == 0) { - ctx.append(""); - } else { - ctx.append(""); - } - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode cur, SaveContext ctx) { - this.saveContent(context, cur, ctx); - } -} diff --git a/ext/java/nokogiri/internals/XmlCommentImpl.java b/ext/java/nokogiri/internals/XmlCommentImpl.java deleted file mode 100644 index c2361b0782e..00000000000 --- a/ext/java/nokogiri/internals/XmlCommentImpl.java +++ /dev/null @@ -1,37 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlNode; -import org.jruby.Ruby; -import org.jruby.runtime.ThreadContext; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlCommentImpl extends XmlNodeImpl { - - public XmlCommentImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 8; } - - @Override - public boolean isComment() { return true; } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(""); - } - - @Override - public void 
saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(""); - } -} diff --git a/ext/java/nokogiri/internals/XmlDeclHandler.java b/ext/java/nokogiri/internals/XmlDeclHandler.java new file mode 100644 index 00000000000..9ffb5cfb172 --- /dev/null +++ b/ext/java/nokogiri/internals/XmlDeclHandler.java @@ -0,0 +1,10 @@ +package nokogiri.internals; + +/** + * Interface for receiving xmlDecl information. + * + * @author Patrick Mahoney + */ +public interface XmlDeclHandler { + public void xmlDecl(String version, String encoding, String standalone); +} diff --git a/ext/java/nokogiri/internals/XmlDocumentFragmentImpl.java b/ext/java/nokogiri/internals/XmlDocumentFragmentImpl.java deleted file mode 100644 index c98b65d0585..00000000000 --- a/ext/java/nokogiri/internals/XmlDocumentFragmentImpl.java +++ /dev/null @@ -1,55 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlNode; -import nokogiri.XmlNodeSet; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.runtime.ThreadContext; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlDocumentFragmentImpl extends XmlNodeImpl { - - public XmlDocumentFragmentImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - public void add_child(ThreadContext context, XmlNode current, XmlNode child) { - // Some magic for DocumentFragment - - Ruby ruby = context.getRuntime(); - XmlNodeSet children = (XmlNodeSet) child.children(context); - - long length = children.length(); - - RubyArray childrenArray = children.convertToArray(); - - if(length != 0) { - for(int i = 0; i < length; i++) { - XmlNode item = (XmlNode) ((XmlNode) childrenArray.aref(ruby.newFixnum(i))).dup(context); - current.add_child(context, item); - } - } - } - - public void use_super_add_child(ThreadContext context, XmlNode current, XmlNode child) { - super.add_child(context, current, child); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 11; } - - 
@Override - public void relink_namespace(ThreadContext context, XmlNode current) { - ((XmlNodeSet) current.children(context)).relink_namespace(context); - } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - this.saveNodeListContent(context, (XmlNodeSet) current.children(context), ctx); - } -} diff --git a/ext/java/nokogiri/internals/XmlDocumentImpl.java b/ext/java/nokogiri/internals/XmlDocumentImpl.java deleted file mode 100644 index 9281bd94d30..00000000000 --- a/ext/java/nokogiri/internals/XmlDocumentImpl.java +++ /dev/null @@ -1,131 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlDocument; -import nokogiri.XmlNode; -import nokogiri.XmlNodeSet; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyClass; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Document; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlDocumentImpl extends XmlNodeImpl{ - - protected IRubyObject root; - protected IRubyObject encoding; - protected IRubyObject url; - - public XmlDocumentImpl(Ruby ruby, Node node) { - super(ruby, node); - this.url = ruby.getNil(); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 9; } - - @Override - public IRubyObject children(ThreadContext context, XmlNode cur) { - XmlDocument current = (XmlDocument) cur; - Ruby ruby = context.getRuntime(); - RubyArray nodes = ruby.newArray(); - nodes.append(current.root(context)); - XmlNodeSet result = new XmlNodeSet(ruby, nodes); - result.setDocument((XmlDocument) cur); - return result; - } - - public XmlNode dup_impl(ThreadContext context, XmlDocument current, boolean deep, RubyClass klazz) { - Document newDoc = (Document) current.getDocument().cloneNode(deep); - - return new XmlDocument(context.getRuntime(), klazz, newDoc); - } - - public IRubyObject encoding(ThreadContext context, XmlDocument current) { - if(this.encoding == 
null) { - if(current.getDocument().getXmlEncoding() == null) { - this.encoding = context.getRuntime().getNil(); - } else { - this.encoding = context.getRuntime().newString(current.getDocument().getXmlEncoding()); - } - } - - return this.encoding; - } - - public void encoding_set(ThreadContext context, XmlDocument current, IRubyObject encoding) { - this.encoding = encoding; - } - - @Override - public void relink_namespace(ThreadContext context, XmlNode current) { - XmlDocument cur = (XmlDocument) current; - ((XmlNode) cur.root(context)).relink_namespace(context); - } - - public IRubyObject root(ThreadContext context, XmlDocument current) { - if(this.root == null) { - this.root = NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), - current.getDocument().getDocumentElement()); - } - return root; - } - - public void root_set(ThreadContext context, XmlDocument current, IRubyObject root) { - Document document = current.getDocument(); - Node node = XmlNode.getNodeFromXmlNode(context, root); - if(!document.equals(node.getOwnerDocument())) { - document.adoptNode(node); - } - document.replaceChild(node, document.getDocumentElement()); - this.root = root; - } - - @Override - public void saveContent(ThreadContext context, XmlNode node, SaveContext ctx) { - XmlDocument cur = (XmlDocument) node; - Document curDoc = cur.getDocument(); - - if(!ctx.noDecl()) { - - ctx.append("\n"); - } - - XmlNode root = (XmlNode) cur.root(context); - root.saveContent(context, ctx); - ctx.append("\n"); - } - - public void url_set(IRubyObject url) { this.url = url; } - - public IRubyObject url() { return this.url; } - -} diff --git a/ext/java/nokogiri/internals/XmlDocumentTypeImpl.java b/ext/java/nokogiri/internals/XmlDocumentTypeImpl.java deleted file mode 100644 index 98f7f37f54e..00000000000 --- a/ext/java/nokogiri/internals/XmlDocumentTypeImpl.java +++ /dev/null @@ -1,194 +0,0 @@ -package nokogiri.internals; - -import java.util.StringTokenizer; -import java.util.regex.Matcher; -import 
java.util.regex.Pattern; - -import nokogiri.XmlAttributeDecl; -import nokogiri.XmlDtdDeclaration; -import nokogiri.XmlElementDecl; -import nokogiri.XmlEntityDecl; -import nokogiri.XmlNode; -import nokogiri.XmlNodeSet; -import nokogiri.XmlNotation; - -import org.apache.xerces.dom.DeferredAttrNSImpl; -import org.apache.xerces.dom.DeferredDocumentTypeImpl; -import org.jruby.Ruby; -import org.jruby.RubyClass; -import org.jruby.RubyHash; -import org.jruby.RubyString; -import org.jruby.javasupport.JavaUtil; -import org.jruby.runtime.Block; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.DocumentType; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; - -/** - * Document Type node implementation. This class represents DOCTYPE declaration. - * - * @author Yoko Harada - */ -public class XmlDocumentTypeImpl extends XmlNodeImpl { - private Node doctype = null; - private RubyHash entities = null; - private RubyHash elements = null; - private RubyHash attributes = null; - private RubyHash notations = null; - private XmlNodeSet nodeSet = null; - - public XmlDocumentTypeImpl(Ruby ruby, Node node) { - super(ruby, node); - init(ruby.getCurrentContext(), node); - } - - public IRubyObject getSystemId(ThreadContext context) { - return JavaUtil.convertJavaToRuby(context.getRuntime(), ((DocumentType)getNode()).getSystemId()); - } - - public IRubyObject getPublicId(ThreadContext context) { - return JavaUtil.convertJavaToRuby(context.getRuntime(), ((DocumentType)getNode()).getPublicId()); - } - - public IRubyObject getEntities(ThreadContext context) { - return entities; - } - - public IRubyObject getElements(ThreadContext context) { - return elements; - } - - public IRubyObject getAttributes(ThreadContext context) { - return attributes; - } - - public IRubyObject getNotations(ThreadContext context) { - return notations; - } - - public IRubyObject children(ThreadContext context, XmlNode current) { - return 
getXmlNodeSet(context, current); - } - - private XmlNodeSet getXmlNodeSet(ThreadContext context, XmlNode current) { - if (doctype != current.getNode()) { - doctype = current.getNode(); - init(context, current.getNode()); - } - return nodeSet; - } - - private void init(ThreadContext context, Node current) { - initEntities(context, current); - initElements(context, current); - initNotations(context, current); // NOTATION is not XmlNode - nodeSet = (XmlNodeSet) XmlNodeSet.newEmptyNodeSet(context); - nodeSet.setDocument(this.getDocument(context)); - parseInternalSubsetString(context, nodeSet, ((org.w3c.dom.DocumentType)current).getInternalSubset()); - } - - private static Pattern p = Pattern.compile(""); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - saveContent(context, current, ctx); - } - -} diff --git a/ext/java/nokogiri/internals/XmlDomParser.java b/ext/java/nokogiri/internals/XmlDomParser.java new file mode 100644 index 00000000000..546d4d0a44f --- /dev/null +++ b/ext/java/nokogiri/internals/XmlDomParser.java @@ -0,0 +1,54 @@ +package nokogiri.internals; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import nokogiri.XmlDocument; +import org.apache.xerces.parsers.DOMParser; +import org.apache.xerces.parsers.StandardParserConfiguration; +import org.apache.xerces.xni.Augmentations; +import org.apache.xerces.xni.QName; +import org.apache.xerces.xni.XMLAttributes; +import org.apache.xerces.xni.XNIException; +import org.apache.xerces.xni.XMLLocator; +import org.apache.xerces.xni.XMLResourceIdentifier; +import org.apache.xerces.xni.parser.XMLDocumentFilter; +import org.apache.xerces.xni.parser.XMLParserConfiguration; +import org.cyberneko.dtd.DTDConfiguration; +import org.w3c.dom.Document; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +/** + * Sets up a Xerces/XNI DOM Parser for use with Nokogiri. 
Uses + * NekoDTD to parse the DTD into a tree of Nodes. + * + * @author Patrick Mahoney + */ +public class XmlDomParser extends DOMParser { + DOMParser dtd; + + public XmlDomParser() { + super(); + + DTDConfiguration dtdConfig = new DTDConfiguration(); + dtd = new DOMParser(dtdConfig); + + XMLParserConfiguration config = getXMLParserConfiguration(); + config.setDTDHandler(dtdConfig); + config.setDTDContentModelHandler(dtdConfig); + } + + @Override + public void parse(InputSource source) throws SAXException, IOException { + dtd.reset(); + super.parse(source); + Document doc = getDocument(); + if (doc == null) + throw new RuntimeException("null document"); + + doc.setUserData(XmlDocument.DTD_RAW_DOCUMENT, dtd.getDocument(), + null); + } +} diff --git a/ext/java/nokogiri/internals/ParseOptions.java b/ext/java/nokogiri/internals/XmlDomParserContext.java similarity index 61% rename from ext/java/nokogiri/internals/ParseOptions.java rename to ext/java/nokogiri/internals/XmlDomParserContext.java index f56ab77ae9d..9e15357acb8 100644 --- a/ext/java/nokogiri/internals/ParseOptions.java +++ b/ext/java/nokogiri/internals/XmlDomParserContext.java @@ -3,32 +3,36 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.StringReader; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import nokogiri.XmlDocument; import nokogiri.XmlSyntaxError; +import org.apache.xerces.parsers.DOMParser; import org.jruby.Ruby; import org.jruby.RubyArray; +import org.jruby.RubyClass; +import org.jruby.RubyIO; +import org.jruby.RubyString; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.TypeConverter; import 
org.w3c.dom.Document; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import static org.jruby.javasupport.util.RuntimeHelpers.invoke; +import static nokogiri.internals.NokogiriHelpers.rubyStringToString; + /** * * @author sergio */ -public class ParseOptions { +public class XmlDomParserContext extends ParserContext { + protected static final String FEATURE_LOAD_EXTERNAL_DTD = + "http://apache.org/xml/features/nonvalidating/load-external-dtd"; public static final long STRICT = 0; public static final long RECOVER = 1; @@ -48,17 +52,21 @@ public class ParseOptions { public static final long NOCDATA = 16384; public static final long NOXINCNODE = 32768; + protected DOMParser parser; + protected boolean strict, recover, noEnt, dtdLoad, dtdAttr, dtdValid, noError, noWarning, pedantic, noBlanks, sax1, xInclude, noNet, noDict, nsClean, noCdata, noXIncNode; protected NokogiriErrorHandler errorHandler; - public ParseOptions(IRubyObject options) { - this(options.convertToInteger().getLongValue()); + public XmlDomParserContext(Ruby runtime, IRubyObject options) { + this(runtime, options.convertToInteger().getLongValue()); } - public ParseOptions(long options) { + public XmlDomParserContext(Ruby runtime, long options) { + super(runtime); + if(options == STRICT) { this.strict = true; this.recover = this.noEnt = this.dtdLoad = this.dtdAttr = @@ -91,30 +99,60 @@ public ParseOptions(long options) { } else { this.errorHandler = new NokogiriStrictErrorHandler(); } - } - public void addErrorsIfNecessary(ThreadContext context, XmlDocument doc) { - Ruby ruby = context.getRuntime(); - RubyArray errors = ruby.newArray(this.errorHandler.getErrorsReadyForRuby(context)); - doc.setInstanceVariable("@errors", errors); + initParser(); } - public DocumentBuilder getDocumentBuilder() throws ParserConfigurationException { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - dbf.setNamespaceAware(true); - 
dbf.setIgnoringElementContentWhitespace(noBlanks); - dbf.setValidating(!this.continuesOnError()); + protected void initParser() { + parser = new XmlDomParser(); + + parser.setErrorHandler(this.errorHandler); - DocumentBuilder db = dbf.newDocumentBuilder(); - db.setEntityResolver(new EntityResolver() { - public InputSource resolveEntity(String arg0, String arg1) throws SAXException, IOException { - return new InputSource(new ByteArrayInputStream(new byte[0])); - } - }); + // If we turn off loading of external DTDs complete, we don't + // getthe publicID. Instead of turning off completely, we use + // an entity resolver that returns empty documents. + if (dtdLoad()) { + setFeature(FEATURE_LOAD_EXTERNAL_DTD, true); + } else { + parser.setEntityResolver(new EntityResolver() { + public InputSource resolveEntity(String arg0, String arg1) + throws SAXException, IOException { + ByteArrayInputStream empty = + new ByteArrayInputStream(new byte[0]); + return new InputSource(empty); + } + }); + } + } + + /** + * Convenience method that catches and ignores SAXException + * (unrecognized and unsupported exceptions). + */ + protected void setFeature(String feature, boolean value) { + try { + parser.setFeature(feature, value); + } catch (SAXException e) { + // ignore + } + } - db.setErrorHandler(this.errorHandler); + /** + * Convenience method that catches and ignores SAXException + * (unrecognized and unsupported exceptions). 
+ */ + protected void setProperty(String property, Object value) { + try { + parser.setProperty(property, value); + } catch (SAXException e) { + // ignore + } + } - return db; + public void addErrorsIfNecessary(ThreadContext context, XmlDocument doc) { + Ruby ruby = context.getRuntime(); + RubyArray errors = ruby.newArray(this.errorHandler.getErrorsReadyForRuby(context)); + doc.setInstanceVariable("@errors", errors); } public XmlDocument getDocumentWithErrorsOrRaiseException(ThreadContext context, Exception ex) { @@ -139,47 +177,44 @@ public boolean continuesOnError() { return this.recover; } - public Document parse(InputSource input) - throws ParserConfigurationException, SAXException, IOException { - if (noBlanks) { - Reader reader = input.getCharacterStream(); - return parseWhenNoBlanks(reader); - } else { - return this.getDocumentBuilder().parse(input); - } - } - - private Document parseWhenNoBlanks(Reader reader) - throws IOException, SAXException, ParserConfigurationException { - StringBuffer content = new StringBuffer(); - char[] cbuf = new char[2048]; - int length; - while ((length = reader.read(cbuf)) != -1) { - content.append(cbuf, 0, length); - } - String content_noblanks = - (new String(content)).replaceAll("(>\\n)", ">").replaceAll("\\s{1,}<", "<").replaceAll(">\\s{1,}", ">"); - StringReader sr = new StringReader((new String(content_noblanks))); - return getDocumentBuilder().parse(new InputSource(sr)); + /** + * This method is broken out so that HtmlDomParserContext can + * override it. 
+ */ + protected XmlDocument wrapDocument(ThreadContext context, + RubyClass klass, + Document doc) { + return new XmlDocument(context.getRuntime(), klass, doc); } - public Document parse(InputStream input) - throws ParserConfigurationException, SAXException, IOException { - if (noBlanks) { - InputStreamReader reader = new InputStreamReader(input); - return parseWhenNoBlanks(reader); - } else { - return this.getDocumentBuilder().parse(input); + /** + * Must call setInputSource() before this method. + */ + public XmlDocument parse(ThreadContext context, + IRubyObject klass, + IRubyObject url) { + Ruby ruby = context.getRuntime(); + + try { + Document doc = do_parse(); + XmlDocument xmlDoc = wrapDocument(context, (RubyClass)klass, doc); + xmlDoc.setUrl(url); + addErrorsIfNecessary(context, xmlDoc); + return xmlDoc; + } catch (SAXException e) { + return getDocumentWithErrorsOrRaiseException(context, e); + } catch (IOException e) { + return getDocumentWithErrorsOrRaiseException(context, e); } } - public Document parse(String input) - throws ParserConfigurationException, SAXException, IOException { - return this.getDocumentBuilder().parse(input); + protected Document do_parse() throws SAXException, IOException { + parser.parse(getInputSource()); + return parser.getDocument(); } public boolean dtdAttr() { return this.dtdAttr; } - + public boolean dtdLoad() { return this.dtdLoad; } public boolean dtdValid() { return this.dtdValid; } diff --git a/ext/java/nokogiri/internals/XmlElementDeclImpl.java b/ext/java/nokogiri/internals/XmlElementDeclImpl.java deleted file mode 100644 index 7d6b705401b..00000000000 --- a/ext/java/nokogiri/internals/XmlElementDeclImpl.java +++ /dev/null @@ -1,38 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlNode; - -import org.apache.xerces.dom.DeferredElementDefinitionImpl; -import org.jruby.Ruby; -import org.jruby.runtime.ThreadContext; -import org.w3c.dom.Node; - -/** - * Implementation for ELEMENT declaration of DTD - * - * @author 
Yoko Harada - */ -public class XmlElementDeclImpl extends XmlNodeImpl { - private String declaration = null; - - public XmlElementDeclImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 15; } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(declaration); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - saveContent(context, current, ctx); - } - - public void setDeclaration(String declaration) { - this.declaration = declaration; - } -} diff --git a/ext/java/nokogiri/internals/XmlElementImpl.java b/ext/java/nokogiri/internals/XmlElementImpl.java deleted file mode 100644 index 7fa840e43fb..00000000000 --- a/ext/java/nokogiri/internals/XmlElementImpl.java +++ /dev/null @@ -1,238 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlNamespace; -import nokogiri.XmlNode; -import nokogiri.XmlNodeSet; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyString; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Attr; -import org.w3c.dom.Element; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -/** - * - * @author sergio - */ -public class XmlElementImpl extends XmlNodeImpl { - - public XmlElementImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - public void add_namespace_definitions(ThreadContext context, XmlNode current, XmlNamespace ns, String prefix, String href) { - Element e = (Element) current.getNode(); - e.setAttribute(prefix, href); - - current.updateNodeNamespaceIfNecessary(context, ns); - } - - @Override - public IRubyObject get(ThreadContext context, XmlNode current, IRubyObject key) { - String keyString = key.convertToString().asJavaString(); - Element element = (Element) current.getNode(); - String value = 
element.getAttribute(keyString); - if(!value.equals("")){ - return RubyString.newString(context.getRuntime(), value); - } - return context.getRuntime().getNil(); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 1; } - - @Override - public boolean isElement() { return true; } - - @Override - public IRubyObject key_p(ThreadContext context, XmlNode current, IRubyObject k) { - Ruby ruby = context.getRuntime(); - String key = k.convertToString().asJavaString(); - Element element = (Element)current.getNode(); - return ruby.newBoolean(element.hasAttribute(key)); - } - - @Override - public void node_name_set(ThreadContext context, XmlNode current, IRubyObject nodeName) { - String newName = nodeName.convertToString().asJavaString(); - current.getNode().getOwnerDocument().renameNode(current.getNode(), null, newName); - current.setName(nodeName); - } - - @Override - public void op_aset(ThreadContext context, XmlNode current, IRubyObject index, IRubyObject val) { - String key = index.convertToString().asJavaString(); - String value = val.convertToString().asJavaString(); - Element element = (Element)current.getNode(); - element.setAttribute(key, value); - } - - @Override - public void remove_attribute(ThreadContext context, XmlNode current, IRubyObject name) { - String key = name.convertToString().asJavaString(); - Element element = (Element)current.getNode(); - element.removeAttribute(key); - } - - @Override - public void relink_namespace(ThreadContext context, XmlNode node) { - Element e = (Element) node.getNode(); - - e.getOwnerDocument().renameNode(e, e.lookupNamespaceURI(e.getPrefix()), e.getNodeName()); - - if(e.hasAttributes()) { - NamedNodeMap attrs = e.getAttributes(); - - for(int i = 0; i < attrs.getLength(); i++) { - Attr attr = (Attr) attrs.item(i); - String nsUri = ""; - String prefix = attr.getPrefix(); - String nodeName = attr.getNodeName(); - if("xml".equals(prefix)) { - nsUri = "http://www.w3.org/XML/1998/namespace"; - } else 
if("xmlns".equals(prefix) || nodeName.equals("xmlns")) { - nsUri = "http://www.w3.org/2000/xmlns/"; - } else { - nsUri = attr.lookupNamespaceURI(nodeName); - } - - e.getOwnerDocument().renameNode(attr, nsUri, nodeName); - - } - } - - if(e.hasChildNodes()) { - ((XmlNodeSet) node.children(context)).relink_namespace(context); - } - } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - boolean format = ctx.format(); - - Element e = (Element) current.getNode(); - - if(format) { - NodeList tmp = e.getChildNodes(); - for(int i = 0; i < tmp.getLength(); i++) { - Node cur = tmp.item(i); - if(cur.getNodeType() == Node.TEXT_NODE || - cur.getNodeType() == Node.CDATA_SECTION_NODE || - cur.getNodeType() == Node.ENTITY_REFERENCE_NODE) { - ctx.setFormat(false); - break; - } - } - } - - ctx.append("<"); - ctx.append(e.getNodeName()); - RubyArray attr_list = NokogiriHelpers.namedNodeMapToRubyArray(context.getRuntime(), current.getNode().getAttributes()); - this.saveNodeListContent(context, attr_list, ctx); - - if(e.getChildNodes() == null && !ctx.noEmpty()) { - ctx.append("/>"); - ctx.setFormat(format); - return; - } - - ctx.append(">"); - -// ctx.append(current.content(context).convertToString().asJavaString()); - - XmlNodeSet children = (XmlNodeSet) current.children(context); - - if(!children.isEmpty()) { - if(ctx.format()) ctx.append("\n"); - ctx.increaseLevel(); - this.saveNodeListContent(context, children, ctx); - ctx.decreaseLevel(); - if(ctx.format()) ctx.append(ctx.getCurrentIndentString()); - } - - ctx.append(""); - - ctx.setFormat(format); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - - Element e = (Element) current.getNode(); - - - ctx.append("<"); - ctx.append(e.getNodeName()); - this.saveNodeListContentAsHtml(context, (RubyArray) current.attribute_nodes(context), ctx); - - ctx.append(">"); - - Node next = e.getFirstChild(); - Node parent = e.getParentNode(); 
- if(ctx.format() && next != null && - next.getNodeType() != Node.TEXT_NODE && - next.getNodeType() != Node.ENTITY_REFERENCE_NODE && - parent != null && - parent.getNodeName() != null && - parent.getNodeName().charAt(0) != 'p'){ - ctx.append("\n"); - } - - if(e.getChildNodes().getLength() == 0) { - ctx.append(""); - if(ctx.format() && next != null && - next.getNodeType() != Node.TEXT_NODE && - next.getNodeType() != Node.ENTITY_REFERENCE_NODE && - parent != null && - parent.getNodeName() != null && - parent.getNodeName().charAt(0) != 'p'){ - ctx.append("\n"); - } - return; - } - - XmlNodeSet children = (XmlNodeSet) current.children(context); - - if(!children.isEmpty()) { - if(ctx.format() && next != null && - next.getNodeType() != Node.TEXT_NODE && - next.getNodeType() != Node.ENTITY_REFERENCE_NODE && - parent != null && - parent.getNodeName() != null && - parent.getNodeName().charAt(0) != 'p'){ - ctx.append("\n"); - } - this.saveNodeListContentAsHtml(context, children, ctx); - if(ctx.format() && next != null && - next.getNodeType() != Node.TEXT_NODE && - next.getNodeType() != Node.ENTITY_REFERENCE_NODE && - parent != null && - parent.getNodeName() != null && - parent.getNodeName().charAt(0) != 'p'){ - ctx.append("\n"); - } - } - - ctx.append(""); - - if(ctx.format() && next != null && - next.getNodeType() != Node.TEXT_NODE && - next.getNodeType() != Node.ENTITY_REFERENCE_NODE && - parent != null && - parent.getNodeName() != null && - parent.getNodeName().charAt(0) != 'p'){ - ctx.append("\n"); - } - } -} diff --git a/ext/java/nokogiri/internals/XmlEmptyDocumentImpl.java b/ext/java/nokogiri/internals/XmlEmptyDocumentImpl.java deleted file mode 100644 index 51735b32ab6..00000000000 --- a/ext/java/nokogiri/internals/XmlEmptyDocumentImpl.java +++ /dev/null @@ -1,74 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlDocument; -import nokogiri.XmlNode; -import nokogiri.XmlNodeSet; -import org.jruby.Ruby; -import org.jruby.RubyClass; -import 
org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Document; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlEmptyDocumentImpl extends XmlDocumentImpl{ - - public XmlEmptyDocumentImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - public IRubyObject children(ThreadContext context, XmlNode current) { - return XmlNodeSet.newEmptyNodeSet(context); - } - - @Override - public Node cloneNode(ThreadContext context, XmlNode current, boolean deep) { - return ((XmlDocument) current).getDocument().cloneNode(deep); - } - - @Override - public XmlNode dup_impl(ThreadContext context, XmlDocument current, boolean deep, RubyClass klazz) { - return (XmlNode) XmlDocument.rbNew(context, klazz, new IRubyObject[0]); - } - - @Override - public IRubyObject encoding(ThreadContext context, XmlDocument current) { - if(this.encoding == null) { - this.encoding = context.getRuntime().getNil(); - } - - return this.encoding; - } - - @Override - public void post_add_child(ThreadContext context, XmlNode current, XmlNode child) { - this.changeInternalNode(context, (XmlDocument) current); - } - - @Override - public IRubyObject root(ThreadContext context, XmlDocument current) { - if(this.root == null) { - this.root = context.getRuntime().getNil(); - } - return root; - } - - @Override - public void root_set(ThreadContext context, XmlDocument current, IRubyObject root) { - Document document = current.getDocument(); - Node node = XmlNode.getNodeFromXmlNode(context, root); - if(!document.equals(node.getOwnerDocument())) { - document.adoptNode(node); - } - document.appendChild(node); - changeInternalNode(context, current); - } - - protected void changeInternalNode(ThreadContext context, XmlDocument doc) { - doc.setInternalNode(XmlNodeImpl.getImplForNode(context.getRuntime(), doc.getDocument())); - } -} diff --git a/ext/java/nokogiri/internals/XmlEntityDeclImpl.java 
b/ext/java/nokogiri/internals/XmlEntityDeclImpl.java deleted file mode 100644 index 6236c39ea6d..00000000000 --- a/ext/java/nokogiri/internals/XmlEntityDeclImpl.java +++ /dev/null @@ -1,38 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlNode; - -import org.apache.xerces.dom.DeferredEntityImpl; -import org.jruby.Ruby; -import org.jruby.runtime.ThreadContext; -import org.w3c.dom.Node; - -/** - * Implementation for ENTITY declaration of DTD - * - * @author Yoko Harada - */ -public class XmlEntityDeclImpl extends XmlNodeImpl { - private String declaration = null; - - public XmlEntityDeclImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 17; } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(declaration); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - saveContent(context, current, ctx); - } - - public void setDeclaration(String declaration) { - this.declaration = declaration; - } -} diff --git a/ext/java/nokogiri/internals/XmlEntityReferenceImpl.java b/ext/java/nokogiri/internals/XmlEntityReferenceImpl.java deleted file mode 100644 index d792cb10662..00000000000 --- a/ext/java/nokogiri/internals/XmlEntityReferenceImpl.java +++ /dev/null @@ -1,16 +0,0 @@ -package nokogiri.internals; - -import org.jruby.Ruby; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -class XmlEntityReferenceImpl extends XmlNodeImpl { - - public XmlEntityReferenceImpl(Ruby ruby, Node node) { - super(ruby, node); - } - -} diff --git a/ext/java/nokogiri/internals/XmlNodeImpl.java b/ext/java/nokogiri/internals/XmlNodeImpl.java deleted file mode 100644 index 997c74c0508..00000000000 --- a/ext/java/nokogiri/internals/XmlNodeImpl.java +++ /dev/null @@ -1,383 +0,0 @@ -package nokogiri.internals; - -import static nokogiri.internals.NokogiriHelpers.isNamespace; -import 
nokogiri.XmlDocument; -import nokogiri.XmlNamespace; -import nokogiri.XmlNode; -import nokogiri.XmlNodeSet; - -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyString; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Document; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlNodeImpl { - - protected IRubyObject content, doc, name, namespace, namespace_definitions; - - private Node node; - - private static final IRubyObject DEFAULT_CONTENT = null; - private static final IRubyObject DEFAULT_DOC = null; - private static final IRubyObject DEFAULT_NAME = null; - private static final IRubyObject DEFAULT_NAMESPACE = null; - private static final IRubyObject DEFAULT_NAMESPACE_DEFINITIONS = null; - - public XmlNodeImpl(Ruby ruby, Node node) { - this.node = node; - } - - public IRubyObject children(ThreadContext context, XmlNode current) { - XmlNodeSet result = new XmlNodeSet(context.getRuntime(), current.getNode().getChildNodes()); - result.setDocument(this.getDocument(context)); - return result; - } - - public IRubyObject getContent(ThreadContext context) { - if(this.content == DEFAULT_CONTENT) { - String textContent = this.node.getTextContent(); - this.content = (textContent == null) ? 
methods().getNullContent(context) : - context.getRuntime().newString(textContent); - } - - return this.content; - } - - public XmlDocument getDocument(ThreadContext context) { - if(this.doc == DEFAULT_DOC) { - this.doc = NokogiriHelpers.getCachedNodeOrCreate(context.getRuntime(), - this.node.getOwnerDocument()); - } - - return (XmlDocument) this.doc; - } - - public IRubyObject getNamespace(ThreadContext context) { - if(this.namespace == DEFAULT_NAMESPACE) { - this.namespace = new XmlNamespace(context.getRuntime(), this.node.getPrefix(), - this.node.lookupNamespaceURI(this.node.getPrefix())); - if(((XmlNamespace) this.namespace).isEmpty()) { - this.namespace = context.getRuntime().getNil(); - } - } - - return this.namespace; - } - - public Node getNode() { - return this.node; - } - - public RubyString getNodeName(ThreadContext context) { - if(this.name == DEFAULT_NAME) { - this.name = context.getRuntime().newString(NokogiriHelpers.getNodeName(this.node)); - } - - return (RubyString) this.name; - } - - public RubyArray getNsDefinitions(Ruby ruby) { - if(this.namespace_definitions == DEFAULT_NAMESPACE_DEFINITIONS) { - RubyArray arr = ruby.newArray(); - NamedNodeMap nodes = this.node.getAttributes(); - - if(nodes == null) { - return ruby.newEmptyArray(); - } - - for(int i = 0; i < nodes.getLength(); i++) { - Node n = nodes.item(i); - if(isNamespace(n)) { - arr.append(XmlNamespace.fromNode(ruby, n)); - } - } - - this.namespace_definitions = arr; - } - - return (RubyArray) this.namespace_definitions; - } - - public XmlNodeImpl methods() { - return this; - } - - public void resetContent() { - this.content = DEFAULT_CONTENT; - } - - public void resetDocument() { - this.doc = DEFAULT_DOC; - } - - public void resetName() { - this.name = DEFAULT_NAME; - } - - public void resetNamespace() { - this.namespace = DEFAULT_NAMESPACE; - } - - public void resetNamespaceDefinitions() { - this.namespace_definitions = DEFAULT_NAMESPACE_DEFINITIONS; - } - - public void 
setContent(IRubyObject content) { - this.content = content; - } - - public void setDocument(IRubyObject doc) { - this.doc = doc; - } - - public void setName(IRubyObject name) { - this.name = name; - } - - public void setNamespace(IRubyObject ns) { - this.namespace = ns; - } - - public void setNamespaceDefinitions(IRubyObject namespace_definitions) { - this.namespace_definitions = namespace_definitions; - } - - public void setNode(Node node) { - this.node = node; - } - - /* - * Specific implementation of methods. - */ - - public void add_child(ThreadContext context, XmlNode current, XmlNode child) { - - Node appended = child.getNode(); - - if(child.document(context) != current.document(context)) { - ((XmlDocument) current.document(context)).getDocument().adoptNode(appended); - child.setDocument(current.document(context)); - } else if(appended.getParentNode() != null) { - child.unlink(context); - } - - if(appended.getNodeType() == Node.TEXT_NODE) { - RubyArray children = ((XmlNodeSet) current.children(context)).convertToArray(); - if(!children.isEmpty()) { - XmlNode last = (XmlNode) children.last(); - XmlNode.coalesceTextNodes(context, last, child); - return; - } - } - - Node currentNode = current.getNode(); - if(appended.getNodeType() == Node.TEXT_NODE && currentNode instanceof Document) { - Node newNode = ((Document) currentNode).createElement("xml"); - newNode.appendChild(appended); - appendChild(context, currentNode, newNode); - } else { - appendChild(context, currentNode, appended); - } - - child.relink_namespace(context); - - current.post_add_child(context, current, child); - } - - - private void appendChild(ThreadContext context, Node parent, Node child) { - try { - parent.appendChild(child); - } catch (Exception ex) { - throw context.getRuntime().newRuntimeError(ex.toString()); - } - } - - public void add_next_sibling(ThreadContext context, XmlNode new_node, XmlNode ref_node) { - Node newNode = new_node.getNode(); - - if(new_node.document(context) != 
ref_node.document(context)) { - ((XmlDocument) ref_node.document(context)).getDocument().adoptNode(newNode); - new_node.setDocument(ref_node.document(context)); - } else if(newNode.getParentNode() != null) { - new_node.unlink(context); - } - - if(newNode.getNodeType() == Node.TEXT_NODE) { - XmlNode.coalesceTextNodes(context, ref_node, new_node); - return; - } - - try { - Node refNode = ref_node.getNode(); - if (newNode.getNodeType() == Node.TEXT_NODE && refNode instanceof Document) { - Node tmpNode = ((Document) refNode).createElement("xml"); - appendNextSibling(newNode, tmpNode); - } else { - appendNextSibling(newNode, refNode); - } - } catch (Exception e) { - throw context.getRuntime().newRuntimeError(e.toString()); - } - - new_node.relink_namespace(context); - } - - private void appendNextSibling(Node newNode, Node refNode) { - Node next = refNode.getNextSibling(); - if (next == null) { - Node parent = refNode.getParentNode(); - if(parent == null) { - refNode.appendChild(newNode); - } else { - parent.appendChild(newNode); - } - return; - } else { - Node parent = refNode.getParentNode(); - if (parent != null) { - parent.insertBefore(newNode, next); - } - } - } - - public void add_namespace_definitions(ThreadContext context, XmlNode current, XmlNamespace ns, String prefix, String href) {} - - public IRubyObject blank_p(ThreadContext context, XmlNode node) { - return context.getRuntime().getFalse(); - } - - public Node cloneNode(ThreadContext context, XmlNode current, boolean deep) { - return current.getNode().cloneNode(deep); - } - - public IRubyObject get(ThreadContext context, XmlNode current, IRubyObject key) { - return context.getRuntime().getNil(); - } - - public IRubyObject getNokogiriNodeType(ThreadContext context) { - return context.getRuntime().newFixnum(this.getNokogiriNodeTypeInternal()); - } - - protected int getNokogiriNodeTypeInternal(){ return 0; } - - public IRubyObject getNullContent(ThreadContext context) { - return 
context.getRuntime().newString(); - } - - protected boolean isBlankChar(char a) { - return Character.isWhitespace(a); - } - - protected boolean isBlankNode(ThreadContext context, XmlNode node) { - RubyString cont = node.content(context).convertToString(); - if(cont.isEmpty()) return false; - - String content = cont.asJavaString(); - - char[] cur = content.toCharArray(); - - for(int i=0; i < cur.length; i++) { - if(!isBlankChar(cur[i])) return false; - } - - return true; - } - - public boolean isComment() { return false; } - - public boolean isElement() { return false; } - - public boolean isProcessingInstruction() { return false; } - - public IRubyObject key_p(ThreadContext context, XmlNode current, IRubyObject k) { - return context.getRuntime().getFalse(); - } - - public void node_name_set(ThreadContext context, XmlNode current, IRubyObject nodeName) {} - - public void op_aset(ThreadContext context, XmlNode current, IRubyObject index, IRubyObject val) {} - - public void post_add_child(ThreadContext context, XmlNode current, XmlNode child) { } - - public void remove_attribute(ThreadContext context, XmlNode current, IRubyObject name) {} - - public void relink_namespace(ThreadContext context, XmlNode current) {} - - protected void saveNodeListContent(ThreadContext context, XmlNodeSet list, SaveContext ctx) { - this.saveNodeListContent(context, (RubyArray) list.to_a(context), ctx); - } - - protected void saveNodeListContent(ThreadContext context, RubyArray array, SaveContext ctx) { - int length = array.getLength(); - - boolean formatIndentation = ctx.format() && ctx.indentString()!=null; - - for(int i = 0; i < length; i++) { - XmlNode cur = (XmlNode) array.aref(context.getRuntime().newFixnum(i)); - - if(formatIndentation && - (cur.isElement() || cur.isComment() || cur.isProcessingInstruction())) { - ctx.append(ctx.getCurrentIndentString()); - } - - cur.saveContent(context, ctx); - - if(ctx.format()) ctx.append("\n"); - } - } - - protected void 
saveNodeListContentAsHtml(ThreadContext context, XmlNodeSet list, SaveContext ctx) { - this.saveNodeListContentAsHtml(context, (RubyArray) list.to_a(context), ctx); - } - - protected void saveNodeListContentAsHtml(ThreadContext context, RubyArray array, SaveContext ctx) { - int length = array.getLength(); - - boolean formatIndentation = ctx.format() && ctx.indentString()!=null; - - for(int i = 0; i < length; i++) { - XmlNode cur = (XmlNode) array.aref(context.getRuntime().newFixnum(i)); - - cur.saveContentAsHtml(context, ctx); - } - } - - public void saveContent(ThreadContext context, XmlNode cur, SaveContext ctx) {} - - public void saveContentAsHtml(ThreadContext context, XmlNode cur, SaveContext ctx) {} - - public void unlink(ThreadContext context, XmlNode current) { - Node currentNode = current.getNode(); - if(currentNode.getParentNode() == null) { - throw context.getRuntime().newRuntimeError("TYPE: "+currentNode.getNodeType()+ " PARENT NULL"); - } else { - currentNode.getParentNode().removeChild(currentNode); - } - } - - public static XmlNodeImpl getImplForNode(Ruby ruby, Node node) { - if(node == null) return new XmlNodeImpl(ruby, node); - switch(node.getNodeType()) { - case Node.ATTRIBUTE_NODE: return new XmlAttrImpl(ruby, node); - case Node.CDATA_SECTION_NODE: return new XmlCdataImpl(ruby, node); - case Node.COMMENT_NODE: return new XmlCommentImpl(ruby, node); - case Node.DOCUMENT_FRAGMENT_NODE: return new XmlDocumentFragmentImpl(ruby, node); - case Node.DOCUMENT_NODE: return new XmlDocumentImpl(ruby, ((Document) node)); - case Node.ELEMENT_NODE: return new XmlElementImpl(ruby, node); - case Node.ENTITY_REFERENCE_NODE: return new XmlEntityReferenceImpl(ruby, node); - case Node.PROCESSING_INSTRUCTION_NODE: return new XmlProcessingInstructionImpl(ruby, node); - case Node.DOCUMENT_TYPE_NODE: return new XmlDocumentTypeImpl(ruby, node); - case Node.TEXT_NODE : return new XmlTextImpl(ruby, node); - default: return new XmlNodeImpl(ruby, node); - } - } -} diff --git 
a/ext/java/nokogiri/internals/XmlProcessingInstructionImpl.java b/ext/java/nokogiri/internals/XmlProcessingInstructionImpl.java deleted file mode 100644 index 68f797fe12b..00000000000 --- a/ext/java/nokogiri/internals/XmlProcessingInstructionImpl.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package nokogiri.internals; - -import nokogiri.XmlNode; -import org.jruby.Ruby; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlProcessingInstructionImpl extends XmlNodeImpl{ - - public XmlProcessingInstructionImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 7; } - - @Override - public boolean isProcessingInstruction() { return true; } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(""); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(""); - } - -} diff --git a/ext/java/nokogiri/internals/XmlSaxParser.java b/ext/java/nokogiri/internals/XmlSaxParser.java new file mode 100644 index 00000000000..23cb037cb83 --- /dev/null +++ b/ext/java/nokogiri/internals/XmlSaxParser.java @@ -0,0 +1,33 @@ +package nokogiri.internals; + +import org.apache.xerces.parsers.SAXParser; +import org.apache.xerces.xni.Augmentations; +import org.apache.xerces.xni.XNIException; + +/** + * Extends SAXParser in order to receive xmlDecl events and pass them + * on to a handler. 
+ * + * @author Patrick Mahoney + */ +public class XmlSaxParser extends SAXParser { + + protected XmlDeclHandler xmlDeclHandler = null; + + public XmlSaxParser() { + super(); + } + + public void setXmlDeclHandler(XmlDeclHandler xmlDeclHandler) { + this.xmlDeclHandler = xmlDeclHandler; + } + + @Override + public void xmlDecl(String version, String encoding, String standalone, + Augmentations augs) throws XNIException { + super.xmlDecl(version, encoding, standalone, augs); + if (xmlDeclHandler != null) { + xmlDeclHandler.xmlDecl(version, encoding, standalone); + } + } +} diff --git a/ext/java/nokogiri/internals/XmlTextImpl.java b/ext/java/nokogiri/internals/XmlTextImpl.java deleted file mode 100644 index 350c36afb10..00000000000 --- a/ext/java/nokogiri/internals/XmlTextImpl.java +++ /dev/null @@ -1,44 +0,0 @@ -package nokogiri.internals; - -import nokogiri.XmlNode; -import org.jruby.Ruby; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.builtin.IRubyObject; -import org.w3c.dom.Node; - -/** - * - * @author sergio - */ -public class XmlTextImpl extends XmlNodeImpl { - - public XmlTextImpl(Ruby ruby, Node node) { - super(ruby, node); - } - - @Override - public IRubyObject blank_p(ThreadContext context, XmlNode node) { - return context.getRuntime().newBoolean(this.isBlankNode(context, node)); - } - - @Override - protected int getNokogiriNodeTypeInternal() { return 3; } - - @Override - public void saveContent(ThreadContext context, XmlNode current, SaveContext ctx) { - if(ctx.format()) { - ctx.append(ctx.getCurrentIndentString()); - } - - ctx.append(NokogiriHelpers.encodeJavaString( - current.content(context).convertToString().asJavaString() - )); - } - - @Override - public void saveContentAsHtml(ThreadContext context, XmlNode current, SaveContext ctx) { - ctx.append(NokogiriHelpers.encodeJavaString( - current.content(context).convertToString().asJavaString() - )); - } -} diff --git a/lib/nekodtd.jar b/lib/nekodtd.jar new file mode 100644 index 
00000000000..96e0c300113 Binary files /dev/null and b/lib/nekodtd.jar differ diff --git a/lib/nokogiri.rb b/lib/nokogiri.rb index 082d226d380..2a4584f8000 100644 --- a/lib/nokogiri.rb +++ b/lib/nokogiri.rb @@ -13,6 +13,7 @@ require 'isorelax.jar' require 'jing.jar' require 'nekohtml.jar' + require 'nekodtd.jar' require 'xercesImpl.jar' require 'nokogiri/nokogiri' else diff --git a/lib/nokogiri/css/tokenizer.rex b/lib/nokogiri/css/tokenizer.rex index 42b4ba2264d..d27b56f0ef2 100644 --- a/lib/nokogiri/css/tokenizer.rex +++ b/lib/nokogiri/css/tokenizer.rex @@ -22,7 +22,7 @@ rule # [:state] pattern [actions] - \has\({w} { [:HAS, text] } + has\({w} { [:HAS, text] } {ident}\({w} { [:FUNCTION, text] } {ident} { [:IDENT, text] } \#{name} { [:HASH, text] } diff --git a/lib/nokogiri/html.rb b/lib/nokogiri/html.rb index 11de5ad63b7..5d0e73104d1 100644 --- a/lib/nokogiri/html.rb +++ b/lib/nokogiri/html.rb @@ -4,6 +4,7 @@ require 'nokogiri/html/sax/parser_context' require 'nokogiri/html/sax/parser' require 'nokogiri/html/element_description' +require 'nokogiri/html/element_description_defaults' module Nokogiri class << self diff --git a/lib/nokogiri/html/element_description_defaults.rb b/lib/nokogiri/html/element_description_defaults.rb new file mode 100644 index 00000000000..4a63801eca2 --- /dev/null +++ b/lib/nokogiri/html/element_description_defaults.rb @@ -0,0 +1,671 @@ +module Nokogiri + module HTML + class ElementDescription + + # Methods are defined protected by method_defined? because at + # this point the C-library or Java library is already loaded, + # and we don't want to clobber any methods that have been + # defined there. + + Desc = Struct.new("HTMLElementDescription", :name, + :startTag, :endTag, :saveEndTag, + :empty, :depr, :dtd, :isinline, + :desc, + :subelts, :defaultsubelt, + :attrs_opt, :attrs_depr, :attrs_req) + + # This is filled in down below. 
+ DefaultDescriptions = Hash.new() + + def default_desc + DefaultDescriptions[name.downcase] + end + private :default_desc + + unless method_defined? :implied_start_tag? + def implied_start_tag? + d = default_desc + d ? d.startTag : nil + end + end + + unless method_defined? :implied_end_tag? + def implied_end_tag? + d = default_desc + d ? d.endTag : nil + end + end + + unless method_defined? :save_end_tag? + def save_end_tag? + d = default_desc + d ? d.saveEndTag : nil + end + end + + unless method_defined? :deprecated? + def deprecated? + d = default_desc + d ? d.depr : nil + end + end + + unless method_defined? :description + def description + d = default_desc + d ? d.desc : nil + end + end + + unless method_defined? :default_sub_element + def default_sub_element + d = default_desc + d ? d.defaultsubelt : nil + end + end + + unless method_defined? :optional_attributes + def optional_attributes + d = default_desc + d ? d.attrs_opt : [] + end + end + + unless method_defined? :deprecated_attributes + def deprecated_attributes + d = default_desc + d ? d.attrs_depr : [] + end + end + + unless method_defined? :required_attributes + def required_attributes + d = default_desc + d ? d.attrs_req : [] + end + end + + ### + # Default Element Descriptions (HTML 4.0) copied from + # libxml2/HTMLparser.c and libxml2/include/libxml/HTMLparser.h + # + # The copyright notice for those files and the following list of + # element and attribute descriptions is reproduced here: + # + # Except where otherwise noted in the source code (e.g. the + # files hash.c, list.c and the trio files, which are covered by + # a similar licence but with different Copyright notices) all + # the files are: + # + # Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person + # obtaining a copy of this software and associated documentation + # files (the "Software"), to deal in the Software without + # restriction, including without limitation the rights to use, + # copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the + # Software is furnished to do so, subject to the following + # conditions: + + # The above copyright notice and this permission notice shall be + # included in all copies or substantial portions of the + # Software. + + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE DANIEL + # VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE + # OR OTHER DEALINGS IN THE SOFTWARE. + + # Except as contained in this notice, the name of Daniel + # Veillard shall not be used in advertising or otherwise to + # promote the sale, use or other dealings in this Software + # without prior written authorization from him. 
+ + # Attributes defined and categorized + FONTSTYLE = ["tt", "i", "b", "u", "s", "strike", "big", "small"] + PHRASE = ['em', 'strong', 'dfn', 'code', 'samp', + 'kbd', 'var', 'cite', 'abbr', 'acronym'] + SPECIAL = ['a', 'img', 'applet', 'embed', 'object', 'font','basefont', + 'br', 'script', 'map', 'q', 'sub', 'sup', 'span', 'bdo', + 'iframe'] + PCDATA = [] + HEADING = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] + LIST = ['ul', 'ol', 'dir', 'menu'] + FORMCTRL = ['input', 'select', 'textarea', 'label', 'button'] + BLOCK = [HEADING, LIST, 'pre', 'p', 'dl', 'div', 'center', 'noscript', + 'noframes', 'blockquote', 'form', 'isindex', 'hr', 'table', + 'fieldset', 'address'] + INLINE = [PCDATA, FONTSTYLE, PHRASE, SPECIAL, FORMCTRL] + FLOW = [BLOCK, INLINE] + MODIFIER = [] + EMPTY = [] + + HTML_FLOW = FLOW + HTML_INLINE = INLINE + HTML_PCDATA = PCDATA + HTML_CDATA = HTML_PCDATA + + COREATTRS = ['id', 'class', 'style', 'title'] + I18N = ['lang', 'dir'] + EVENTS = ['onclick', 'ondblclick', 'onmousedown', 'onmouseup', + 'onmouseover', 'onmouseout', 'onkeypress', 'onkeydown', + 'onkeyup'] + ATTRS = [COREATTRS, I18N,EVENTS] + CELLHALIGN = ['align', 'char', 'charoff'] + CELLVALIGN = ['valign'] + + HTML_ATTRS = ATTRS + CORE_I18N_ATTRS = [COREATTRS, I18N] + CORE_ATTRS = COREATTRS + I18N_ATTRS = I18N + + + A_ATTRS = [ATTRS, 'charset', 'type', 'name', + 'href', 'hreflang', 'rel', 'rev', 'accesskey', 'shape', + 'coords', 'tabindex', 'onfocus', 'onblur'] + TARGET_ATTR = ['target'] + ROWS_COLS_ATTR = ['rows', 'cols'] + ALT_ATTR = ['alt'] + SRC_ALT_ATTRS = ['src', 'alt'] + HREF_ATTRS = ['href'] + CLEAR_ATTRS = ['clear'] + INLINE_P = [INLINE, 'p'] + + FLOW_PARAM = [FLOW, 'param'] + APPLET_ATTRS = [COREATTRS , 'codebase', + 'archive', 'alt', 'name', 'height', 'width', 'align', + 'hspace', 'vspace'] + AREA_ATTRS = ['shape', 'coords', 'href', 'nohref', + 'tabindex', 'accesskey', 'onfocus', 'onblur'] + BASEFONT_ATTRS = ['id', 'size', 'color', 'face'] + QUOTE_ATTRS = [ATTRS, 'cite'] + BODY_CONTENTS 
= [FLOW, 'ins', 'del'] + BODY_ATTRS = [ATTRS, 'onload', 'onunload'] + BODY_DEPR = ['background', 'bgcolor', 'text', + 'link', 'vlink', 'alink'] + BUTTON_ATTRS = [ATTRS, 'name', 'value', 'type', + 'disabled', 'tabindex', 'accesskey', 'onfocus', 'onblur'] + + + COL_ATTRS = [ATTRS, 'span', 'width', CELLHALIGN, CELLVALIGN] + COL_ELT = ['col'] + EDIT_ATTRS = [ATTRS, 'datetime', 'cite'] + COMPACT_ATTRS = [ATTRS, 'compact'] + DL_CONTENTS = ['dt', 'dd'] + COMPACT_ATTR = ['compact'] + LABEL_ATTR = ['label'] + FIELDSET_CONTENTS = [FLOW, 'legend' ] + FONT_ATTRS = [COREATTRS, I18N, 'size', 'color', 'face' ] + FORM_CONTENTS = [HEADING, LIST, INLINE, 'pre', 'p', 'div', 'center', + 'noscript', 'noframes', 'blockquote', 'isindex', 'hr', + 'table', 'fieldset', 'address'] + FORM_ATTRS = [ATTRS, 'method', 'enctype', 'accept', 'name', 'onsubmit', + 'onreset', 'accept-charset'] + FRAME_ATTRS = [COREATTRS, 'longdesc', 'name', 'src', 'frameborder', + 'marginwidth', 'marginheight', 'noresize', 'scrolling' ] + FRAMESET_ATTRS = [COREATTRS, 'rows', 'cols', 'onload', 'onunload'] + FRAMESET_CONTENTS = ['frameset', 'frame', 'noframes'] + HEAD_ATTRS = [I18N, 'profile'] + HEAD_CONTENTS = ['title', 'isindex', 'base', 'script', 'style', 'meta', + 'link', 'object'] + HR_DEPR = ['align', 'noshade', 'size', 'width'] + VERSION_ATTR = ['version'] + HTML_CONTENT = ['head', 'body', 'frameset'] + IFRAME_ATTRS = [COREATTRS, 'longdesc', 'name', 'src', 'frameborder', + 'marginwidth', 'marginheight', 'scrolling', 'align', + 'height', 'width'] + IMG_ATTRS = [ATTRS, 'longdesc', 'name', 'height', 'width', 'usemap', + 'ismap'] + EMBED_ATTRS = [COREATTRS, 'align', 'alt', 'border', 'code', 'codebase', + 'frameborder', 'height', 'hidden', 'hspace', 'name', + 'palette', 'pluginspace', 'pluginurl', 'src', 'type', + 'units', 'vspace', 'width'] + INPUT_ATTRS = [ATTRS, 'type', 'name', 'value', 'checked', 'disabled', + 'readonly', 'size', 'maxlength', 'src', 'alt', 'usemap', + 'ismap', 'tabindex', 'accesskey', 'onfocus', 
'onblur', + 'onselect', 'onchange', 'accept'] + PROMPT_ATTRS = [COREATTRS, I18N, 'prompt'] + LABEL_ATTRS = [ATTRS, 'for', 'accesskey', 'onfocus', 'onblur'] + LEGEND_ATTRS = [ATTRS, 'accesskey'] + ALIGN_ATTR = ['align'] + LINK_ATTRS = [ATTRS, 'charset', 'href', 'hreflang', 'type', 'rel', 'rev', + 'media'] + MAP_CONTENTS = [BLOCK, 'area'] + NAME_ATTR = ['name'] + ACTION_ATTR = ['action'] + BLOCKLI_ELT = [BLOCK, 'li'] + META_ATTRS = [I18N, 'http-equiv', 'name', 'scheme'] + CONTENT_ATTR = ['content'] + TYPE_ATTR = ['type'] + NOFRAMES_CONTENT = ['body', FLOW, MODIFIER] + OBJECT_CONTENTS = [FLOW, 'param'] + OBJECT_ATTRS = [ATTRS, 'declare', 'classid', 'codebase', 'data', 'type', + 'codetype', 'archive', 'standby', 'height', 'width', + 'usemap', 'name', 'tabindex'] + OBJECT_DEPR = ['align', 'border', 'hspace', 'vspace'] + OL_ATTRS = ['type', 'compact', 'start'] + OPTION_ELT = ['option'] + OPTGROUP_ATTRS = [ATTRS, 'disabled'] + OPTION_ATTRS = [ATTRS, 'disabled', 'label', 'selected', 'value'] + PARAM_ATTRS = ['id', 'value', 'valuetype', 'type'] + WIDTH_ATTR = ['width'] + PRE_CONTENT = [PHRASE, 'tt', 'i', 'b', 'u', 's', 'strike', 'a', 'br', + 'script', 'map', 'q', 'span', 'bdo', 'iframe'] + SCRIPT_ATTRS = ['charset', 'src', 'defer', 'event', 'for'] + LANGUAGE_ATTR = ['language'] + SELECT_CONTENT = ['optgroup', 'option'] + SELECT_ATTRS = [ATTRS, 'name', 'size', 'multiple', 'disabled', 'tabindex', + 'onfocus', 'onblur', 'onchange'] + STYLE_ATTRS = [I18N, 'media', 'title'] + TABLE_ATTRS = [ATTRS, 'summary', 'width', 'border', 'frame', 'rules', + 'cellspacing', 'cellpadding', 'datapagesize'] + TABLE_DEPR = ['align', 'bgcolor'] + TABLE_CONTENTS = ['caption', 'col', 'colgroup', 'thead', 'tfoot', 'tbody', + 'tr'] + TR_ELT = ['tr'] + TALIGN_ATTRS = [ATTRS, CELLHALIGN, CELLVALIGN] + TH_TD_DEPR = ['nowrap', 'bgcolor', 'width', 'height'] + TH_TD_ATTR = [ATTRS, 'abbr', 'axis', 'headers', 'scope', 'rowspan', + 'colspan', CELLHALIGN, CELLVALIGN] + TEXTAREA_ATTRS = [ATTRS, 'name', 
'disabled', 'readonly', 'tabindex', + 'accesskey', 'onfocus', 'onblur', 'onselect', + 'onchange'] + TR_CONTENTS = ['th', 'td'] + BGCOLOR_ATTR = ['bgcolor'] + LI_ELT = ['li'] + UL_DEPR = ['type', 'compact'] + DIR_ATTR = ['dir'] + + [ + ['a', false, false, false, false, false, :any, true, + 'anchor ', + HTML_INLINE, nil, A_ATTRS, TARGET_ATTR, [] + ], + ['abbr', false, false, false, false, false, :any, true, + 'abbreviated form', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['acronym', false, false, false, false, false, :any, true, '', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['address', false, false, false, false, false, :any, false, + 'information on author', + INLINE_P , nil, HTML_ATTRS, [], [] + ], + ['applet', false, false, false, false, true, :loose, true, + 'java applet ', + FLOW_PARAM, nil, [], APPLET_ATTRS, [] + ], + ['area', false, true, true, true, false, :any, false, + 'client-side image map area ', + EMPTY, nil, AREA_ATTRS, TARGET_ATTR, ALT_ATTR + ], + ['b', false, true, false, false, false, :any, true, + 'bold text style', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['base', false, true, true, true, false, :any, false, + 'document base uri ', + EMPTY, nil, [], TARGET_ATTR, HREF_ATTRS + ], + ['basefont', false, true, true, true, true, :loose, true, + 'base font size ', + EMPTY, nil, [], BASEFONT_ATTRS, [] + ], + ['bdo', false, false, false, false, false, :any, true, + 'i18n bidi over-ride ', + HTML_INLINE, nil, CORE_I18N_ATTRS, [], DIR_ATTR + ], + ['big', false, true, false, false, false, :any, true, + 'large text style', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['blockquote', false, false, false, false, false, :any, false, + 'long quotation ', + HTML_FLOW, nil, QUOTE_ATTRS, [], [] + ], + ['body', true, true, false, false, false, :any, false, + 'document body ', + BODY_CONTENTS, 'div', BODY_ATTRS, BODY_DEPR, [] + ], + ['br', false, true, true, true, false, :any, true, + 'forced line break ', + EMPTY, nil, CORE_ATTRS, CLEAR_ATTRS, [] + ], + 
['button', false, false, false, false, false, :any, true, + 'push button ', + [HTML_FLOW, MODIFIER], nil, BUTTON_ATTRS, [], [] + ], + ['caption', false, false, false, false, false, :any, false, + 'table caption ', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['center', false, true, false, false, true, :loose, false, + 'shorthand for div align=center ', + HTML_FLOW, nil, [], HTML_ATTRS, [] + ], + ['cite', false, false, false, false, false, :any, true, 'citation', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['code', false, false, false, false, false, :any, true, + 'computer code fragment', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['col', false, true, true, true, false, :any, false, 'table column ', + EMPTY, nil, COL_ATTRS, [], [] + ], + ['colgroup', false, true, false, false, false, :any, false, + 'table column group ', + COL_ELT, 'col', COL_ATTRS, [], [] + ], + ['dd', false, true, false, false, false, :any, false, + 'definition description ', + HTML_FLOW, nil, HTML_ATTRS, [], [] + ], + ['del', false, false, false, false, false, :any, true, + 'deleted text ', + HTML_FLOW, nil, EDIT_ATTRS, [], [] + ], + ['dfn', false, false, false, false, false, :any, true, + 'instance definition', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['dir', false, false, false, false, true, :loose, false, + 'directory list', + BLOCKLI_ELT, 'li', [], COMPACT_ATTRS, [] + ], + ['div', false, false, false, false, false, :any, false, + 'generic language/style container', + HTML_FLOW, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['dl', false, false, false, false, false, :any, false, + 'definition list ', + DL_CONTENTS, 'dd', HTML_ATTRS, COMPACT_ATTR, [] + ], + ['dt', false, true, false, false, false, :any, false, + 'definition term ', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['em', false, true, false, false, false, :any, true, + 'emphasis', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['embed', false, true, false, false, true, :loose, true, + 'generic embedded object ', + EMPTY, nil, 
EMBED_ATTRS, [], [] + ], + ['fieldset', false, false, false, false, false, :any, false, + 'form control group ', + FIELDSET_CONTENTS, nil, HTML_ATTRS, [], [] + ], + ['font', false, true, false, false, true, :loose, true, + 'local change to font ', + HTML_INLINE, nil, [], FONT_ATTRS, [] + ], + ['form', false, false, false, false, false, :any, false, + 'interactive form ', + FORM_CONTENTS, 'fieldset', FORM_ATTRS, TARGET_ATTR, ACTION_ATTR + ], + ['frame', false, true, true, true, false, :frameset, false, + 'subwindow ', + EMPTY, nil, [], FRAME_ATTRS, [] + ], + ['frameset', false, false, false, false, false, :frameset, false, + 'window subdivision', + FRAMESET_CONTENTS, 'noframes', [], FRAMESET_ATTRS, [] + ], + ['htrue', false, false, false, false, false, :any, false, + 'heading ', + HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['htrue', false, false, false, false, false, :any, false, + 'heading ', + HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['htrue', false, false, false, false, false, :any, false, + 'heading ', + HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['h4', false, false, false, false, false, :any, false, + 'heading ', + HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['h5', false, false, false, false, false, :any, false, + 'heading ', + HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['h6', false, false, false, false, false, :any, false, + 'heading ', + HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['head', true, true, false, false, false, :any, false, + 'document head ', + HEAD_CONTENTS, nil, HEAD_ATTRS, [], [] + ], + ['hr', false, true, true, true, false, :any, false, + 'horizontal rule ', + EMPTY, nil, HTML_ATTRS, HR_DEPR, [] + ], + ['html', true, true, false, false, false, :any, false, + 'document root element ', + HTML_CONTENT, nil, I18N_ATTRS, VERSION_ATTR, [] + ], + ['i', false, true, false, false, false, :any, true, + 'italic text style', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['iframe', false, false, false, 
false, false, :any, true, + 'inline subwindow ', + HTML_FLOW, nil, [], IFRAME_ATTRS, [] + ], + ['img', false, true, true, true, false, :any, true, + 'embedded image ', + EMPTY, nil, IMG_ATTRS, ALIGN_ATTR, SRC_ALT_ATTRS + ], + ['input', false, true, true, true, false, :any, true, + 'form control ', + EMPTY, nil, INPUT_ATTRS, ALIGN_ATTR, [] + ], + ['ins', false, false, false, false, false, :any, true, + 'inserted text', + HTML_FLOW, nil, EDIT_ATTRS, [], [] + ], + ['isindex', false, true, true, true, true, :loose, false, + 'single line prompt ', + EMPTY, nil, [], PROMPT_ATTRS, [] + ], + ['kbd', false, false, false, false, false, :any, true, + 'text to be entered by the user', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['label', false, false, false, false, false, :any, true, + 'form field label text ', + [HTML_INLINE, MODIFIER], nil, LABEL_ATTRS, [], [] + ], + ['legend', false, false, false, false, false, :any, false, + 'fieldset legend ', + HTML_INLINE, nil, LEGEND_ATTRS, ALIGN_ATTR, [] + ], + ['li', false, true, true, false, false, :any, false, + 'list item ', + HTML_FLOW, nil, HTML_ATTRS, [], [] + ], + ['link', false, true, true, true, false, :any, false, + 'a media-independent link ', + EMPTY, nil, LINK_ATTRS, TARGET_ATTR, [] + ], + ['map', false, false, false, false, false, :any, true, + 'client-side image map ', + MAP_CONTENTS, nil, HTML_ATTRS, [], NAME_ATTR + ], + ['menu', false, false, false, false, true, :loose, false, + 'menu list ', + BLOCKLI_ELT, nil, [], COMPACT_ATTRS, [] + ], + ['meta', false, true, true, true, false, :any, false, + 'generic metainformation ', + EMPTY, nil, META_ATTRS, [], CONTENT_ATTR + ], + ['noframes', false, false, false, false, false, :frameset, false, + 'alternate content container for non frame-based rendering ', + NOFRAMES_CONTENT, 'body', HTML_ATTRS, [], [] + ], + ['noscript', false, false, false, false, false, :any, false, + 'alternate content container for non script-based rendering ', + HTML_FLOW, 'div', HTML_ATTRS, [], [] 
+ ], + ['object', false, false, false, false, false, :any, true, + 'generic embedded object ', + OBJECT_CONTENTS, 'div', OBJECT_ATTRS, OBJECT_DEPR, [] + ], + ['ol', false, false, false, false, false, :any, false, + 'ordered list ', + LI_ELT, 'li', HTML_ATTRS, OL_ATTRS, [] + ], + ['optgroup', false, false, false, false, false, :any, false, + 'option group ', + OPTION_ELT, 'option', OPTGROUP_ATTRS, [], LABEL_ATTR + ], + ['option', false, true, false, false, false, :any, false, + 'selectable choice ', + HTML_PCDATA, nil, OPTION_ATTRS, [], [] + ], + ['p', false, true, false, false, false, :any, false, + 'paragraph ', + HTML_INLINE, nil, HTML_ATTRS, ALIGN_ATTR, [] + ], + ['param', false, true, true, true, false, :any, false, + 'named property value ', + EMPTY, nil, PARAM_ATTRS, [], NAME_ATTR + ], + ['pre', false, false, false, false, false, :any, false, + 'preformatted text ', + PRE_CONTENT, nil, HTML_ATTRS, WIDTH_ATTR, [] + ], + ['q', false, false, false, false, false, :any, true, + 'short inline quotation ', + HTML_INLINE, nil, QUOTE_ATTRS, [], [] + ], + ['s', false, true, false, false, true, :loose, true, + 'strike-through text style', + HTML_INLINE, nil, [], HTML_ATTRS, [] + ], + ['samp', false, false, false, false, false, :any, true, + 'sample program output, scripts, etc.', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['script', false, false, false, false, false, :any, true, + 'script statements ', + HTML_CDATA, nil, SCRIPT_ATTRS, LANGUAGE_ATTR, TYPE_ATTR + ], + ['select', false, false, false, false, false, :any, true, + 'option selector ', + SELECT_CONTENT, nil, SELECT_ATTRS, [], [] + ], + ['small', false, true, false, false, false, :any, true, + 'small text style', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['span', false, false, false, false, false, :any, true, + 'generic language/style container ', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['strike', false, true, false, false, true, :loose, true, + 'strike-through text', + HTML_INLINE, nil, [], 
HTML_ATTRS, [] + ], + ['strong', false, true, false, false, false, :any, true, + 'strong emphasis', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['style', false, false, false, false, false, :any, false, + 'style info ', + HTML_CDATA, nil, STYLE_ATTRS, [], TYPE_ATTR + ], + ['sub', false, true, false, false, false, :any, true, + 'subscript', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['sup', false, true, false, false, false, :any, true, + 'superscript ', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['table', false, false, false, false, false, :any, false, + '', + TABLE_CONTENTS, 'tr', TABLE_ATTRS, TABLE_DEPR, [] + ], + ['tbody', true, false, false, false, false, :any, false, + 'table body ', + TR_ELT, 'tr', TALIGN_ATTRS, [], [] + ], + ['td', false, false, false, false, false, :any, false, + 'table data cell', + HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, [] + ], + ['textarea', false, false, false, false, false, :any, true, + 'multi-line text field ', + HTML_PCDATA, nil, TEXTAREA_ATTRS, [], ROWS_COLS_ATTR + ], + ['tfoot', false, true, false, false, false, :any, false, + 'table footer ', + TR_ELT, 'tr', TALIGN_ATTRS, [], [] + ], + ['th', false, true, false, false, false, :any, false, + 'table header cell', + HTML_FLOW, nil, TH_TD_ATTR, TH_TD_DEPR, [] + ], + ['thead', false, true, false, false, false, :any, false, + 'table header ', + TR_ELT, 'tr', TALIGN_ATTRS, [], [] + ], + ['title', false, false, false, false, false, :any, false, + 'document title ', + HTML_PCDATA, nil, I18N_ATTRS, [], [] + ], + ['tr', false, false, false, false, false, :any, false, + 'table row ', + TR_CONTENTS, 'td', TALIGN_ATTRS, BGCOLOR_ATTR, [] + ], + ['tt', false, true, false, false, false, :any, true, + 'teletype or monospaced text style', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ], + ['u', false, true, false, false, true, :loose, true, + 'underlined text style', + HTML_INLINE, nil, [], HTML_ATTRS, [] + ], + ['ul', false, false, false, false, false, :any, false, + 'unordered list ', + LI_ELT, 
'li', HTML_ATTRS, UL_DEPR, [] + ], + ['var', false, false, false, false, false, :any, true, + 'instance of a variable or program argument', + HTML_INLINE, nil, HTML_ATTRS, [], [] + ] + ].each do |descriptor| + name = descriptor[0] + + begin + d = Desc.new(*descriptor) + + # flatten all the attribute lists (Ruby1.9, *[a,b,c] can be + # used to flatten a literal list, but not in Ruby1.8). + d[:subelts] = d[:subelts].flatten + d[:attrs_opt] = d[:attrs_opt].flatten + d[:attrs_depr] = d[:attrs_depr].flatten + d[:attrs_req] = d[:attrs_req].flatten + rescue => e + p name + raise e + end + + DefaultDescriptions[name] = d + end + end + end +end diff --git a/test/html/test_document_fragment.rb b/test/html/test_document_fragment.rb index 55a249999f6..7e1065dcb1f 100644 --- a/test/html/test_document_fragment.rb +++ b/test/html/test_document_fragment.rb @@ -83,7 +83,8 @@ def test_html_fragment def test_html_fragment_has_outer_text doc = "a
b
c" fragment = Nokogiri::HTML::Document.new.fragment(doc) - if Nokogiri::VERSION_INFO['libxml']['loaded'] <= "2.6.16" + if Nokogiri.uses_libxml? && + Nokogiri::VERSION_INFO['libxml']['loaded'] <= "2.6.16" assert_equal "a
b

c

", fragment.to_s else assert_equal "a
b
c", fragment.to_s diff --git a/test/test_nokogiri.rb b/test/test_nokogiri.rb index 5c16f5117a0..6e8876fb7ec 100644 --- a/test/test_nokogiri.rb +++ b/test/test_nokogiri.rb @@ -22,16 +22,18 @@ def test_versions assert ! Nokogiri.ffi? end - assert_match version_match, Nokogiri::VERSION_INFO['libxml']['loaded'] - Nokogiri::LIBXML_PARSER_VERSION =~ /(\d)(\d{2})(\d{2})/ - major = $1.to_i - minor = $2.to_i - bug = $3.to_i - assert_equal "#{major}.#{minor}.#{bug}", Nokogiri::VERSION_INFO['libxml']['loaded'] + if Nokogiri.uses_libxml? + assert_match version_match, Nokogiri::VERSION_INFO['libxml']['loaded'] + Nokogiri::LIBXML_PARSER_VERSION =~ /(\d)(\d{2})(\d{2})/ + major = $1.to_i + minor = $2.to_i + bug = $3.to_i + assert_equal "#{major}.#{minor}.#{bug}", Nokogiri::VERSION_INFO['libxml']['loaded'] + end end def test_libxml_iconv - assert Nokogiri.const_defined?(:LIBXML_ICONV_ENABLED) + assert Nokogiri.const_defined?(:LIBXML_ICONV_ENABLED) if Nokogiri.uses_libxml? end def test_parse_with_io