-
-
Notifications
You must be signed in to change notification settings - Fork 902
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2278 from sparklemotion/flavorjones-introduce-html4-namespace
introduce html4 namespace --- **What problem is this PR intended to solve?** As the Nokogumbo merger progresses (see #2204), we now have an `HTML5` module and namespace, but the previous libxml2- (and nekohtml-) based functionality is parked under the ambiguous `HTML` module and namespace. I'd like to disambiguate, and also introduce an opportunity for us to use `HTML` for more general use in the future (e.g., perhaps detection of HTML doc format and choosing the right DOM parser). This PR moves everything currently under `HTML` to `HTML4`, and makes `HTML` an alias for `HTML4`. It updates doc strings and class names. Some changes in behavior that I want to note: - objects will report a class of `Nokogiri::HTML4::XXX` where they previously reported `Nokogiri::HTML::XXX` - some of the exported C symbols have been renamed (e.g., `mNokogiriHTML` is now `mNokogiriHTML4`) which might impact anyone writing C code and linking against Nokogiri's dylib **Have you included adequate test coverage?** I've left the tests alone (except for the addition of some "HTML/HTML4 equivalence" tests) to demonstrate there's no behavioral breakage. **Does this change affect the behavior of either the C or the Java implementations?** Notably, I've updated the Java files to rename classes and variables, and use the proper module and class names, so that it stays in sync with CRuby despite not having an `HTML5` module/namespace.
- Loading branch information
Showing
74 changed files
with
703 additions
and
745 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,13 +18,13 @@ | |
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; | ||
|
||
/** | ||
* Class for Nokogiri::HTML::Document. | ||
* Class for Nokogiri::HTML4::Document. | ||
* | ||
* @author sergio | ||
* @author Yoko Harada <[email protected]> | ||
*/ | ||
@JRubyClass(name = "Nokogiri::HTML::Document", parent = "Nokogiri::XML::Document") | ||
public class HtmlDocument extends XmlDocument | ||
@JRubyClass(name = "Nokogiri::HTML4::Document", parent = "Nokogiri::XML::Document") | ||
public class Html4Document extends XmlDocument | ||
{ | ||
private static final String DEFAULT_CONTENT_TYPE = "html"; | ||
private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN"; | ||
|
@@ -33,19 +33,19 @@ public class HtmlDocument extends XmlDocument | |
private String parsed_encoding = null; | ||
|
||
public | ||
HtmlDocument(Ruby ruby, RubyClass klazz) | ||
Html4Document(Ruby ruby, RubyClass klazz) | ||
{ | ||
super(ruby, klazz); | ||
} | ||
|
||
public | ||
HtmlDocument(Ruby runtime, Document document) | ||
Html4Document(Ruby runtime, Document document) | ||
{ | ||
this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document); | ||
} | ||
|
||
public | ||
HtmlDocument(Ruby ruby, RubyClass klazz, Document doc) | ||
Html4Document(Ruby ruby, RubyClass klazz, Document doc) | ||
{ | ||
super(ruby, klazz, doc); | ||
} | ||
|
@@ -55,10 +55,10 @@ public class HtmlDocument extends XmlDocument | |
rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args) | ||
{ | ||
final Ruby runtime = context.runtime; | ||
HtmlDocument htmlDocument; | ||
Html4Document htmlDocument; | ||
try { | ||
Document docNode = createNewDocument(runtime); | ||
htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz); | ||
htmlDocument = (Html4Document) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz); | ||
htmlDocument.setDocumentNode(context.runtime, docNode); | ||
} catch (Exception ex) { | ||
throw asRuntimeError(runtime, "couldn't create document: ", ex); | ||
|
@@ -135,13 +135,6 @@ public class HtmlDocument extends XmlDocument | |
return parsed_encoding; | ||
} | ||
|
||
/* | ||
* call-seq: | ||
* read_io(io, url, encoding, options) | ||
* | ||
* Read the HTML document from +io+ with given +url+, +encoding+, | ||
* and +options+. See Nokogiri::HTML.parse | ||
*/ | ||
@JRubyMethod(meta = true, required = 4) | ||
public static IRubyObject | ||
read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args) | ||
|
@@ -151,13 +144,6 @@ public class HtmlDocument extends XmlDocument | |
return ctx.parse(context, (RubyClass) klass, args[1]); | ||
} | ||
|
||
/* | ||
* call-seq: | ||
* read_memory(string, url, encoding, options) | ||
* | ||
* Read the HTML document contained in +string+ with given +url+, +encoding+, | ||
* and +options+. See Nokogiri::HTML.parse | ||
*/ | ||
@JRubyMethod(meta = true, required = 4) | ||
public static IRubyObject | ||
read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,12 +16,12 @@ | |
import org.jruby.runtime.builtin.IRubyObject; | ||
|
||
/** | ||
* Class for Nokogiri::HTML::ElementDescription. | ||
* Class for Nokogiri::HTML4::ElementDescription. | ||
* | ||
* @author Patrick Mahoney <[email protected]> | ||
*/ | ||
@JRubyClass(name = "Nokogiri::HTML::ElementDescription") | ||
public class HtmlElementDescription extends RubyObject | ||
@JRubyClass(name = "Nokogiri::HTML4::ElementDescription") | ||
public class Html4ElementDescription extends RubyObject | ||
{ | ||
|
||
/** | ||
|
@@ -38,7 +38,7 @@ public class HtmlElementDescription extends RubyObject | |
protected HTMLElements.Element element; | ||
|
||
public | ||
HtmlElementDescription(Ruby runtime, RubyClass rubyClass) | ||
Html4ElementDescription(Ruby runtime, RubyClass rubyClass) | ||
{ | ||
super(runtime, rubyClass); | ||
} | ||
|
@@ -89,8 +89,8 @@ public class HtmlElementDescription extends RubyObject | |
return context.nil; | ||
} | ||
|
||
HtmlElementDescription desc = | ||
new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz); | ||
Html4ElementDescription desc = | ||
new Html4ElementDescription(context.getRuntime(), (RubyClass)klazz); | ||
desc.element = elem; | ||
return desc; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,16 +12,16 @@ | |
import org.jruby.runtime.builtin.IRubyObject; | ||
|
||
/** | ||
* Class for Nokogiri::HTML::EntityLookup. | ||
* Class for Nokogiri::HTML4::EntityLookup. | ||
* | ||
* @author Patrick Mahoney <[email protected]> | ||
*/ | ||
@JRubyClass(name = "Nokogiri::HTML::EntityLookup") | ||
public class HtmlEntityLookup extends RubyObject | ||
@JRubyClass(name = "Nokogiri::HTML4::EntityLookup") | ||
public class Html4EntityLookup extends RubyObject | ||
{ | ||
|
||
public | ||
HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) | ||
Html4EntityLookup(Ruby runtime, RubyClass rubyClass) | ||
{ | ||
super(runtime, rubyClass); | ||
} | ||
|
@@ -41,7 +41,7 @@ public class HtmlEntityLookup extends RubyObject | |
if (val == -1) { return ruby.getNil(); } | ||
|
||
IRubyObject edClass = | ||
ruby.getClassFromPath("Nokogiri::HTML::EntityDescription"); | ||
ruby.getClassFromPath("Nokogiri::HTML4::EntityDescription"); | ||
IRubyObject edObj = invoke(context, edClass, "new", | ||
ruby.newFixnum(val), ruby.newString(name), | ||
ruby.newString(name + " entity")); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,27 +24,27 @@ | |
import static nokogiri.internals.NokogiriHelpers.rubyStringToString; | ||
|
||
/** | ||
* Class for Nokogiri::HTML::SAX::ParserContext. | ||
* Class for Nokogiri::HTML4::SAX::ParserContext. | ||
* | ||
* @author serabe | ||
* @author Patrick Mahoney <[email protected]> | ||
* @author Yoko Harada <[email protected]> | ||
*/ | ||
|
||
@JRubyClass(name = "Nokogiri::HTML::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext") | ||
public class HtmlSaxParserContext extends XmlSaxParserContext | ||
@JRubyClass(name = "Nokogiri::HTML4::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext") | ||
public class Html4SaxParserContext extends XmlSaxParserContext | ||
{ | ||
|
||
static HtmlSaxParserContext | ||
static Html4SaxParserContext | ||
newInstance(final Ruby runtime, final RubyClass klazz) | ||
{ | ||
HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz); | ||
Html4SaxParserContext instance = new Html4SaxParserContext(runtime, klazz); | ||
instance.initialize(runtime); | ||
return instance; | ||
} | ||
|
||
public | ||
HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) | ||
Html4SaxParserContext(Ruby ruby, RubyClass rubyClass) | ||
{ | ||
super(ruby, rubyClass); | ||
} | ||
|
@@ -68,7 +68,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext | |
return parser; | ||
} catch (SAXException ex) { | ||
throw new SAXException( | ||
"Problem while creating HTML SAX Parser: " + ex.toString()); | ||
"Problem while creating HTML4 SAX Parser: " + ex.toString()); | ||
} | ||
} | ||
|
||
|
@@ -79,7 +79,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext | |
IRubyObject data, | ||
IRubyObject encoding) | ||
{ | ||
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz); | ||
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klazz); | ||
String javaEncoding = findEncodingName(context, encoding); | ||
if (javaEncoding != null) { | ||
CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding); | ||
|
@@ -231,7 +231,7 @@ static EncodingType get(final int ordinal) | |
IRubyObject data, | ||
IRubyObject encoding) | ||
{ | ||
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass); | ||
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); | ||
ctx.setInputSourceFile(context, data); | ||
String javaEncoding = findEncodingName(context, encoding); | ||
if (javaEncoding != null) { | ||
|
@@ -247,7 +247,7 @@ static EncodingType get(final int ordinal) | |
IRubyObject data, | ||
IRubyObject encoding) | ||
{ | ||
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass); | ||
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); | ||
ctx.setIOInputSource(context, data, context.nil); | ||
String javaEncoding = findEncodingName(context, encoding); | ||
if (javaEncoding != null) { | ||
|
@@ -258,12 +258,12 @@ static EncodingType get(final int ordinal) | |
|
||
/** | ||
* Create a new parser context that will read from a raw input stream. | ||
* Meant to be run in a separate thread by HtmlSaxPushParser. | ||
* Meant to be run in a separate thread by Html4SaxPushParser. | ||
*/ | ||
static HtmlSaxParserContext | ||
static Html4SaxParserContext | ||
parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) | ||
{ | ||
HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass); | ||
Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(runtime, klass); | ||
ctx.setInputSource(stream); | ||
return ctx; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,25 +27,25 @@ | |
import org.jruby.runtime.builtin.IRubyObject; | ||
|
||
/** | ||
* Class for Nokogiri::HTML::SAX::PushParser | ||
* Class for Nokogiri::HTML4::SAX::PushParser | ||
* | ||
* @author | ||
* @author Piotr Szmielew <[email protected]> - based on Nokogiri::XML::SAX::PushParser | ||
*/ | ||
@JRubyClass(name = "Nokogiri::HTML::SAX::PushParser") | ||
public class HtmlSaxPushParser extends RubyObject | ||
@JRubyClass(name = "Nokogiri::HTML4::SAX::PushParser") | ||
public class Html4SaxPushParser extends RubyObject | ||
{ | ||
ParserContext.Options options; | ||
IRubyObject saxParser; | ||
|
||
NokogiriBlockingQueueInputStream stream; | ||
|
||
private ParserTask parserTask = null; | ||
private FutureTask<HtmlSaxParserContext> futureTask = null; | ||
private FutureTask<Html4SaxParserContext> futureTask = null; | ||
private ExecutorService executor = null; | ||
|
||
public | ||
HtmlSaxPushParser(Ruby ruby, RubyClass rubyClass) | ||
Html4SaxPushParser(Ruby ruby, RubyClass rubyClass) | ||
{ | ||
super(ruby, rubyClass); | ||
} | ||
|
@@ -111,7 +111,7 @@ public class HtmlSaxPushParser extends RubyObject | |
final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk); | ||
if (data == null) { | ||
terminateTask(context.runtime); | ||
throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML::SyntaxError | ||
throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML4::SyntaxError | ||
} | ||
|
||
int errorCount0 = parserTask.getErrorCount(); | ||
|
@@ -149,12 +149,12 @@ public class HtmlSaxPushParser extends RubyObject | |
|
||
assert saxParser != null : "saxParser null"; | ||
parserTask = new ParserTask(context, saxParser, stream); | ||
futureTask = new FutureTask<HtmlSaxParserContext>((Callable) parserTask); | ||
futureTask = new FutureTask<Html4SaxParserContext>((Callable) parserTask); | ||
executor = Executors.newSingleThreadExecutor(new ThreadFactory() { | ||
@Override | ||
public Thread newThread(Runnable r) { | ||
Thread t = new Thread(r); | ||
t.setName("HtmlSaxPushParser"); | ||
t.setName("Html4SaxPushParser"); | ||
t.setDaemon(true); | ||
return t; | ||
} | ||
|
@@ -187,14 +187,14 @@ public Thread newThread(Runnable r) { | |
futureTask = null; | ||
} | ||
|
||
private static HtmlSaxParserContext | ||
private static Html4SaxParserContext | ||
parse(final Ruby runtime, final InputStream stream) | ||
{ | ||
RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML::SAX::ParserContext"); | ||
return HtmlSaxParserContext.parse_stream(runtime, klazz, stream); | ||
RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML4::SAX::ParserContext"); | ||
return Html4SaxParserContext.parse_stream(runtime, klazz, stream); | ||
} | ||
|
||
static class ParserTask extends XmlSaxPushParser.ParserTask /* <HtmlSaxPushParser> */ | ||
static class ParserTask extends XmlSaxPushParser.ParserTask /* <Html4SaxPushParser> */ | ||
{ | ||
|
||
private | ||
|
@@ -204,10 +204,10 @@ static class ParserTask extends XmlSaxPushParser.ParserTask /* <HtmlSaxPushParse | |
} | ||
|
||
@Override | ||
public HtmlSaxParserContext | ||
public Html4SaxParserContext | ||
call() throws Exception | ||
{ | ||
return (HtmlSaxParserContext) super.call(); | ||
return (Html4SaxParserContext) super.call(); | ||
} | ||
|
||
} | ||
|
Oops, something went wrong.