-
-
Notifications
You must be signed in to change notification settings - Fork 904
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Switch code from Sergio's version to pmahoney's version. The new code doesn't have a two-layer design, and differs considerably in that respect. The new code uses nekodtd, so nekodtd.jar is newly added.
- Loading branch information
Showing
68 changed files
with
4,964 additions
and
2,693 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
package nokogiri; | ||
|
||
import java.util.HashMap; | ||
|
||
import org.jruby.Ruby; | ||
import org.jruby.RubyArray; | ||
import org.jruby.RubyClass; | ||
import org.jruby.RubyObject; | ||
import org.jruby.anno.JRubyMethod; | ||
import org.jruby.runtime.ThreadContext; | ||
import org.jruby.runtime.builtin.IRubyObject; | ||
import org.w3c.dom.Element; | ||
import org.w3c.dom.Node; | ||
|
||
/** | ||
* Stub class to satisfy unit tests. I'm not sure where this class is | ||
* meant to be used. As coded it won't really interact with any other | ||
* classes and will have no effect on character encodings reported by | ||
* documents being parsed. | ||
* | ||
* @author Patrick Mahoney <[email protected]> | ||
*/ | ||
public class EncodingHandler extends RubyObject { | ||
protected static HashMap<String,String> map = new HashMap<String,String>(); | ||
static { | ||
addInitial(); | ||
} | ||
|
||
protected String name; | ||
|
||
protected static void addInitial() { | ||
map.put("UTF-8", "UTF-8"); | ||
} | ||
|
||
public EncodingHandler(Ruby ruby, RubyClass klass, String value) { | ||
super(ruby, klass); | ||
name = value; | ||
} | ||
|
||
@JRubyMethod(name="[]", meta=true) | ||
public static IRubyObject get(ThreadContext context, | ||
IRubyObject _klass, | ||
IRubyObject keyObj) { | ||
Ruby ruby = context.getRuntime(); | ||
String key = keyObj.toString(); | ||
String value = map.get(key); | ||
if (value == null) | ||
return ruby.getNil(); | ||
|
||
return new EncodingHandler( | ||
ruby, | ||
(RubyClass)ruby.getClassFromPath("Nokogiri::EncodingHandler"), | ||
value); | ||
} | ||
|
||
@JRubyMethod(meta=true) | ||
public static IRubyObject delete(ThreadContext context, | ||
IRubyObject _klass, | ||
IRubyObject keyObj) { | ||
String key = keyObj.toString(); | ||
String value = map.remove(key); | ||
if (value == null) | ||
return context.getRuntime().getNil(); | ||
return context.getRuntime().newString(value); | ||
} | ||
|
||
@JRubyMethod(name="clear_aliases!", meta=true) | ||
public static IRubyObject clear_aliases(ThreadContext context, | ||
IRubyObject _klass) { | ||
map.clear(); | ||
addInitial(); | ||
return context.getRuntime().getNil(); | ||
} | ||
|
||
@JRubyMethod(meta=true) | ||
public static IRubyObject alias(ThreadContext context, | ||
IRubyObject _klass, | ||
IRubyObject orig, | ||
IRubyObject alias) { | ||
String value = map.get(orig.toString()); | ||
if (value != null) | ||
map.put(alias.toString(), value); | ||
|
||
return context.getRuntime().getNil(); | ||
} | ||
|
||
@JRubyMethod | ||
public IRubyObject name(ThreadContext context) { | ||
return context.getRuntime().newString(name); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
package nokogiri; | ||
|
||
import org.jruby.Ruby; | ||
import org.jruby.RubyClass; | ||
import org.jruby.RubyObject; | ||
import org.jruby.anno.JRubyMethod; | ||
import org.jruby.exceptions.RaiseException; | ||
import org.jruby.runtime.ThreadContext; | ||
import org.jruby.runtime.builtin.IRubyObject; | ||
import org.cyberneko.html.HTMLElements; | ||
import org.cyberneko.html.HTMLElements.Element; | ||
|
||
import java.util.Map; | ||
import java.util.List; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.Collections; | ||
|
||
import static org.jruby.javasupport.util.RuntimeHelpers.invoke; | ||
|
||
/** | ||
* @author Patrick Mahoney <[email protected]> | ||
*/ | ||
public class HtmlElementDescription extends RubyObject { | ||
|
||
/** | ||
* Stores memoized hash of element -> list of valid subelements. | ||
*/ | ||
static protected Map<Short, List<String>> subElements; | ||
static { | ||
Map<Short, List<String>> _subElements = | ||
new HashMap<Short, List<String>>(); | ||
subElements = Collections.synchronizedMap(_subElements); | ||
} | ||
|
||
protected HTMLElements.Element element; | ||
|
||
public HtmlElementDescription(Ruby runtime, RubyClass rubyClass) { | ||
super(runtime, rubyClass); | ||
} | ||
|
||
/** | ||
* Lookup the list of sub elements of <code>code</code>. If not | ||
* already stored, iterate through all elements to find valid | ||
* subelements; save this list and return it. | ||
*/ | ||
protected static List<String> findSubElements(HTMLElements.Element elem) { | ||
List<String> subs = subElements.get(elem.code); | ||
|
||
if (subs == null) { | ||
subs = new ArrayList<String>(); | ||
|
||
/* | ||
* A bit of a hack. NekoHtml source code shows that | ||
* UNKNOWN is the highest value element. We cannot access | ||
* the list of elements directly because it's protected. | ||
*/ | ||
for (short c = 0; c < HTMLElements.UNKNOWN; c++) { | ||
HTMLElements.Element maybe_sub = | ||
HTMLElements.getElement(c); | ||
if (maybe_sub.isParent(elem)) { | ||
subs.add(maybe_sub.name); | ||
} | ||
} | ||
|
||
subElements.put(elem.code, subs); | ||
} | ||
|
||
return subs; | ||
} | ||
|
||
@JRubyMethod(name="[]", meta=true) | ||
public static IRubyObject get(ThreadContext context, | ||
IRubyObject klazz, IRubyObject name) { | ||
|
||
HTMLElements.Element elem = HTMLElements.getElement(name.toString()); | ||
if (elem == HTMLElements.NO_SUCH_ELEMENT) | ||
return context.getRuntime().getNil(); | ||
|
||
HtmlElementDescription desc = | ||
new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz); | ||
desc.element = elem; | ||
return desc; | ||
} | ||
|
||
@JRubyMethod() | ||
public IRubyObject name(ThreadContext context) { | ||
return context.getRuntime().newString(element.name.toLowerCase()); | ||
} | ||
|
||
@JRubyMethod(name="inline?") | ||
public IRubyObject inline_eh(ThreadContext context) { | ||
return context.getRuntime().newBoolean(element.isInline()); | ||
} | ||
|
||
@JRubyMethod(name="empty?") | ||
public IRubyObject empty_eh(ThreadContext context) { | ||
return context.getRuntime().newBoolean(element.isEmpty()); | ||
} | ||
|
||
@JRubyMethod() | ||
public IRubyObject sub_elements(ThreadContext context) { | ||
Ruby ruby = context.getRuntime(); | ||
List<String> subs = findSubElements(element); | ||
IRubyObject[] ary = new IRubyObject[subs.size()]; | ||
for (int i = 0; i < subs.size(); ++i) { | ||
ary[i] = ruby.newString(subs.get(i)); | ||
} | ||
|
||
return ruby.newArray(ary); | ||
} | ||
|
||
} |
Oops, something went wrong.