Skip to content

Commit

Permalink
Minor script optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
suddenrushofsushi committed Apr 5, 2016
1 parent 3c6e108 commit e495130
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 10 deletions.
Binary file modified Soupy.class
Binary file not shown.
10 changes: 6 additions & 4 deletions Soupy.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@ public static void main(String args[]) throws IOException {
  System.out.println(dir.toString());
  File[] directoryListing = dir.listFiles();
  StringBuilder buf = new StringBuilder();
+ String path;
+ Document doc;
+ Elements links;
  if (directoryListing != null) {
  for (File child : directoryListing) {
  buf.setLength(0);
- String path = child.getAbsolutePath();
- String content = readFile(path, StandardCharsets.UTF_8);
- Document doc = Jsoup.parse(content);
+ path = child.getAbsolutePath();
+ doc = Jsoup.parse(readFile(path, StandardCharsets.UTF_8));
  doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
- Elements links = doc.select("a");
+ links = doc.select("a");
  for(Element a : links) {
  buf.append(a.toString());
  buf.append("\n");
Expand Down
3 changes: 2 additions & 1 deletion bench.groovy
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
  import org.jsoup.*
  import org.jsoup.nodes.*

+ buf = new StringBuilder();
  new File("/Users/sushi/Research/tie/html/test_files").eachFile() { file ->
+ buf.setLength(0)
  new_path = file.getAbsolutePath().replace("test_files", "groovy_output")
  content = file.getText('UTF-8')
  doc = Jsoup.parse(content);
  doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
  links = doc.select("a");
- buf = new StringBuilder();
  links.each { a ->
  buf << a.toString() //.append(a.toString());
  buf << "\n" //.append("\n");
Expand Down
10 changes: 5 additions & 5 deletions bench.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
  require 'nokogiri'

- Dir.glob("/Users/sushi/Research/tie/html/test_files/*.html") do |f|
+ Dir.glob('/Users/sushi/Research/tie/html/test_files/*.html') do |f|
  content = IO.read(f)
  doc = Nokogiri::HTML(content)
- list = []
- doc.css('a').each do |e|
- list << e.to_s
+ File.open(f.sub('test_files', 'mri_output'), 'w') do |of|
+ doc.css('a').each do |e|
+ of.write("#{e}\n")
+ end
  end
- IO.write(f.sub("test_files", "mri_output"), list.join("\n"))
  end

0 comments on commit e495130

Please sign in to comment.