-
Notifications
You must be signed in to change notification settings - Fork 10.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve copy/paste by inserting spaces into textChunks if we deem it appropriate. Add test re same. PR #5783.
- Loading branch information
speedplane
committed
Mar 8, 2015
1 parent
fa0f09b
commit e9ad0b4
Showing
6 changed files
with
153 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -118,3 +118,4 @@ | |
!issue5481.pdf | ||
!issue5567.pdf | ||
!issue5701.pdf | ||
!US6205527_page1.pdf |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ | ||
/* globals PDFJS, expect, it, describe, Promise, combineUrl, waitsFor, | ||
isArray, MissingPDFException */ | ||
|
||
'use strict'; | ||
|
||
/**
 * Holds the current spec (via the `waitsFor` global) until `promise` settles.
 *
 * On fulfilment, `successCallback` is invoked with the fulfilment value. On
 * rejection, a failing expectation is recorded. In both cases the latch
 * handed to `waitsFor` starts returning true, so the spec does not sit idle
 * until the timeout once the outcome is already known.
 *
 * @param {Promise} promise - The promise to wait on.
 * @param {function} successCallback - Called with the fulfilment value.
 */
function waitsForPromiseResolved(promise, successCallback) {
  // Track settlement with a dedicated flag rather than inspecting the
  // resolved value, so a promise fulfilled with `undefined` ends the wait.
  var settled = false;
  promise.then(function(val) {
    settled = true;
    successCallback(val);
  },
  function() {
    // Shouldn't get here. Release the latch as well: the failure is already
    // recorded, so there is nothing left to wait for.
    settled = true;
    expect(false).toEqual(true);
  });
  // 20000 is passed through as the timeout argument of `waitsFor`.
  waitsFor(function() {
    return settled;
  }, 20000);
}
|
||
/**
 * Starts loading page 1 of the given PDF and registers a wait on it.
 *
 * The URL is built by combining `window.location.href` with `pdf`. The
 * returned holder exposes the pending page promise immediately; its `page`
 * property is `undefined` until that promise fulfils, at which point it is
 * set to the fulfilment value.
 *
 * @param {string} pdf - PDF location, combined with the current page URL.
 * @returns {{promise: Promise, page: *}} Holder for the page promise and,
 *   once available, the loaded page.
 */
function getPageOneOf(pdf) {
  var pdfURL = combineUrl(window.location.href, pdf);
  // Chain straight off the loading promise instead of wrapping it in a
  // hand-built one: a rejection from getDocument() or getPage() then rejects
  // `pagePromise` rather than leaving it pending forever.
  var pagePromise = PDFJS.getDocument(pdfURL).then(function(doc) {
    return doc.getPage(1);
  });
  var page = {
    promise: pagePromise,
    // Filled in below once the page has loaded.
    page: undefined
  };
  waitsForPromiseResolved(pagePromise, function(data) {
    page.page = data;
  });
  return page;
}
|
||
// Specs checking that text extracted from page 1 of sample PDFs contains
// the expected runs of text once all item strings are concatenated.
describe('text-extract', function() {
  /**
   * Loads page 1 of `pdf`, joins the `str` of every text-content item with
   * no separator, and expects each snippet to occur in the joined text.
   *
   * @param {string} pdf - PDF location handed to getPageOneOf().
   * @param {Array<string>} snippets - Substrings that must be present.
   */
  function expectFirstPageTextToContain(pdf, snippets) {
    var page = getPageOneOf(pdf);
    waitsForPromiseResolved(page.promise, function (pdfPage) {
      var textPromise = pdfPage.getTextContent();
      waitsForPromiseResolved(textPromise, function (content) {
        expect(!!content.items).toEqual(true);
        var text = content.items.map(function (d) { return d.str; }).join('');
        // Make sure the text is ordered properly.
        snippets.forEach(function (snippet) {
          // Compare against -1, not 0: indexOf() returns 0 for a match at
          // the very start of the text, which is still a match.
          expect(text.indexOf(snippet) !== -1).toEqual(true);
        });
      });
    });
  }

  it('patent', function () {
    expectFirstPageTextToContain('../pdfs/US6205527_page1.pdf', [
      'Disclosed is an apparatus, a system, a',
      'device to the computer system; (b) preparing ' +
        'a storage. media of the peripheral storage'
    ]);
  });

  it('tracemonkey', function () {
    expectFirstPageTextToContain('../pdfs/tracemonkey.pdf', [
      'no concrete type information is available',
      'difficult to com-pile than statically ',
      'this work for personal or classroom use is'
    ]);
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters