Skip to content

Commit

Permalink
Merge pull request #61 from UB-Mannheim/id-system
Browse files Browse the repository at this point in the history
Fix metadata check: ocr-id -> ocr-system, #59
  • Loading branch information
zuphilip authored Sep 16, 2016
2 parents dc629e9 + 0132d54 commit fff7c99
Show file tree
Hide file tree
Showing 10 changed files with 25 additions and 25 deletions.
6 changes: 3 additions & 3 deletions hocr-check
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ else: doc = html.parse(sys.stdin)
################################################################

# check for presence of meta information
test_ok(doc.xpath("//meta[@name='ocr-id']")!=[], "//meta[@name='ocr-id']")
test_ok(doc.xpath("//meta[@name='ocr-recognized']")!=[], "//meta[@name='ocr-recognized']")
test_ok(doc.xpath("//meta[@name='ocr-system']")!=[], "//meta[@name='ocr-system']")
test_ok(doc.xpath("//meta[@name='ocr-capabilites']")!=[], "//meta[@name='ocr-capabilites']")

# check for presence of page
test_ok(doc.xpath("//*[@class='ocr_page']")!=[], "has a page")
Expand Down Expand Up @@ -136,7 +136,7 @@ if not nooverlap:

# FIXME add many other checks:
# - containment of paragraphs, careas, etc.
# - ocr-recognized vs. actual tags
# - ocr-capabilities vs. actual tags
# - warn about text outside ocr_ elements
# - check title= attribute format
# - check that only the right attributes are present on the right elements
Expand Down
4 changes: 2 additions & 2 deletions test/hocr-check/ancestor/notok-carea.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<html>
<head>
<meta name='ocr-id' content="foo"/>
<meta name='ocr-recognized' content="foo"/>
<meta name='ocr-system' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page"></span>
Expand Down
4 changes: 2 additions & 2 deletions test/hocr-check/ancestor/notok-line.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<html>
<head>
<meta name='ocr-id' content="foo"/>
<meta name='ocr-recognized' content="foo"/>
<meta name='ocr-system' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page"></span>
Expand Down
4 changes: 2 additions & 2 deletions test/hocr-check/ancestor/notok-par.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<html>
<head>
<meta name='ocr-id' content="foo"/>
<meta name='ocr-recognized' content="foo"/>
<meta name='ocr-system' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page"></span>
Expand Down
4 changes: 2 additions & 2 deletions test/hocr-check/ancestor/ok-column.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<html>
<head>
<meta name='ocr-id' content="foo"/>
<meta name='ocr-recognized' content="foo"/>
<meta name='ocr-system' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page">
Expand Down
4 changes: 2 additions & 2 deletions test/hocr-check/ancestor/ok-line.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<html>
<head>
<meta name='ocr-id' content="foo"/>
<meta name='ocr-recognized' content="foo"/>
<meta name='ocr-system' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page">
Expand Down
4 changes: 2 additions & 2 deletions test/hocr-check/ancestor/ok-par.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<html>
<head>
<meta name='ocr-id' content="foo"/>
<meta name='ocr-recognized' content="foo"/>
<meta name='ocr-system' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page">
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<html>
<head>
<meta name='ocr-recognized' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page"></span>
Expand Down
9 changes: 0 additions & 9 deletions test/hocr-check/meta/ok-id.html

This file was deleted.

9 changes: 9 additions & 0 deletions test/hocr-check/meta/ok-system.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<html>
<head>
<meta name='ocr-system' content="foo"/>
<meta name='ocr-capabilites' content="foo"/>
</head>
<body>
<span class="ocr_page"> </span>
</body>
</html>

0 comments on commit fff7c99

Please sign in to comment.