Skip to content

Commit

Permalink
Added a patch that should fix proycon/python-ucto#16
Browse files Browse the repository at this point in the history
  • Loading branch information
kosloot committed Apr 21, 2023
1 parent 06bdce3 commit 98e308c
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
7 changes: 4 additions & 3 deletions src/tokenize.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,6 @@ namespace Tokenizer {
bool last_had_space = true;
for ( const auto& pos : eos_posses ){
UnicodeString tmp = UnicodeString( in, prev+1, pos+1-prev );
// cerr << "TMP=" << tmp << endl;
if ( last_had_space ){
// new entry
result.push_back( tmp );
Expand Down Expand Up @@ -1020,6 +1019,7 @@ namespace Tokenizer {
}

folia::Document *TokenizerClass::tokenize( istream& IN ) {
reset(); // when starting a new input file, we must reset provenance et al.
inputEncoding = checkBOM( IN );
folia::Document *doc = start_document( docid );
folia::FoliaElement *root = doc->doc()->index(0);
Expand Down Expand Up @@ -1073,8 +1073,9 @@ namespace Tokenizer {
delete doc;
}
else {
if ( ifile.empty() )
if ( ifile.empty() ){
IN = &cin;
}
else {
IN = new ifstream( ifile );
if ( !IN || !IN->good() ){
Expand Down Expand Up @@ -2924,7 +2925,7 @@ namespace Tokenizer {
DBG << "examine character: " << s << " type= "
<< toString( charT ) << endl;
}
if (reset) { //reset values for new word
if ( reset ) { //reset values for new word
reset = false;
tok_size = 0;
if ( !joiner && !is_separator(c) ){
Expand Down
2 changes: 0 additions & 2 deletions src/ucto.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -566,8 +566,6 @@ int main( int argc, char *argv[] ){
cerr << endl;
}
}


if (xmlin) {
folia::Document *doc = tokenizer.tokenize_folia( ifile );
if ( doc ){
Expand Down

0 comments on commit 98e308c

Please sign in to comment.