From 44107948b94682334c8cd3e55f7e8c9c28218c88 Mon Sep 17 00:00:00 2001 From: Ko van der Sloot Date: Fri, 21 Apr 2023 11:22:39 +0200 Subject: [PATCH] fixed a problem comparable to https://github.com/proycon/python-ucto/issues/16 (for XmlInput) --- src/tokenize.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tokenize.cxx b/src/tokenize.cxx index dd63350..7c910a1 100644 --- a/src/tokenize.cxx +++ b/src/tokenize.cxx @@ -1847,6 +1847,7 @@ namespace Tokenizer { } folia::Document *TokenizerClass::tokenize_folia( const string& infile_name ){ + reset(); // when starting a new input file, we must reset provenance et al. if ( inputclass == outputclass && !doWordCorrection ){ DBG << "ucto: --filter=NO is automatically set. inputclass equals outputclass!"