Skip to content

Commit

Permalink
Fix assertion caused by wrong unicharset (issue #2301)
Browse files Browse the repository at this point in the history
Credit to OSS-Fuzz:
This fixes an issue that was reported by OSS-Fuzz; see details at
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=13592.

OSS-Fuzz triggered this assertion:

    contains_unichar_id(unichar_id):Error:Assert failed:in file ../../src/ccutil/unicharset.h, line 502

Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed Mar 10, 2019
1 parent 0e72733 commit 91d0a71
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 11 deletions.
9 changes: 5 additions & 4 deletions src/dict/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@ const int case_state_table[6][4] = {
5, -1, 2, -1},
};

int Dict::case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const {
int Dict::case_ok(const WERD_CHOICE &word) const {
int state = 0;
int x;
const UNICHARSET* unicharset = word.unicharset();
for (x = 0; x < word.length(); ++x) {
UNICHAR_ID ch_id = word.unichar_id(x);
if (unicharset.get_isupper(ch_id))
if (unicharset->get_isupper(ch_id))
state = case_state_table[state][1];
else if (unicharset.get_islower(ch_id))
else if (unicharset->get_islower(ch_id))
state = case_state_table[state][2];
else if (unicharset.get_isdigit(ch_id))
else if (unicharset->get_isdigit(ch_id))
state = case_state_table[state][3];
else
state = case_state_table[state][0];
Expand Down
2 changes: 1 addition & 1 deletion src/dict/dict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ void Dict::adjust_word(WERD_CHOICE *word,
bool debug) {
bool is_han = (getUnicharset().han_sid() != getUnicharset().null_sid() &&
word->GetTopScriptID() == getUnicharset().han_sid());
bool case_is_ok = (is_han || case_ok(*word, getUnicharset()));
bool case_is_ok = (is_han || case_ok(*word));
bool punc_is_ok = (is_han || !nonword || valid_punctuation(*word));

float adjust_factor = additional_adjust;
Expand Down
2 changes: 1 addition & 1 deletion src/dict/dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ class Dict {
void SettupStopperPass2();
/* context.cpp *************************************************************/
/// Check a string to see if it matches a set of lexical rules.
int case_ok(const WERD_CHOICE &word, const UNICHARSET &unicharset) const;
int case_ok(const WERD_CHOICE& word) const;
/// Returns true if the word looks like absolute garbage
/// (e.g. an image mistakenly recognized as text).
bool absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharset);
Expand Down
8 changes: 3 additions & 5 deletions src/dict/stopper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
** Filename: stopper.cpp
** Purpose: Stopping criteria for word classifier.
** Author: Dan Johnson
** History: Mon Apr 29 14:56:49 1991, DSJ, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -49,7 +48,7 @@ bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,

bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
bool is_case_ok = case_ok(best_choice, getUnicharset());
bool is_case_ok = case_ok(best_choice);

if (stopper_debug_level >= 1) {
const char *xht = "UNKNOWN";
Expand Down Expand Up @@ -107,15 +106,14 @@ bool Dict::AcceptableResult(WERD_RES *word) const {
tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
word->best_choice->debug_string().string(),
(valid_word(*word->best_choice) ? 'y' : 'n'),
(case_ok(*word->best_choice, getUnicharset()) ? 'y' : 'n'),
(case_ok(*word->best_choice) ? 'y' : 'n'),
word->best_choice->dangerous_ambig_found() ? 'n' : 'y',
word->best_choices.singleton() ? 'n' : 'y');
}

if (word->best_choice->length() == 0 || !word->best_choices.singleton())
return false;
if (valid_word(*word->best_choice) &&
case_ok(*word->best_choice, getUnicharset())) {
if (valid_word(*word->best_choice) && case_ok(*word->best_choice)) {
WordSize = LengthOfShortestAlphaRun(*word->best_choice);
WordSize -= stopper_smallword_size;
if (WordSize < 0)
Expand Down

0 comments on commit 91d0a71

Please sign in to comment.