Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Heap-buffer-overflow in GenericVector<int>::size (issue #2298) #2305

Merged
merged 1 commit into from
Mar 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions src/dict/dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,21 @@ class Dict {

// Returns true if unichar_id is a word compounding character like - or /.
// unichar_id must be an id contained in this dictionary's unicharset; that
// is checked with ASSERT_HOST before the normalized ids are read.
inline bool compound_marker(UNICHAR_ID unichar_id) {
  const UNICHARSET& unicharset = getUnicharset();
  ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
  // One lookup, made through the same reference the assertion just checked.
  const GenericVector<UNICHAR_ID>& normed_ids =
      unicharset.normed_ids(unichar_id);
  // Only a unichar that normalizes to exactly one id can be a marker.
  return normed_ids.size() == 1 &&
         (normed_ids[0] == hyphen_unichar_id_ ||
          normed_ids[0] == slash_unichar_id_);
}
// Returns true if unichar_id is an apostrophe-like character that may
// separate prefix/suffix words from a main body word.
// unichar_id must be an id contained in this dictionary's unicharset; that
// is checked with ASSERT_HOST before the normalized ids are read.
inline bool is_apostrophe(UNICHAR_ID unichar_id) {
  const UNICHARSET& unicharset = getUnicharset();
  ASSERT_HOST(unicharset.contains_unichar_id(unichar_id));
  // One lookup, made through the same reference the assertion just checked.
  const GenericVector<UNICHAR_ID>& normed_ids =
      unicharset.normed_ids(unichar_id);
  return normed_ids.size() == 1 && normed_ids[0] == apostrophe_unichar_id_;
}

Expand All @@ -141,17 +145,20 @@ class Dict {
}
}
/// Check whether the word has a hyphen at the end.
inline bool has_hyphen_end(UNICHAR_ID unichar_id, bool first_pos) const {
inline bool has_hyphen_end(const UNICHARSET* unicharset,
UNICHAR_ID unichar_id, bool first_pos) const {
if (!last_word_on_line_ || first_pos)
return false;
ASSERT_HOST(unicharset->contains_unichar_id(unichar_id));
const GenericVector<UNICHAR_ID>& normed_ids =
getUnicharset().normed_ids(unichar_id);
unicharset->normed_ids(unichar_id);
return normed_ids.size() == 1 && normed_ids[0] == hyphen_unichar_id_;
}
/// Same as above, but check the unichar at the end of the word.
/// The id is looked up in the unicharset reported by the word itself
/// (word.unicharset()), not in the dictionary's.
/// @return false for a word of length zero.
inline bool has_hyphen_end(const WERD_CHOICE &word) const {
  const int word_index = word.length() - 1;
  // An empty word has no final unichar; do not index it with -1.
  if (word_index < 0)
    return false;
  return has_hyphen_end(word.unicharset(), word.unichar_id(word_index),
                        word_index == 0);
}
/// Unless the previous word was the last one on the line, and the current
/// one is not (thus it is the first one on the line), erase hyphen_word_,
Expand Down
4 changes: 2 additions & 2 deletions src/wordrec/language_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// Description: Functions that utilize the knowledge about the properties,
// structure and statistics of the language to help recognition.
// Author: Daria Antonova
// Created: Mon Nov 11 11:26:43 PST 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -803,7 +802,8 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
}

// Deal with hyphenated words.
if (word_end && dict_->has_hyphen_end(b.unichar_id(), curr_col == 0)) {
if (word_end && dict_->has_hyphen_end(&dict_->getUnicharset(),
b.unichar_id(), curr_col == 0)) {
if (language_model_debug_level > 0) tprintf("Hyphenated word found\n");
return new LanguageModelDawgInfo(dawg_args_.active_dawgs, COMPOUND_PERM);
}
Expand Down