diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 63c33fb28..7a9c7042c 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -822,8 +822,7 @@ static char *display_disjuncts(Dictionary dict, const Dict_node *dn, max_cost, NULL); unsigned int dnum0 = count_disjuncts(d); - d = eliminate_duplicate_disjuncts(d, false); - unsigned int dnum1 = count_disjuncts(d); + unsigned int dnum1 = dnum0 - eliminate_duplicate_disjuncts(d, false); if ((flags != NULL) && (strchr(flags, 'm') != NULL)) { diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index b5e456ab1..eb6fb906b 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -334,10 +334,15 @@ static void disjunct_dup_table_delete(disjunct_dup_table *dt) } /** - * Takes the list of disjuncts pointed to by d, eliminates all - * duplicates, and returns a pointer to a new list. + * Takes the list of disjuncts pointed to by dw, eliminates all + * duplicates. The elimination is done in-place. Because the first + * disjunct can never be eliminated (it cannot be a duplicate of + * anything before it), the argument disjunct list always points to the + * new list. + * + * @return The number of eliminated disjuncts. */ -Disjunct *eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string) +unsigned int eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string) { unsigned int count = 0; disjunct_dup_table *dt; @@ -408,7 +413,7 @@ Disjunct *eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string) multi_string ? " (different word-strings)" : ""); disjunct_dup_table_delete(dt); - return dw; + return count; } /* ============================================================= */ diff --git a/link-grammar/disjunct-utils.h b/link-grammar/disjunct-utils.h index e3ce480b1..bed949010 100644 --- a/link-grammar/disjunct-utils.h +++ b/link-grammar/disjunct-utils.h @@ -85,7 +85,7 @@ void free_categories(Sentence); void free_categories_from_disjunct_array(Disjunct *, unsigned int); unsigned int count_disjuncts(Disjunct *); Disjunct * catenate_disjuncts(Disjunct *, Disjunct *); -Disjunct * eliminate_duplicate_disjuncts(Disjunct *, bool); +unsigned int eliminate_duplicate_disjuncts(Disjunct *, bool); int left_connector_count(Disjunct *); int right_connector_count(Disjunct *); diff --git a/link-grammar/parse/preparation.c b/link-grammar/parse/preparation.c index fb75170f9..3ef6ade60 100644 --- a/link-grammar/parse/preparation.c +++ b/link-grammar/parse/preparation.c @@ -166,8 +166,10 @@ static void create_wildcard_word_disjunct_list(Sentence sent, build_sentence_disjuncts(wc_word_list, opts->disjunct_cost, opts); Word *word0 = &wc_word_list->word[0]; - word0->d = eliminate_duplicate_disjuncts(word0->d, false); - word0->d = eliminate_duplicate_disjuncts(word0->d, true); + unsigned int Ndeleted; + Ndeleted = eliminate_duplicate_disjuncts(word0->d, false); + Ndeleted += eliminate_duplicate_disjuncts(word0->d, true); + print_time(opts, "Eliminated duplicate disjuncts (%u deleted)", Ndeleted); wc_word_list->min_len_encoding = 2; /* Don't share/encode. */ Tracon_sharing *t = pack_sentence_for_pruning(wc_word_list); @@ -208,15 +210,16 @@ void prepare_to_parse(Sentence sent, Parse_Options opts) } print_time(opts, "Built disjuncts"); + unsigned int Ndeleted = 0; for (i=0; ilength; i++) { - sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d, false); + Ndeleted += eliminate_duplicate_disjuncts(sent->word[i].d, false); if (IS_GENERATION(sent->dict)) { if ((sent->word[i].d != NULL) && (sent->word[i].d->is_category != 0)) { /* Also with different word_string. */ - sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d, true); + Ndeleted += eliminate_duplicate_disjuncts(sent->word[i].d, true); /* XXX This ordinal numbering is just plain wrong. * Most of the disjuncts have already been pruned away, @@ -248,7 +251,7 @@ void prepare_to_parse(Sentence sent, Parse_Options opts) return; #endif } - print_time(opts, "Eliminated duplicate disjuncts"); + print_time(opts, "Eliminated duplicate disjuncts (%u deleted)", Ndeleted); if (verbosity_level(D_PREP)) { diff --git a/link-grammar/resources.c b/link-grammar/resources.c index 19da17771..b29c0655d 100644 --- a/link-grammar/resources.c +++ b/link-grammar/resources.c @@ -145,7 +145,7 @@ bool resources_memory_exhausted(Resources r) else return (r->memory_exhausted || (get_space_in_use() > r->max_memory)); } -#define RES_COL_WIDTH 40 +#define RES_COL_WIDTH 52 /** print out the cpu ticks since this was last called */ GNUC_PRINTF(2,0) diff --git a/link-grammar/tokenize/word-structures.h b/link-grammar/tokenize/word-structures.h index c4efcb75a..214cee29b 100644 --- a/link-grammar/tokenize/word-structures.h +++ b/link-grammar/tokenize/word-structures.h @@ -14,6 +14,9 @@ #ifndef _WORD_STRUCTURE_H_ #define _WORD_STRUCTURE_H_ +#include +#include "stdbool.h" + #include "api-types.h" typedef struct X_node_struct X_node;