Skip to content

Commit

Permalink
Merge pull request #1518 from ampli/disjunct-count
Browse files Browse the repository at this point in the history
eliminate_duplicate_disjuncts(): Return the discarded disjunct count
  • Loading branch information
linas authored May 3, 2024
2 parents f393ef5 + a9f081c commit ea8c04e
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 13 deletions.
3 changes: 1 addition & 2 deletions link-grammar/dict-common/print-dict.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,8 +822,7 @@ static char *display_disjuncts(Dictionary dict, const Dict_node *dn,
max_cost, NULL);

unsigned int dnum0 = count_disjuncts(d);
d = eliminate_duplicate_disjuncts(d, false);
unsigned int dnum1 = count_disjuncts(d);
unsigned int dnum1 = dnum0 - eliminate_duplicate_disjuncts(d, false);

if ((flags != NULL) && (strchr(flags, 'm') != NULL))
{
Expand Down
13 changes: 9 additions & 4 deletions link-grammar/disjunct-utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,15 @@ static void disjunct_dup_table_delete(disjunct_dup_table *dt)
}

/**
* Takes the list of disjuncts pointed to by d, eliminates all
* duplicates, and returns a pointer to a new list.
* Takes the list of disjuncts pointed to by dw and eliminates all
* duplicates in place. Because the first disjunct can never be
* eliminated (it cannot be a duplicate of anything before it), dw
* still points to the head of the resulting list after the call, so
* the caller does not need a new list pointer.
*
* @return The number of eliminated disjuncts.
*/
Disjunct *eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string)
unsigned int eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string)
{
unsigned int count = 0;
disjunct_dup_table *dt;
Expand Down Expand Up @@ -408,7 +413,7 @@ Disjunct *eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string)
multi_string ? " (different word-strings)" : "");

disjunct_dup_table_delete(dt);
return dw;
return count;
}

/* ============================================================= */
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/disjunct-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void free_categories(Sentence);
void free_categories_from_disjunct_array(Disjunct *, unsigned int);
unsigned int count_disjuncts(Disjunct *);
Disjunct * catenate_disjuncts(Disjunct *, Disjunct *);
Disjunct * eliminate_duplicate_disjuncts(Disjunct *, bool);
unsigned int eliminate_duplicate_disjuncts(Disjunct *, bool);
int left_connector_count(Disjunct *);
int right_connector_count(Disjunct *);

Expand Down
13 changes: 8 additions & 5 deletions link-grammar/parse/preparation.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,10 @@ static void create_wildcard_word_disjunct_list(Sentence sent,
build_sentence_disjuncts(wc_word_list, opts->disjunct_cost, opts);

Word *word0 = &wc_word_list->word[0];
word0->d = eliminate_duplicate_disjuncts(word0->d, false);
word0->d = eliminate_duplicate_disjuncts(word0->d, true);
unsigned int Ndeleted;
Ndeleted = eliminate_duplicate_disjuncts(word0->d, false);
Ndeleted += eliminate_duplicate_disjuncts(word0->d, true);
print_time(opts, "Eliminated duplicate disjuncts (%u deleted)", Ndeleted);

wc_word_list->min_len_encoding = 2; /* Don't share/encode. */
Tracon_sharing *t = pack_sentence_for_pruning(wc_word_list);
Expand Down Expand Up @@ -208,15 +210,16 @@ void prepare_to_parse(Sentence sent, Parse_Options opts)
}
print_time(opts, "Built disjuncts");

unsigned int Ndeleted = 0;
for (i=0; i<sent->length; i++)
{
sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d, false);
Ndeleted += eliminate_duplicate_disjuncts(sent->word[i].d, false);
if (IS_GENERATION(sent->dict))
{
if ((sent->word[i].d != NULL) && (sent->word[i].d->is_category != 0))
{
/* Also with different word_string. */
sent->word[i].d = eliminate_duplicate_disjuncts(sent->word[i].d, true);
Ndeleted += eliminate_duplicate_disjuncts(sent->word[i].d, true);

/* XXX This ordinal numbering is just plain wrong.
* Most of the disjuncts have already been pruned away,
Expand Down Expand Up @@ -248,7 +251,7 @@ void prepare_to_parse(Sentence sent, Parse_Options opts)
return;
#endif
}
print_time(opts, "Eliminated duplicate disjuncts");
print_time(opts, "Eliminated duplicate disjuncts (%u deleted)", Ndeleted);

if (verbosity_level(D_PREP))
{
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/resources.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ bool resources_memory_exhausted(Resources r)
else return (r->memory_exhausted || (get_space_in_use() > r->max_memory));
}

#define RES_COL_WIDTH 40
#define RES_COL_WIDTH 52

/** print out the cpu ticks since this was last called */
GNUC_PRINTF(2,0)
Expand Down
3 changes: 3 additions & 0 deletions link-grammar/tokenize/word-structures.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#ifndef _WORD_STRUCTURE_H_
#define _WORD_STRUCTURE_H_

#include <inttypes.h>
#include "stdbool.h"

#include "api-types.h"

typedef struct X_node_struct X_node;
Expand Down

0 comments on commit ea8c04e

Please sign in to comment.