Skip to content

Commit

Permalink
feat(synonyms): big refactor of synonyms, added extensive synonyms list for en,fr,es,de
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink authored and orangejulius committed Jul 13, 2020
1 parent e557206 commit 00af1b9
Show file tree
Hide file tree
Showing 28 changed files with 1,699 additions and 359 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"@hapi/joi": "^16.1.8",
"colors": "^1.1.2",
"elasticsearch": "^16.0.0",
"glob": "^7.1.6",
"lodash": "^4.17.15",
"pelias-config": "^4.5.0",
"pelias-logger": "^1.3.0",
Expand Down
64 changes: 24 additions & 40 deletions settings.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,7 @@
const _ = require('lodash');
const fs = require('fs');
const path = require('path');
const peliasConfig = require('pelias-config');
const punctuation = require('./punctuation');
const synonymParser = require('./synonyms/parser');
const synonymLinter = require('./synonyms/linter');

// load synonyms from disk
const synonyms = fs.readdirSync(path.join(__dirname, 'synonyms'))
.sort()
.filter( f => f.match(/\.txt$/) )
.reduce(( acc, cur ) => {
acc[cur.replace('.txt', '')] = synonymParser(
path.join(__dirname, 'synonyms', cur)
);
return acc;
}, {});

// emit synonym warnings
synonymLinter(synonyms);
const synonyms = require('./synonyms/loader').load();

require('./configValidation').validate(peliasConfig.generate());

Expand Down Expand Up @@ -52,7 +35,9 @@ function generate(){
"lowercase",
"icu_folding",
"trim",
"custom_admin",
"synonyms/custom_admin",
"synonyms/personal_titles",
"synonyms/place_names",
"word_delimiter",
"unique_only_same_position",
"notnull",
Expand All @@ -67,12 +52,12 @@ function generate(){
"lowercase",
"icu_folding",
"trim",
"custom_name",
"street_synonyms_en",
"street_synonyms_usps",
"street_synonyms_de",
"directionals",
"ampersand",
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"synonyms/punctuation",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"peliasOneEdgeGramFilter",
Expand Down Expand Up @@ -103,12 +88,12 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"ampersand",
"custom_name",
"street_synonyms_en",
"street_synonyms_usps",
"street_synonyms_de",
"directionals",
"synonyms/punctuation",
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"icu_folding",
"remove_ordinals",
"unique_only_same_position",
Expand Down Expand Up @@ -153,11 +138,9 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"custom_street",
"street_synonyms_en",
"street_synonyms_usps",
"street_synonyms_de",
"directionals",
"synonyms/custom_street",
"synonyms/streets",
"synonyms/directionals",
"icu_folding",
"remove_ordinals",
"trim",
Expand Down Expand Up @@ -225,13 +208,14 @@ function generate(){
};

// dynamically create filters for all synonym files in the ./synonyms directory.
// each filter is given the same name as the file, minus the extension.
_.each(synonyms, (synonym, key) => {
settings.analysis.filter[key] = {
// each filter is given the same name as the file; path separators are replaced with
// underscores and the file extension is removed.
_.each(synonyms, (synonym, name) => {
settings.analysis.filter[`synonyms/${name}`] = {
"type": "synonym",
"synonyms": !_.isEmpty(synonym) ? synonym : ['']
};
})
});

// Merge settings from pelias/config
settings = _.merge({}, settings, _.get(config, 'elasticsearch.settings', {}));
Expand Down
6 changes: 0 additions & 6 deletions synonyms/custom_admin.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,3 @@
# foo => foo bar, baz
#
# =============================================================================

saint,st
sainte,ste
fort,ft
mount,mt
mont,mt
147 changes: 0 additions & 147 deletions synonyms/custom_name.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,150 +23,3 @@
# foo => foo bar, baz
#
# =============================================================================

# English
brothers,bros
cape,cpe,cp
city,cty
creek,cr,crk
county,co,cty
downs,downes,dwns
flats,flts
forest,frst,fst
fort,ft
fords,frds
fork,frk
forks,frks
forge,frg
forges,frgs
glens,glns
great,grt,gt
greater,grtr,gtr
greens,grns
groves,grvs
heights,hghts,hgts,hieghts,ht,hts,hgths
international,intl
lake,lk
lakes,lks
little,ltl,lttl,littl,litl
lock,lck
locks,lcks
lower,low,lwr,lr
medical,med
memorial,mem
middle,mid,midl
military,mil
mount,mt,mnt
mountain,mtn
mountains,mtns
municipal,mun,mpal
national,natl
neck,nck
orchard,orch
paradise,pde,pdse
port,pt,prt
park,pk,prk
river,riv,rvr,rivr
slope,slpe,slp
springs,spgs,sprngs
stream,strm,stm
triangle,tri
upper,up,upr,uppr
village,vlg,vlge,vilg,vilge
ville,vl
villages,vlgs
wood,wd
woods,wds

# French
baston,bast
bourg,brg
charmille,chi
colline,coli
collines,colis
enceinte,en
fleuve,fl
grand,gd,gr
mont,mt,mnt
petite,pt
porche,pch
rivière,riviere,riv
village,vge
villages,vges

# German
deutsch,dt
ehemalige,ehem
gebruder,gebr
haltestelle,hst
hinter,hint,ht
internationale,int
kleine,kl
kleiner,kl
kleines,kl
kogel,kg
niedere,nd
rhein,rh
spitze,sp
vordere,vd,vord
wiese,ws

# Spanish
abril,abr,abl
agosto,ag,agto,agt
altura,alt
alturas,alts
arboleda,arb
arrabal,arral
bosque,bsq
brigada,brig
cabo,cbo
campo,cpo,cmpo
campos,cpos,cmpos
canal,cnl
centro,cntro,ctro
cerro,crro
corral,crral
corralillo,crrlo
diseminado,disem
enero,en,eno,ene
diciembre,dic,dicbre,dice,dbre,10bre,xbre
febrero,febo,febro,febr,feb
gobierno,gob,gobno
grande,gr
guerra,ga
independencia,indep
infantería,infanteria,infa,ynfa,ynfanta
jardín,jdin,jard,jardin
jardínes,jdins,jards,jardines
junio,jun,jn
julio,jul,jl
lago,lg
lagos,lgs
laguna,lgna
llanura,llnra
llanuras,llnras
marzo,mzo,mar
mayo,my,may
militar,milr
monte,mt,mte,mnte
montes,mts,mtes,mntes,mnts
nacional,nal,nacl
noviembre,nbre,nvre,nove,novre,novbre,9bre
octubre,oct,octbre,octe,8bre
portillo,ptilo,ptllo
prado,prdo
primeros,pros
privada,priv
punta,pnta
quebrada,qbda
real,rl
republica,rep
revolucion,rev
ribera,ribr
río,rio
septiembre,setbre,sepe,sepbre,7bre,7re,sep,set
sierra,srra
valle,vlle
volcan,vlcn
voluntarios,voluntos
11 changes: 0 additions & 11 deletions synonyms/directionals.txt

This file was deleted.

33 changes: 33 additions & 0 deletions synonyms/directionals/de.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
nord, n
nördlich, nördl, nordl, nordlich, noerdlich
nördliche, nordliche, noerdliche
nördlicher, nordlicher, noerdlicher
nördliches, nordliches, noerdliches
nordost, no
nordöstlich, nordostlich, nordoestlich
nordwest, nw
ost, o
östlich, östl, ostlich, ostl, oestlich
östliche, ostliche, oestliche
östlicher, ostlicher, oestlicher
östliches, ostliches, oestliches
süd, s, sud, sued
süden, suden, sueden
südlich, südl, sudl, sudlich, suedlich
südliche, sudliche, suedliche
südlicher, sudlicher, suedlicher
südliches, sudliches, suedliches
südost, so, sudost, suedost
südosten, sudosten, suedosten
südöstlich, sudostlich, suedoestlich
südöstliche, sudostliche, suedoestliche
südöstlicher, sudostlicher, suedoestlicher
südöstliches, sudostliches, suedoestliches
südwest, sw, sudwest, suedwest
südwesten, sudwesten, suedwesten
südwestlich, sudwestlich, suedwestlich
südwestliche, sudwestliche, suedwestliche
südwestlicher, sudwestlicher, suedwestlicher
südwestliches, sudwestliches, suedwestliches
west, w
westlich, westl
20 changes: 20 additions & 0 deletions synonyms/directionals/en.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
center, c, ctr
central, c, cn, ctrl, cntrl
centre, c, ctr
east, e
eastern, eastrn, estrn, estn
lower, lowr, lwr
middle, mdl, midle, mddl
north, n, nrt, nrth, nth, norh, nort, no
northeast, northe, neast, ne
northeastern, northeastrn, northestrn, northestn, neastern
northwest, northw, northwst, nwest
northwestern, northwestrn, northwstrn, northwstn
south, s, so, sth
southeast, southe, seast, se
southeastern, southeastrn, southestrn, southestn, seastern
southwest, southw, southwst, swest
southwestern, southwestrn, southwstrn, southwstn, swestern
upper, uppr, upr, up
west, w, wst
western, westrn, wstrn, wstn
10 changes: 10 additions & 0 deletions synonyms/directionals/es.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
este, e
norte, n
noreste, nordeste, ne
noroeste, nw
oeste, w
oriente, ote
poniente, pte
sur, s
sureste, se
suroeste, sw
6 changes: 6 additions & 0 deletions synonyms/directionals/fr.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
central, ctrl
centre, c, ctre, cntre
est, e
nord, n
ouest, o
sud, s
Loading

0 comments on commit 00af1b9

Please sign in to comment.