Support analyzer with custom tokenizer, only ngram for now. Also, add… #209

Merged
merged 1 commit on Dec 5, 2017
24 changes: 23 additions & 1 deletion src/Database/V5/Bloodhound/Client.hs
@@ -25,6 +25,7 @@ module Database.V5.Bloodhound.Client
withBH
-- ** Indices
, createIndex
, createIndexWith
, deleteIndex
, updateIndexSettings
, getIndexSettings
@@ -512,6 +513,28 @@ createIndex indexSettings (IndexName indexName) =
where url = joinPath [indexName]
body = Just $ encode indexSettings

-- | Create an index, providing it with any number of settings. This
-- is more expressive than 'createIndex' but more verbose
-- for the common case of configuring only the shard count and
-- replica count.
createIndexWith :: MonadBH m
=> [UpdatableIndexSetting]
-> Int -- ^ shard count
-> IndexName
-> m Reply
createIndexWith updates shards (IndexName indexName) =
bindM2 put url (return (Just body))
where url = joinPath [indexName]
body = encode $ object
["settings" .= deepMerge
( HM.singleton "index.number_of_shards" (toJSON shards) :
[u | Object u <- toJSON <$> updates]
)
]
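
-- An illustrative sketch of using 'createIndexWith' (the index name
-- here is hypothetical):
--
-- > exampleCreate :: MonadBH m => m Reply
-- > exampleCreate =
-- >   createIndexWith [NumberOfReplicas (ReplicaCount 0)] 1 (IndexName "example-index")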

oPath :: ToJSON a => NonEmpty Text -> a -> Value
oPath (k :| []) v = object [k .= v]
oPath (k :| (h:t)) v = object [k .= oPath (h :| t) v]
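
-- Illustration of 'oPath' (follows directly from the definition above):
-- each path component nests into a singleton object, e.g.
--
-- > oPath ("index" :| ["analysis"]) (String "x")
-- >   == object ["index" .= object ["analysis" .= String "x"]]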

-- | 'deleteIndex' will delete an index given a 'Server', and an 'IndexName'.
--
@@ -545,7 +568,6 @@ getIndexSettings (IndexName indexName) = do
parseEsResponse =<< get =<< url
where url = joinPath [indexName, "_settings"]


-- | 'forceMergeIndex'
--
-- The force merge API allows to force merging of one or more indices through
98 changes: 97 additions & 1 deletion src/Database/V5/Bloodhound/Types.hs
@@ -202,6 +202,7 @@ module Database.V5.Bloodhound.Types
, ZeroTermsQuery(..)
, CutoffFrequency(..)
, Analyzer(..)
, Tokenizer(..)
, MaxExpansions(..)
, Lenient(..)
, MatchQueryType(..)
@@ -376,6 +377,12 @@ module Database.V5.Bloodhound.Types

, EsUsername(..)
, EsPassword(..)

, Analysis(..)
, AnalyzerDefinition(..)
, TokenizerDefinition(..)
, Ngram(..)
, TokenChar(..)
) where

import Control.Applicative as A
@@ -583,8 +590,89 @@ data UpdatableIndexSetting = NumberOfReplicas ReplicaCount
| IndexCompoundOnFlush Bool
| WarmerEnabled Bool
| MappingTotalFieldsLimit Int
| AnalysisSetting Analysis
-- ^ Analysis is not a dynamic setting and can only be updated on a closed index.
deriving (Eq, Show, Generic, Typeable)

data Analysis = Analysis
{ analysisAnalyzer :: M.Map Text AnalyzerDefinition
, analysisTokenizer :: M.Map Text TokenizerDefinition
} deriving (Eq,Show,Generic,Typeable)

instance ToJSON Analysis where
toJSON (Analysis analyzer tokenizer) = object
[ "analyzer" .= analyzer
, "tokenizer" .= tokenizer
]

instance FromJSON Analysis where
parseJSON = withObject "Analysis" $ \m -> Analysis
<$> m .: "analyzer"
<*> m .: "tokenizer"

data AnalyzerDefinition = AnalyzerDefinition
{ analyzerDefinitionTokenizer :: Maybe Tokenizer
} deriving (Eq,Show,Generic,Typeable)

instance ToJSON AnalyzerDefinition where
toJSON (AnalyzerDefinition tokenizer) = object $ catMaybes
[ fmap ("tokenizer" .=) tokenizer
]

instance FromJSON AnalyzerDefinition where
parseJSON = withObject "AnalyzerDefinition" $ \m -> AnalyzerDefinition
<$> m .:? "tokenizer"


data TokenizerDefinition
= TokenizerDefinitionNgram Ngram
deriving (Eq,Show,Generic,Typeable)

instance ToJSON TokenizerDefinition where
toJSON x = case x of
TokenizerDefinitionNgram (Ngram minGram maxGram tokenChars) -> object
[ "type" .= ("ngram" :: Text)
, "min_gram" .= minGram
, "max_gram" .= maxGram
, "token_chars" .= tokenChars
]

instance FromJSON TokenizerDefinition where
parseJSON = withObject "TokenizerDefinition" $ \m -> do
typ <- m .: "type" :: Parser Text
case typ of
"ngram" -> fmap TokenizerDefinitionNgram $ Ngram
<$> (fmap unStringlyTypedInt (m .: "min_gram"))
<*> (fmap unStringlyTypedInt (m .: "max_gram"))
<*> m .: "token_chars"
_ -> fail "invalid TokenizerDefinition"

data Ngram = Ngram
{ ngramMinGram :: Int
, ngramMaxGram :: Int
, ngramTokenChars :: [TokenChar]
} deriving (Eq,Show,Generic,Typeable)

data TokenChar = TokenLetter | TokenDigit | TokenWhitespace | TokenPunctuation | TokenSymbol
deriving (Eq,Read,Show,Generic,Typeable)

instance ToJSON TokenChar where
toJSON t = String $ case t of
TokenLetter -> "letter"
TokenDigit -> "digit"
TokenWhitespace -> "whitespace"
TokenPunctuation -> "punctuation"
TokenSymbol -> "symbol"

instance FromJSON TokenChar where
parseJSON = withText "TokenChar" $ \t -> case t of
"letter" -> return TokenLetter
"digit" -> return TokenDigit
"whitespace" -> return TokenWhitespace
"punctuation" -> return TokenPunctuation
"symbol" -> return TokenSymbol
_ -> fail "invalid TokenChar"
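
-- An illustrative 'Analysis' value wiring a custom analyzer to a custom
-- ngram tokenizer, with the JSON it encodes to; the names "my_analyzer"
-- and "my_tokenizer" are hypothetical.
--
-- > exampleAnalysis :: Analysis
-- > exampleAnalysis = Analysis
-- >   (M.singleton "my_analyzer"
-- >     (AnalyzerDefinition (Just (Tokenizer "my_tokenizer"))))
-- >   (M.singleton "my_tokenizer"
-- >     (TokenizerDefinitionNgram (Ngram 3 4 [TokenLetter, TokenDigit])))
-- >
-- > -- encode exampleAnalysis yields (key order may vary):
-- > -- {"analyzer":{"my_analyzer":{"tokenizer":"my_tokenizer"}},
-- > --  "tokenizer":{"my_tokenizer":{"type":"ngram","min_gram":3,
-- > --    "max_gram":4,"token_chars":["letter","digit"]}}}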

data AllocationPolicy = AllocAll
-- ^ Allows shard allocation for all shards.
| AllocPrimaries
@@ -1020,6 +1108,8 @@ newtype CutoffFrequency =
CutoffFrequency Double deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)
newtype Analyzer =
Analyzer Text deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)
newtype Tokenizer =
Tokenizer Text deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)
newtype MaxExpansions =
MaxExpansions Int deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)

@@ -2990,6 +3080,7 @@ instance ToJSON UpdatableIndexSetting where
toJSON (BlocksWrite x) = oPath ("blocks" :| ["write"]) x
toJSON (BlocksMetaData x) = oPath ("blocks" :| ["metadata"]) x
toJSON (MappingTotalFieldsLimit x) = oPath ("index" :| ["mapping","total_fields","limit"]) x
toJSON (AnalysisSetting x) = oPath ("index" :| ["analysis"]) x

instance FromJSON UpdatableIndexSetting where
parseJSON = withObject "UpdatableIndexSetting" parse
@@ -3022,6 +3113,7 @@ instance FromJSON UpdatableIndexSetting where
<|> blocksWrite `taggedAt` ["blocks", "write"]
<|> blocksMetaData `taggedAt` ["blocks", "metadata"]
<|> mappingTotalFieldsLimit `taggedAt` ["index", "mapping", "total_fields", "limit"]
<|> analysisSetting `taggedAt` ["index", "analysis"]
where taggedAt f ks = taggedAt' f (Object o) ks
taggedAt' f v [] = f =<< (parseJSON v <|> (parseJSON (unStringlyTypeJSON v)))
taggedAt' f v (k:ks) = withObject "Object" (\o -> do v' <- o .: k
@@ -3055,6 +3147,7 @@ instance FromJSON UpdatableIndexSetting where
blocksWrite = pure . BlocksWrite
blocksMetaData = pure . BlocksMetaData
mappingTotalFieldsLimit = pure . MappingTotalFieldsLimit
analysisSetting = pure . AnalysisSetting

instance FromJSON IndexSettingsSummary where
parseJSON = withObject "IndexSettingsSummary" parse
@@ -4643,10 +4736,13 @@ instance FromJSON NodeDataPathStats where

newtype StringlyTypedDouble = StringlyTypedDouble { unStringlyTypedDouble :: Double }


instance FromJSON StringlyTypedDouble where
parseJSON = fmap StringlyTypedDouble . parseJSON . unStringlyTypeJSON

newtype StringlyTypedInt = StringlyTypedInt { unStringlyTypedInt :: Int }

instance FromJSON StringlyTypedInt where
parseJSON = fmap StringlyTypedInt . parseJSON . unStringlyTypeJSON

instance FromJSON NodeFSTotalStats where
parseJSON = withObject "NodeFSTotalStats" parse
26 changes: 26 additions & 0 deletions tests/V5/tests.hs
@@ -907,6 +907,12 @@ instance Arbitrary RegexpFlag where arbitrary = sopArbitrary; shrink = genericSh
instance Arbitrary BoolMatch where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Term where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary IndexSettings where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary TokenChar where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Ngram where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary TokenizerDefinition where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary AnalyzerDefinition where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Analysis where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Tokenizer where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary UpdatableIndexSetting where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Bytes where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary AllocationPolicy where arbitrary = sopArbitrary; shrink = genericShrink
@@ -1622,6 +1628,26 @@ main = hspec $ do
(IndexSettings (ShardCount 1) (ReplicaCount 0))
(NE.toList updates))

it "accepts customer analyzers" $ when' (atleast es50) $ withTestEnv $ do
_ <- deleteExampleIndex
let analysis = Analysis
(M.singleton "ex_analyzer" (AnalyzerDefinition (Just (Tokenizer "ex_tokenizer"))))
(M.singleton "ex_tokenizer"
( TokenizerDefinitionNgram
( Ngram 3 4 [TokenLetter,TokenDigit])
)
)
updates = [AnalysisSetting analysis]
createResp <- createIndexWith (updates ++ [NumberOfReplicas (ReplicaCount 0)]) 1 testIndex
liftIO $ validateStatus createResp 200
getResp <- getIndexSettings testIndex
liftIO $
getResp `shouldBe` Right (IndexSettingsSummary
testIndex
(IndexSettings (ShardCount 1) (ReplicaCount 0))
updates
)

describe "Index Optimization" $ do
it "returns a successful response upon completion" $ withTestEnv $ do
_ <- createExampleIndex