From 485a181d0f651ac50983ab8539817ed25e786fd7 Mon Sep 17 00:00:00 2001
From: Oleg Ilyenko
Date: Sat, 30 Apr 2016 16:20:34 +0200
Subject: [PATCH] `suggestionList` implementation (#126)

---
 src/main/scala/sangria/util/StringUtil.scala | 48 +++++++++++++++++++
 .../scala/sangria/util/StringUtilSpec.scala | 14 ++++++
 2 files changed, 62 insertions(+)

diff --git a/src/main/scala/sangria/util/StringUtil.scala b/src/main/scala/sangria/util/StringUtil.scala
index d697dc29..80c46330 100644
--- a/src/main/scala/sangria/util/StringUtil.scala
+++ b/src/main/scala/sangria/util/StringUtil.scala
@@ -1,5 +1,7 @@
 package sangria.util
 
+import scala.collection.mutable.ListBuffer
+
 object StringUtil {
   private val camelToUpper = "_*([A-Z][a-z\\d]+)".r
 
@@ -20,4 +22,50 @@ object StringUtil {
     if (start.nonEmpty) s"${start mkString ", "} or $last"
     else last
   }
+
+  /**
+   * Picks the entries of `options` that are lexically close to the invalid `input`, ordered
+   * from closest to furthest; the cutoff is half the longer string's length, but at least 1.
+   */
+  def suggestionList(input: String, options: Seq[String]): Seq[String] = {
+    val halfInput = input.length / 2
+    val scored = for {
+      candidate ← options
+      distance = lexicalDistance(input, candidate)
+      if distance <= math.max(math.max(halfInput, candidate.length / 2), 1)
+    } yield (candidate, distance)
+    scored.sortBy { case (_, distance) ⇒ distance }.map { case (candidate, _) ⇒ candidate }
+  }
+
+  /**
+   * Computes the lexical distance between strings A and B.
+   *
+   * The "distance" between two strings is given by counting the minimum number
+   * of edits needed to transform string A into string B. An edit can be an
+   * insertion, deletion, or substitution of a single character, or a swap of two
+   * adjacent characters.
+   *
+   * This distance is useful for detecting typos in input and for ranking suggestions
+   * by similarity, as [[suggestionList]] does.
+   *
+   * @param a the source string (A)
+   * @param b the target string (B)
+   * @return distance in number of edits
+   */
+  def lexicalDistance(a: String, b: String): Int = {
+    // O(1)-indexed table: row 0 starts as 0..b.length, every other row is pre-filled with i.
+    val d = Array.tabulate(a.length + 1, b.length + 1)((i, j) ⇒ if (i == 0) j else i)
+
+    for (i ← 1 to a.length; j ← 1 to b.length) {
+      val cost = if (a(i - 1) == b(j - 1)) 0 else 1
+      d(i)(j) = math.min(math.min(d(i - 1)(j) + 1, d(i)(j - 1) + 1), d(i - 1)(j - 1) + cost)
+
+      // Swap of two adjacent characters: the last two characters of both prefixes are crossed.
+      if (i > 1 && j > 1 && a(i - 1) == b(j - 2) && a(i - 2) == b(j - 1)) {
+        d(i)(j) = math.min(d(i)(j), d(i - 2)(j - 2) + cost)
+      }
+    }
+
+    d(a.length)(b.length)
+  }
 }
diff --git a/src/test/scala/sangria/util/StringUtilSpec.scala b/src/test/scala/sangria/util/StringUtilSpec.scala
index 40909a23..0975a51c 100644
--- a/src/test/scala/sangria/util/StringUtilSpec.scala
+++ b/src/test/scala/sangria/util/StringUtilSpec.scala
@@ -34,4 +34,18 @@ class StringUtilSpec extends WordSpec with Matchers {
       quotedOrList(Seq("A", "B", "C", "D", "E", "F")) should be ("\"A\", \"B\", \"C\", \"D\" or \"E\"")
     }
   }
+
+  "suggestionList" should {
+    "Returns results when input is empty" in {
+      suggestionList("", Seq("a")) should be (Seq("a"))
+    }
+
+    "Returns empty array when there are no options" in {
+      suggestionList("input", Seq.empty) should be (Seq.empty)
+    }
+
+    "Returns options sorted based on similarity" in {
+      suggestionList("abc", Seq("a", "ab", "abc")) should be (Seq("abc", "ab"))
+    }
+  }
 }