Skip to content

Commit

Permalink
feat: ranked fuzzy match search of workspace symbols (#212)
Browse files Browse the repository at this point in the history
This is an implementation of a sequential fuzzy string matching algorithm, similar to those used in code editors like Sublime Text. 

It is based on Forrest Smith's work on https://github.com/forrestthewoods/lib_fts/ and his blog post https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
  • Loading branch information
biletskyy authored Sep 10, 2023
1 parent d91bd01 commit 9395744
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 63 deletions.
168 changes: 126 additions & 42 deletions lib/next_ls.ex
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,7 @@ defmodule NextLS do
end

def handle_request(%WorkspaceSymbol{params: %{query: query}}, lsp) do
filter = fn sym ->
if query == "" do
true
else
# TODO: sqlite has a regexp feature, this can be done in sql most likely
to_string(sym) =~ query
end
end
case_sensitive? = String.downcase(query) != query

symbols = fn pid ->
rows =
Expand Down Expand Up @@ -270,32 +263,35 @@ defmodule NextLS do

symbols =
dispatch(lsp.assigns.registry, :databases, fn entries ->
for {pid, _} <- entries, symbol <- symbols.(pid), filter.(symbol.name) do
name =
if symbol.type != "defstruct" do
"#{symbol.type} #{symbol.name}"
else
"#{symbol.name}"
end
filtered_symbols =
for {pid, _} <- entries, symbol <- symbols.(pid), score = fuzzy_match(symbol.name, query, case_sensitive?) do
name =
if symbol.type != "defstruct" do
"#{symbol.type} #{symbol.name}"
else
"#{symbol.name}"
end

{%SymbolInformation{
name: name,
kind: elixir_kind_to_lsp_kind(symbol.type),
location: %Location{
uri: "file://#{symbol.file}",
range: %Range{
start: %Position{
line: symbol.line - 1,
character: symbol.column - 1
},
end: %Position{
line: symbol.line - 1,
character: symbol.column - 1
}
}
}
}, score}
end

%SymbolInformation{
name: name,
kind: elixir_kind_to_lsp_kind(symbol.type),
location: %Location{
uri: "file://#{symbol.file}",
range: %Range{
start: %Position{
line: symbol.line - 1,
character: symbol.column - 1
},
end: %Position{
line: symbol.line - 1,
character: symbol.column - 1
}
}
}
}
end
filtered_symbols |> List.keysort(1, :desc) |> Enum.map(&elem(&1, 0))
end)

{:reply, symbols, lsp}
Expand Down Expand Up @@ -706,15 +702,14 @@ defmodule NextLS do
end

defp symbol_info(file, line, col, database) do
definition_query =
~Q"""
SELECT module, type, name
FROM "symbols" sym
WHERE sym.file = ?
AND sym.line = ?
ORDER BY sym.id ASC
LIMIT 1
"""
definition_query = ~Q"""
SELECT module, type, name
FROM "symbols" sym
WHERE sym.file = ?
AND sym.line = ?
ORDER BY sym.id ASC
LIMIT 1
"""

reference_query = ~Q"""
SELECT identifier, type, module
Expand Down Expand Up @@ -757,4 +752,93 @@ defmodule NextLS do
end

# Floors `line` at zero: values below zero become 0, anything else is returned unchanged.
defp clamp(line) when line < 0, do: 0
defp clamp(line), do: line

# This is an implementation of a sequential fuzzy string matching algorithm,
# similar to those used in code editors like Sublime Text.
# It is based on Forrest Smith's work on https://github.com/forrestthewoods/lib_fts/
# and his blog post https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/.
#
# The function checks whether the letters of the query are present in the source in the correct order.
# It calculates a match score only for matching sources.

# Fuzzy-matches `query` against `source`.
#
# Returns:
#   * `1` when the query is empty (every source matches),
#   * an integer score when all query letters occur in the source in order,
#   * `false` when they do not.
#
# When `case_sensitive` is falsy the source is downcased before the match check.
defp fuzzy_match(_source, "", _case_sensitive), do: 1

defp fuzzy_match(source, query, case_sensitive) do
  haystack = if case_sensitive, do: source, else: String.downcase(source)
  haystack_letters = String.codepoints(haystack)
  needle_letters = String.codepoints(query)

  # A failed match falls straight through `with` and is returned as `false`.
  with true <- do_fuzzy_match?(haystack_letters, needle_letters) do
    # Scoring walks the source in its original case (so uppercase letters can
    # be recognised) and compares against the downcased query.
    original_letters = String.codepoints(source)
    query_downcased = String.codepoints(String.downcase(query))

    calc_match_score(original_letters, query_downcased, %{leading: true, separator: true}, 0)
  end
end

# Returns `true` when every query letter can be found in the source letters in
# the same order (not necessarily adjacent), `false` otherwise.
# An empty query always matches.
defp do_fuzzy_match?(_source_letters, []), do: true

defp do_fuzzy_match?(source_letters, [wanted | remaining_query]) do
  remaining_source = match_letter(source_letters, wanted)

  # `match_letter/2` yields `:no_match` or the source letters after the hit.
  remaining_source != :no_match and do_fuzzy_match?(remaining_source, remaining_query)
end

# Finds the first occurrence of `query_letter` in `source_letters`.
# Returns the letters that follow the hit, or `:no_match` when the letter is absent.
defp match_letter(source_letters, query_letter) do
  case Enum.drop_while(source_letters, &(&1 != query_letter)) do
    [] -> :no_match
    [_hit | after_hit] -> after_hit
  end
end

# Folds over the query letters, scoring each one against the remaining source
# letters, and returns the final accumulated score.
# With no query letters the incoming `score` is returned as is.
defp calc_match_score(source_letters, query_letters, traits, score) do
  initial = {source_letters, traits, score}

  {_unused_source, _last_traits, total} =
    Enum.reduce(query_letters, initial, fn letter, {remaining, prev_traits, running} ->
      calc_letter_score(remaining, letter, prev_traits, running)
    end)

  total
end

# Walks the source letters until one matches `query_letter` and returns
# `{remaining_source_letters, traits_of_last_visited_letter, score}`.
#
# `query_letter` is compared against the downcased source letter. `traits`
# describes the previously visited source letter and drives the bonus/penalty:
# a matching letter gets `calc_matched_bonus/3`, every skipped letter gets
# `calc_unmatched_penalty/2`.
#
# The source may run out before the letter is found: the caller checks the match
# on whole-string codepoints, whereas here each codepoint is downcased on its
# own, and `String.downcase/1` can expand one codepoint into several
# (e.g. "İ" becomes "i" followed by a combining dot). Rather than raising a
# FunctionClauseError, the score accumulated so far is kept.
defp calc_letter_score([], _query_letter, traits, score), do: {[], traits, score}

defp calc_letter_score([source_letter | source_rest], query_letter, traits, score) do
  separator? = source_letter in ["_", ".", "-", "/", " "]
  source_letter_downcase = String.downcase(source_letter)
  # A letter that changes when downcased is treated as uppercase.
  upper? = source_letter_downcase != source_letter

  if query_letter == source_letter_downcase do
    new_traits = %{matched: true, leading: false, separator: separator?, upper: upper?}
    new_score = calc_matched_bonus(score, traits, new_traits)

    {source_rest, new_traits, new_score}
  else
    # Still "leading" until the first match has happened.
    new_traits = %{
      matched: false,
      separator: separator?,
      upper: upper?,
      leading: traits.leading
    }

    new_score = calc_unmatched_penalty(score, traits)

    calc_letter_score(source_rest, query_letter, new_traits, new_score)
  end
end

# Adds the bonus for a matched letter. `traits` describes the previous source
# letter, `new_traits` the letter that just matched. The first rule that
# applies wins.
defp calc_matched_bonus(score, traits, new_traits) do
  cond do
    # match right after a separator, or on the very first letter
    match?(%{separator: true}, traits) -> score + 30
    # uppercase match that follows a lowercase letter
    match?(%{upper: false}, traits) and match?(%{upper: true}, new_traits) -> score + 30
    # match directly adjacent to the previous match
    match?(%{matched: true}, traits) -> score + 15
    # no bonus applies
    true -> score
  end
end

# Subtracts the penalty for a skipped source letter: 5 points while no letter
# has matched yet and the score is still above -15, otherwise 1 point.
defp calc_unmatched_penalty(score, traits) do
  if match?(%{leading: true}, traits) and score > -15, do: score - 5, else: score - 1
end
end
96 changes: 75 additions & 21 deletions test/next_ls_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,40 @@ defmodule NextLSTest do
id: 2,
jsonrpc: "2.0",
params: %{
query: "fo"
query: "Project"
}
}

assert_result 2, symbols

assert [
%{
"kind" => 2,
"location" => %{
"range" => %{
"end" => %{"character" => 0, "line" => 0},
"start" => %{"character" => 0, "line" => 0}
},
"uri" => "file://#{cwd}/my_proj/lib/project.ex"
},
"name" => "defmodule Project"
}
] == symbols
end

test "workspace symbols with query fuzzy search", %{client: client, cwd: cwd} = context do
assert :ok == notify(client, %{method: "initialized", jsonrpc: "2.0", params: %{}})
assert_request(client, "client/registerCapability", fn _params -> nil end)

assert_is_ready(context, "my_proj")
assert_notification "$/progress", %{"value" => %{"kind" => "end", "message" => "Finished indexing!"}}

request client, %{
method: "workspace/symbol",
id: 2,
jsonrpc: "2.0",
params: %{
query: "heo"
}
}

Expand All @@ -345,35 +378,56 @@ defmodule NextLSTest do
"kind" => 12,
"location" => %{
"range" => %{
"start" => %{
"line" => 4,
"character" => 0
},
"end" => %{
"line" => 4,
"character" => 0
}
"end" => %{"character" => 0, "line" => 1},
"start" => %{"character" => 0, "line" => 1}
},
"uri" => "file://#{cwd}/my_proj/lib/project.ex"
},
"name" => "def hello"
}
] == symbols
end

test "workspace symbols with query case sensitive fuzzy search", %{client: client, cwd: cwd} = context do
assert :ok == notify(client, %{method: "initialized", jsonrpc: "2.0", params: %{}})
assert_request(client, "client/registerCapability", fn _params -> nil end)

assert_is_ready(context, "my_proj")
assert_notification "$/progress", %{"value" => %{"kind" => "end", "message" => "Finished indexing!"}}

request client, %{
method: "workspace/symbol",
id: 2,
jsonrpc: "2.0",
params: %{
query: "Ct"
}
}

assert_result 2, symbols

assert [
%{
"kind" => 2,
"location" => %{
"range" => %{
"end" => %{"character" => 0, "line" => 3},
"start" => %{"character" => 0, "line" => 3}
},
"uri" => "file://#{cwd}/my_proj/lib/code_action.ex"
},
"name" => "def foo"
"name" => "defmodule Foo.CodeAction.NestedMod"
},
%{
"kind" => 12,
"kind" => 2,
"location" => %{
"range" => %{
"start" => %{
"line" => 3,
"character" => 0
},
"end" => %{
"line" => 3,
"character" => 0
}
"end" => %{"character" => 0, "line" => 0},
"start" => %{"character" => 0, "line" => 0}
},
"uri" => "file://#{cwd}/my_proj/lib/bar.ex"
"uri" => "file://#{cwd}/my_proj/lib/code_action.ex"
},
"name" => "def foo"
"name" => "defmodule Foo.CodeAction"
}
] == symbols
end
Expand Down

0 comments on commit 9395744

Please sign in to comment.