Skip to content

Commit

Permalink
Merge pull request #343 from jonathanhefner/search-adjust-ranking-mod…
Browse files Browse the repository at this point in the history
…ule-vs-method

Adjust module vs method ranking in search results
  • Loading branch information
jonathanhefner authored Oct 26, 2023
2 parents de49d9e + b482244 commit 2c29dfb
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 84 deletions.
60 changes: 26 additions & 34 deletions lib/sdoc/search_index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,28 @@ def generate(rdoc_modules)
end

# Derives search ngrams for +name+: builds a list of variant strings from the
# name (delimiter, case, acronym, and underscore variations), then returns the
# unique 3-character substrings of all of those strings.
#
# NOTE(review): this listing is a rendered diff hunk without +/- markers, so
# statements from both the old and the new revision appear together below.
# Confirm against the actual file before relying on the exact statement order.
def derive_ngrams(name)
# Example: "ActiveSupport::Cache::Store" => ":ActiveSupport:Cache:Store"
strings = [":#{name}".gsub("::", ":")]
if name.match?(/:[^:A-Z]|#/)
# Example: "ActiveModel::Name::new" => ["ActiveModel", "Name", ":new"]
# Example: "ActiveModel::Name#<=>" => ["ActiveModel", "Name", "#<=>"]
strings = name.split(/::(?=[A-Z])|:(?=:)|(?=#)/)

# Example: ":lookup_store" => ".lookup_store("
strings.concat(strings.map { |string| string.sub(/^[:#](.+)/, '.\1(') })
else
# Example: "ActiveSupport::Cache::Store" => [":ActiveSupport", ":Cache", ":Store"]
strings = ":#{name}".split(/:(?=:)/)
end

# Example: ":ActiveSupport:Cache:lookup_store" => ":ActiveSupport:Cache.lookup_store("
strings.concat(strings.map { |string| string.gsub(/[:#]([^A-Z].+)/, '.\1(') })
# Example: ":ActiveModel:API" => ":activemodel:api"
# Example: ":API" => ":api"
strings.concat(strings.map(&:downcase))
# Example: ":ActiveSupport:HashWithIndifferentAccess" => ":AS:HWIA"
# Example: ":HashWithIndifferentAccess" => ":HWIA"
strings.concat(strings.map { |string| string.gsub(/([A-Z])[a-z]+/, '\1') })
# Example: ":AbstractController:Base#action_name" => " AbstractController Base action_name"
strings.concat(strings.map { |string| string.tr(":#", " ") })
# Example: ":ActiveRecord:Querying#find_by_sql" => ":ActiveRecord:Querying#findbysql"
# Example: "#find_by_sql" => "#findbysql"
strings.concat(strings.map { |string| string.tr("_", "") })

# Example: ":ActiveModel:Name#<=>" => [":ActiveModel", ":Name", "#<=>"]
strings.map! { |string| string.split(/(?=[ :#.])/) }.flatten!.uniq!
# Example: ":ActiveModel" => ":A "
strings.concat(strings.map { |string| "#{string[0, 2]} " })
# Example: "#action_name" => " action_name"
strings.concat(strings.map { |string| string.tr(":#", " ") })
# Example: " action_name" => " a "
strings.concat(strings.map { |string| string.sub(/^([:# ].).+/, '\1 ') })

# Slide a 3-character window over every variant and de-duplicate the results.
strings.flat_map { |string| string.each_char.each_cons(3).map(&:join) }.uniq
end
Expand Down Expand Up @@ -97,26 +101,14 @@ def compute_bit_weights(ngram_bit_positions)
end

# Computes a small per-ngram tiebreaker bonus from the lengths of the module
# name, the method name (which may be nil; see the nil handling below), and
# the description.
#
# NOTE(review): this listing is a rendered diff hunk without +/- markers. The
# two comment/assignment groups below appear to be the old and the new bonus
# formula respectively; confirm against the actual file which one is in effect.
def compute_tiebreaker_bonus(module_name, method_name, description)
method_name ||= ""

# Bonus is per matching ngram and is very small so it does not outweigh
# points from other matches. Longer names have smaller per-ngram bonuses,
# but the value scales down very slowly.
bonus = 0.01 / (module_name.length + method_name.length) ** 0.025

# Further reduce bonus in proportion to method name length. This prioritizes
# modules before methods, and short methods of long modules before long
# methods of short modules. For example, when searching for "find_by", this
# prioritizes ActiveRecord::FinderMethods#find_by before
# ActiveRecord::Querying#find_by_sql.
#
# However, slightly dampen the reduction in proportion to the length of the
# method description. When method names are the same, this marginally favors
# methods with more documentation over methods with less documentation. For
# example, favoring ActionController::Rendering#render (which is thoroughly
# documented) over ActionController::Renderer#render (which primarily refers
# to other methods).
bonus *= (0.99 + [description.length, 1000].min / 250_000.0) ** method_name.length
# Give bonus in proportion to documentation length, but scale up extremely
# slowly. Bonus is per matching ngram so it must be small enough to not
# outweigh points from other matches.
bonus = (description.length + 1) ** 0.01 / 100
# Reduce bonus in proportion to name length. This favors short names over
# long names. Notably, this will often favor methods over modules since
# method names are usually shorter than fully qualified module names.
bonus /= (method_name&.length || module_name.length) ** 0.1
end

def signature_for(rdoc_method)
Expand Down
81 changes: 31 additions & 50 deletions spec/search_index_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def hoge_fuga; end
end
RUBY

ngrams = SDoc::SearchIndex.derive_ngrams("FooBar#hoge_fuga")
ngrams = SDoc::SearchIndex.derive_ngrams("FooBar") | SDoc::SearchIndex.derive_ngrams("FooBar#hoge_fuga")

search_index = SDoc::SearchIndex.generate(top_level.classes_and_modules)

Expand Down Expand Up @@ -54,54 +54,51 @@ def hoge_fuga; end
end

it "includes module-related ngrams" do
ngrams = SDoc::SearchIndex.derive_ngrams("Abc::Xyz")
ngrams = SDoc::SearchIndex.derive_ngrams("Abc::Def")

_(ngrams.map(&:length).uniq.first).must_equal 3

_(ngrams).must_include ":Ab"
_(ngrams).must_include ":Xy"
_(ngrams).must_include ":A "
_(ngrams).must_include " Ab"
_(ngrams).must_include " A "

_(ngrams).must_include ":De"
_(ngrams).must_include ":D "
_(ngrams).must_include " De"
_(ngrams).must_include " D "

_(ngrams.grep(/c:|::|[.(]/)).must_be_empty
_(ngrams.grep(/.:|[^: ]. |[.(]/)).must_be_empty
end

it "includes method-related ngrams for instance methods" do
ngrams = SDoc::SearchIndex.derive_ngrams("Abc#def_xyz")

_(ngrams).must_include "#de"
_(ngrams).must_include ".de"
_(ngrams).must_include "yz("
ngrams = SDoc::SearchIndex.derive_ngrams("Abc::Def#uvw_xyz")

_(ngrams).must_include "f_x"
_(ngrams).must_include "efx"
_(ngrams).must_include "fxy"
_(ngrams.map(&:length).uniq.first).must_equal 3

_(ngrams.grep(/c#/)).must_be_empty
end
_(ngrams).must_include "#uv"
_(ngrams).must_include "#u "
_(ngrams).must_include " uv"
_(ngrams).must_include " u "

it "includes method-related ngrams for singleton methods" do
ngrams = SDoc::SearchIndex.derive_ngrams("Abc::def_xyz")

_(ngrams).must_include ":de"
_(ngrams).must_include ".de"
_(ngrams).must_include ".uv"
_(ngrams).must_include "yz("

_(ngrams.grep(/c:/)).must_be_empty
end
_(ngrams).must_include "w_x"
_(ngrams).must_include "vwx"
_(ngrams).must_include "wxy"

it "includes space delimiter ngrams" do
ngrams = SDoc::SearchIndex.derive_ngrams("Abc::Def#xyz")
_(ngrams.grep(/.#|[^:# ]. /)).must_be_empty

_(ngrams).must_include " Ab"
_(ngrams).must_include " A "
_(ngrams).must_include ":A "

_(ngrams).must_include " De"
_(ngrams).must_include " D "
_(ngrams).must_include ":D "
ngrams_from_module = SDoc::SearchIndex.derive_ngrams("Abc::Def")
_((ngrams & ngrams_from_module).sort).must_equal ngrams_from_module.grep_v(/[: ][A-F]/i).sort
end

_(ngrams).must_include " xy"
_(ngrams).must_include " x "
_(ngrams).must_include "#x "
it "includes method-related ngrams for singleton methods" do
ngrams = SDoc::SearchIndex.derive_ngrams("Abc::Def::uvw_xyz")

_(ngrams.grep(/[cfz] $/)).must_be_empty
instance_method_ngrams = SDoc::SearchIndex.derive_ngrams("Abc::Def#uvw_xyz")
_(ngrams.sort).must_equal instance_method_ngrams.map { _1.tr("#", ":") }.sort
end

it "includes acronym ngrams" do
Expand Down Expand Up @@ -213,20 +210,6 @@ def hoge_fuga; end
end
end

it "favors long module + short method names over short module + long method names" do
[
[ ["ActiveRecord::Associations::ClassMethods", "has_many"],
["ActiveStorage::Attached::Model", "has_many_attached"] ],
[ ["ActiveRecord::FinderMethods", "find_by"],
["ActiveRecord::Querying", "find_by_sql"] ],
].each do |long_short, short_long|
long_short_bonus = SDoc::SearchIndex.compute_tiebreaker_bonus(*long_short, "")
short_long_bonus = SDoc::SearchIndex.compute_tiebreaker_bonus(*short_long, "")

_(long_short_bonus).must_be :>, short_long_bonus, "#{long_short.join "#"} vs #{short_long.join "#"}"
end
end

it "favors methods with long documentation over methods with short documentation" do
[
[ ["X", "x", 2],
Expand All @@ -246,8 +229,6 @@ def hoge_fuga; end

it "balances factors to produce desirable results" do
[
[ ["ActiveSupport::Deprecation", nil, 0],
["Module", "deprecate", 600] ],
[ ["Pathname", "existence", 200],
["ActiveSupport::Callbacks::CallTemplate::InstanceExec1", "expand", 0] ],
[ ["ActiveRecord::Associations::ClassMethods", "has_many", 12000],
Expand Down

0 comments on commit 2c29dfb

Please sign in to comment.