From f0e6e01cccdd8e45f231b6ea24f44fcf055e4831 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Wed, 28 Jan 2015 00:44:43 +0000 Subject: [PATCH 1/6] basic fuzzy searching in help mode --- base/docs.jl | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/base/docs.jl b/base/docs.jl index d859d5644c124..e95eb7ab014da 100644 --- a/base/docs.jl +++ b/base/docs.jl @@ -353,6 +353,14 @@ catdoc(md::MD...) = MD(md...) macro repl (ex) quote + # Fuzzy Searching + $(if isexpr(ex, Symbol) + n = string(ex) + pre = "search:" + :(print($pre); + printmatches($n, completions($n), cols=Base.tty_size()[2]-$(length(pre))); + println("\n")) + end) # Backwards-compatible with the previous help system, for now let doc = @doc $(esc(ex)) doc ≠ nothing ? doc : Base.Help.@help_ $(esc(ex)) @@ -360,4 +368,147 @@ macro repl (ex) end end +# Search & Rescue +# Utilities for correcting user mistakes and (eventually) +# doing full documentation searches from the repl. + +# Fuzzy Search Algorithm + +function matchinds(needle, haystack; acronym = false) + chars = collect(needle) + is = Int[] + lastc = '\0' + for (i, char) in enumerate(haystack) + isempty(chars) && break + while chars[1] == ' ' shift!(chars) end # skip spaces + if lowercase(char) == lowercase(chars[1]) && (!acronym || !isalpha(lastc)) + push!(is, i) + shift!(chars) + end + lastc = char + end + return is +end + +longer(x, y) = length(x) ≥ length(y) ? (x, true) : (y, false) + +bestmatch(needle, haystack) = + longer(matchinds(needle, haystack, acronym = true), + matchinds(needle, haystack)) + +avgdistance(xs) = + isempty(xs) ? 0 : + (xs[end] - xs[1] - length(xs)+1)/length(xs) + +function fuzzyscore(needle, haystack; shorter = true) + score = 0. + is, acro = bestmatch(needle, haystack) + score += (acro?2:1)length(is) # Matched characters + score -= 2(length(needle)-length(is)) # Missing characters + !acro && (score -= avgdistance(is)/10) # Contiguous + !isempty(is) && (score -= mean(is)/100) # Closer to beginning + score += (shorter ? -1 : 1)length(haystack)/1000 # Shorter/longer words +end + +function fuzzysort(search, candidates; shorter = true) + scores = map(cand -> fuzzyscore(search, cand, shorter=shorter), candidates) + candidates[sortperm(scores)] |> reverse +end + +# Levenshtein Distance + +function levenshtein(s1, s2) + a, b = collect(s1), collect(s2) + m = length(a) + n = length(b) + d = Array(Int, m+1, n+1) + + d[1:m+1, 1] = 0:m + d[1, 1:n+1] = 0:n + + for i = 1:m, j = 1:n + d[i+1,j+1] = min(d[i , j+1] + 1, + d[i+1, j ] + 1, + d[i , j ] + (a[i] != b[j])) + end + + return d[m+1, n+1] +end + +function levsort(search, candidates) + scores = map(cand -> levenshtein(search, cand), candidates) + candidates[sortperm(scores)] +end + +# Result printing + +function printmatch(io::IO, word, match) + is, _ = bestmatch(word, match) + Markdown.with_output_format(:fade, io) do io + for (i, char) = enumerate(match) + if i in is + Markdown.with_output_format(print, :bold, io, char) + else + print(io, char) + end + end + end +end + +printmatch(args...) = printfuzzy(STDOUT, args...) + +function printmatches(io::IO, word, matches; cols = Base.tty_size()[2]) + total = 0 + for match in matches + total + length(match) + 1 > cols && break + fuzzyscore(word, match) < 0 && break + print(io, " ") + printmatch(io, word, match) + total += length(match) + 1 + end +end + +printmatches(args...; cols = Base.tty_size()[2]) = printmatches(STDOUT, args..., cols = cols) + +function print_joined_cols(io::IO, ss, delim = "", last = delim; cols = Base.tty_size()[2]) + i = 0 + total = 0 + for i = 1:length(ss) + total += length(ss[i]) + total + max(i-2,0)*length(delim) + (i>1?1:0)*length(last) > cols && (i-=1; break) + end + print_joined(io, ss[1:i], delim, last) +end + +print_joined_cols(args...; cols = Base.tty_size()[2]) = print_joined_cols(STDOUT, args...; cols=cols) + +function print_correction(word) + cors = levsort(word, accessible(current_module())) + pre = "Perhaps you meant " + print(pre) + print_joined_cols(cors, ", ", " or "; cols = Base.tty_size()[2]-length(pre)) + return +end + +# Completion data + +const builtins = ["abstract", "baremodule", "begin", "bitstype", "break", + "catch", "ccall", "const", "continue", "do", "else", + "elseif", "end", "export", "finally", "for", "function", + "global", "if", "immutable", "import", "importall", "let", + "local", "macro", "module", "quote", "return", "try", "type", + "typealias", "using", "while"] + +moduleusings(mod) = ccall(:jl_module_usings, Any, (Any,), mod) + +filtervalid(names) = filter(x->!ismatch(r"#", x), map(string, names)) + +accessible(mod::Module) = + [names(mod, true, true), + map(names, moduleusings(mod))..., + builtins] |> unique |> filtervalid + +completions(name) = fuzzysort(name, accessible(current_module())) +completions(name::Symbol) = completions(string(name)) + end From d908684730c629376e3f48a86dc51a23864ae91c Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Wed, 28 Jan 2015 00:55:02 +0000 Subject: [PATCH 2/6] whitespace again --- base/docs.jl | 157 ++++++++++++++++++++++++++------------------------- 1 file changed, 79 insertions(+), 78 deletions(-) diff --git a/base/docs.jl b/base/docs.jl index e95eb7ab014da..1985a02ef8283 100644 --- a/base/docs.jl +++ b/base/docs.jl @@ -355,11 +355,11 @@ macro repl (ex) quote # Fuzzy Searching $(if isexpr(ex, Symbol) - n = string(ex) - pre = "search:" - :(print($pre); - printmatches($n, completions($n), cols=Base.tty_size()[2]-$(length(pre))); - println("\n")) + n = string(ex) + pre = "search:" + :(print($pre); + printmatches($n, completions($n), cols=Base.tty_size()[2]-$(length(pre))); + println("\n")) end) # Backwards-compatible with the previous help system, for now let doc = @doc $(esc(ex)) @@ -375,119 +375,120 @@ end # Fuzzy Search Algorithm function matchinds(needle, haystack; acronym = false) - chars = collect(needle) - is = Int[] - lastc = '\0' - for (i, char) in enumerate(haystack) - isempty(chars) && break - while chars[1] == ' ' shift!(chars) end # skip spaces - if lowercase(char) == lowercase(chars[1]) && (!acronym || !isalpha(lastc)) - push!(is, i) - shift!(chars) + chars = collect(needle) + is = Int[] + lastc = '\0' + for (i, char) in enumerate(haystack) + isempty(chars) && break + while chars[1] == ' ' shift!(chars) end # skip spaces + if lowercase(char) == lowercase(chars[1]) && (!acronym || !isalpha(lastc)) + push!(is, i) + shift!(chars) + end + lastc = char end - lastc = char - end - return is + return is end longer(x, y) = length(x) ≥ length(y) ? (x, true) : (y, false) bestmatch(needle, haystack) = - longer(matchinds(needle, haystack, acronym = true), - matchinds(needle, haystack)) + longer(matchinds(needle, haystack, acronym = true), + matchinds(needle, haystack)) avgdistance(xs) = - isempty(xs) ? 0 : - (xs[end] - xs[1] - length(xs)+1)/length(xs) + isempty(xs) ? 0 : + (xs[end] - xs[1] - length(xs)+1)/length(xs) function fuzzyscore(needle, haystack; shorter = true) - score = 0. - is, acro = bestmatch(needle, haystack) - score += (acro?2:1)length(is) # Matched characters - score -= 2(length(needle)-length(is)) # Missing characters - !acro && (score -= avgdistance(is)/10) # Contiguous - !isempty(is) && (score -= mean(is)/100) # Closer to beginning - score += (shorter ? -1 : 1)length(haystack)/1000 # Shorter/longer words + score = 0. + is, acro = bestmatch(needle, haystack) + score += (acro?2:1)length(is) # Matched characters + score -= 2(length(needle)-length(is)) # Missing characters + !acro && (score -= avgdistance(is)/10) # Contiguous + !isempty(is) && (score -= mean(is)/100) # Closer to beginning + score += (shorter ? -1 : 1)length(haystack)/1000 # Shorter/longer words end function fuzzysort(search, candidates; shorter = true) - scores = map(cand -> fuzzyscore(search, cand, shorter=shorter), candidates) - candidates[sortperm(scores)] |> reverse + scores = map(cand -> fuzzyscore(search, cand, shorter=shorter), candidates) + candidates[sortperm(scores)] |> reverse end # Levenshtein Distance function levenshtein(s1, s2) - a, b = collect(s1), collect(s2) - m = length(a) - n = length(b) - d = Array(Int, m+1, n+1) - - d[1:m+1, 1] = 0:m - d[1, 1:n+1] = 0:n - - for i = 1:m, j = 1:n - d[i+1,j+1] = min(d[i , j+1] + 1, - d[i+1, j ] + 1, - d[i , j ] + (a[i] != b[j])) - end + a, b = collect(s1), collect(s2) + m = length(a) + n = length(b) + d = Array(Int, m+1, n+1) + + d[1:m+1, 1] = 0:m + d[1, 1:n+1] = 0:n + + for i = 1:m, j = 1:n + d[i+1,j+1] = min(d[i , j+1] + 1, + d[i+1, j ] + 1, + d[i , j ] + (a[i] != b[j])) + end - return d[m+1, n+1] + return d[m+1, n+1] end function levsort(search, candidates) - scores = map(cand -> levenshtein(search, cand), candidates) - candidates[sortperm(scores)] + scores = map(cand -> levenshtein(search, cand), candidates) + candidates[sortperm(scores)] end # Result printing function printmatch(io::IO, word, match) - is, _ = bestmatch(word, match) - Markdown.with_output_format(:fade, io) do io - for (i, char) = enumerate(match) - if i in is - Markdown.with_output_format(print, :bold, io, char) - else - print(io, char) - end + is, _ = bestmatch(word, match) + Markdown.with_output_format(:fade, io) do io + for (i, char) = enumerate(match) + if i in is + Markdown.with_output_format(print, :bold, io, char) + else + print(io, char) + end + end end - end end printmatch(args...) = printfuzzy(STDOUT, args...) function printmatches(io::IO, word, matches; cols = Base.tty_size()[2]) - total = 0 - for match in matches - total + length(match) + 1 > cols && break - fuzzyscore(word, match) < 0 && break - print(io, " ") - printmatch(io, word, match) - total += length(match) + 1 - end + total = 0 + for match in matches + total + length(match) + 1 > cols && break + fuzzyscore(word, match) < 0 && break + print(io, " ") + printmatch(io, word, match) + total += length(match) + 1 + end end printmatches(args...; cols = Base.tty_size()[2]) = printmatches(STDOUT, args..., cols = cols) function print_joined_cols(io::IO, ss, delim = "", last = delim; cols = Base.tty_size()[2]) - i = 0 - total = 0 - for i = 1:length(ss) - total += length(ss[i]) - total + max(i-2,0)*length(delim) + (i>1?1:0)*length(last) > cols && (i-=1; break) - end - print_joined(io, ss[1:i], delim, last) + i = 0 + total = 0 + for i = 1:length(ss) + total += length(ss[i]) + total + max(i-2,0)*length(delim) + (i>1?1:0)*length(last) > cols && (i-=1; break) + end + print_joined(io, ss[1:i], delim, last) end print_joined_cols(args...; cols = Base.tty_size()[2]) = print_joined_cols(STDOUT, args...; cols=cols) function print_correction(word) - cors = levsort(word, accessible(current_module())) - pre = "Perhaps you meant " - print(pre) - print_joined_cols(cors, ", ", " or "; cols = Base.tty_size()[2]-length(pre)) - return + cors = levsort(word, accessible(current_module())) + pre = "Perhaps you meant " + print(pre) + print_joined_cols(cors, ", ", " or "; cols = Base.tty_size()[2]-length(pre)) + println() + return end # Completion data @@ -504,9 +505,9 @@ moduleusings(mod) = ccall(:jl_module_usings, Any, (Any,), mod) filtervalid(names) = filter(x->!ismatch(r"#", x), map(string, names)) accessible(mod::Module) = - [names(mod, true, true), - map(names, moduleusings(mod))..., - builtins] |> unique |> filtervalid + [names(mod, true, true), + map(names, moduleusings(mod))..., + builtins] |> unique |> filtervalid completions(name) = fuzzysort(name, accessible(current_module())) completions(name::Symbol) = completions(string(name)) From 1824e229bfd15a03dd6a758b627988f05879e39c Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Wed, 28 Jan 2015 01:01:13 +0000 Subject: [PATCH 3/6] spelling correction --- base/docs.jl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/base/docs.jl b/base/docs.jl index 1985a02ef8283..4dd1930c50166 100644 --- a/base/docs.jl +++ b/base/docs.jl @@ -361,9 +361,14 @@ macro repl (ex) printmatches($n, completions($n), cols=Base.tty_size()[2]-$(length(pre))); println("\n")) end) - # Backwards-compatible with the previous help system, for now - let doc = @doc $(esc(ex)) - doc ≠ nothing ? doc : Base.Help.@help_ $(esc(ex)) + if $(isa(ex, Symbol)) && !isdefined($(current_module()), $(Expr(:quote, ex))) + println($"Couldn't find $ex") + print_correction($(string(ex))) + else + # Backwards-compatible with the previous help system, for now + let doc = @doc $(esc(ex)) + doc ≠ nothing ? doc : Base.Help.@help_ $(esc(ex)) + end end end end From bf5c7ae14acc0219c7a291fe44dd6275fd11d35a Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Wed, 28 Jan 2015 01:16:46 +0000 Subject: [PATCH 4/6] more selective spellcheck --- base/docs.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/base/docs.jl b/base/docs.jl index 4dd1930c50166..095ddd3882e98 100644 --- a/base/docs.jl +++ b/base/docs.jl @@ -442,7 +442,12 @@ end function levsort(search, candidates) scores = map(cand -> levenshtein(search, cand), candidates) - candidates[sortperm(scores)] + candidates = candidates[sortperm(scores)] + i = 0 + for i = 1:length(candidates) + levenshtein(search, candidates[i]) > length(search)÷2 && break + end + return candidates[1:i] end # Result printing From 10d1af93fb9a0000b71140c3a83a84e2b9f5cf04 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Wed, 28 Jan 2015 11:15:09 +0000 Subject: [PATCH 5/6] algorithmic tweaks --- base/docs.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/base/docs.jl b/base/docs.jl index 095ddd3882e98..a33571041c85d 100644 --- a/base/docs.jl +++ b/base/docs.jl @@ -405,18 +405,17 @@ avgdistance(xs) = isempty(xs) ? 0 : (xs[end] - xs[1] - length(xs)+1)/length(xs) -function fuzzyscore(needle, haystack; shorter = true) +function fuzzyscore(needle, haystack) score = 0. is, acro = bestmatch(needle, haystack) score += (acro?2:1)length(is) # Matched characters score -= 2(length(needle)-length(is)) # Missing characters !acro && (score -= avgdistance(is)/10) # Contiguous !isempty(is) && (score -= mean(is)/100) # Closer to beginning - score += (shorter ? -1 : 1)length(haystack)/1000 # Shorter/longer words end -function fuzzysort(search, candidates; shorter = true) - scores = map(cand -> fuzzyscore(search, cand, shorter=shorter), candidates) +function fuzzysort(search, candidates) + scores = map(cand -> (fuzzyscore(search, cand), -levenshtein(search, cand)), candidates) candidates[sortperm(scores)] |> reverse end @@ -441,11 +440,11 @@ function levenshtein(s1, s2) end function levsort(search, candidates) - scores = map(cand -> levenshtein(search, cand), candidates) + scores = map(cand -> (levenshtein(search, cand), -fuzzyscore(search, cand)), candidates) candidates = candidates[sortperm(scores)] i = 0 for i = 1:length(candidates) - levenshtein(search, candidates[i]) > length(search)÷2 && break + levenshtein(search, candidates[i]) > 3 && break end return candidates[1:i] end From 29b7f672fc4f76aa361bd7e261d48e1a3de1967b Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Wed, 28 Jan 2015 14:05:42 +0000 Subject: [PATCH 6/6] small refactor --- base/docs.jl | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/base/docs.jl b/base/docs.jl index a33571041c85d..9717769faf4fe 100644 --- a/base/docs.jl +++ b/base/docs.jl @@ -351,19 +351,27 @@ catdoc(md::MD...) = MD(md...) # REPL help +function repl_search(s) + pre = "search:" + print(pre) + printmatches(s, completions(s), cols=Base.tty_size()[2]-length(pre)) + println("\n") +end + +function repl_corrections(s) + print("Couldn't find ") + Markdown.with_output_format(:cyan, STDOUT) do io + println(io, s) + end + print_correction(s) +end + macro repl (ex) quote # Fuzzy Searching - $(if isexpr(ex, Symbol) - n = string(ex) - pre = "search:" - :(print($pre); - printmatches($n, completions($n), cols=Base.tty_size()[2]-$(length(pre))); - println("\n")) - end) + $(isexpr(ex, Symbol)) && repl_search($(string(ex))) if $(isa(ex, Symbol)) && !isdefined($(current_module()), $(Expr(:quote, ex))) - println($"Couldn't find $ex") - print_correction($(string(ex))) + repl_corrections($(string(ex))) else # Backwards-compatible with the previous help system, for now let doc = @doc $(esc(ex))