From 2c406bb7d4732b2923f92c971ead8ab073e71ae0 Mon Sep 17 00:00:00 2001 From: Yggdroot Date: Mon, 7 Oct 2019 13:24:08 +0800 Subject: [PATCH] fix issue #360 and #366 Find file based on the current buffer --- autoload/leaderf/fuzzyMatch_C/fuzzyEngine.c | 235 +++++++++++++++++- autoload/leaderf/fuzzyMatch_C/fuzzyMatch.c | 215 ++++++++++++++-- autoload/leaderf/fuzzyMatch_C/fuzzyMatch.h | 13 +- autoload/leaderf/python/leaderf/fuzzyMatch.py | 88 +++++++ autoload/leaderf/python/leaderf/manager.py | 134 ++++++++-- doc/leaderf.txt | 7 + 6 files changed, 643 insertions(+), 49 deletions(-) diff --git a/autoload/leaderf/fuzzyMatch_C/fuzzyEngine.c b/autoload/leaderf/fuzzyMatch_C/fuzzyEngine.c index 13a5c717..b6f1d70a 100644 --- a/autoload/leaderf/fuzzyMatch_C/fuzzyEngine.c +++ b/autoload/leaderf/fuzzyMatch_C/fuzzyEngine.c @@ -44,7 +44,11 @@ typedef struct FeTaskItem typedef struct FeResult { - weight_t weight; + union + { + weight_t weight; + uint32_t path_weight; + }; uint32_t index; }FeResult; @@ -76,14 +80,27 @@ struct FuzzyEngine #else pthread_t* threads; #endif - PatternContext* pPattern_ctxt; - uint8_t is_name_only; + union + { + struct + { + PatternContext* pPattern_ctxt; + uint8_t is_name_only; + }; + struct + { + const char* filename; + const char* suffix; + const char* dirname; + }; + }; FeString* source; FeTaskItem* tasks; union { weight_t* weights; HighlightGroup** highlights; + uint32_t* path_weights; }; FeCircularQueue task_queue; }; @@ -281,7 +298,8 @@ struct FuzzyEngine enum { GETWEIGHT = 0, - GETHIGHLIGHTS + GETHIGHLIGHTS, + GETPATHWEIGHT }; #if defined(_MSC_VER) @@ -311,7 +329,7 @@ static void* _worker(void* pParam) pEngine->pPattern_ctxt, pEngine->is_name_only); } } - else + else if ( pTask->function == GETHIGHLIGHTS ) { HighlightGroup** results = pEngine->highlights + pTask->offset; uint32_t length = pTask->length; @@ -322,6 +340,16 @@ static void* _worker(void* pParam) pEngine->pPattern_ctxt, pEngine->is_name_only); } } + else if ( pTask->function == 
GETPATHWEIGHT ) + { + uint32_t* results = pEngine->path_weights + pTask->offset; + uint32_t length = pTask->length; + uint32_t i = 0; + for ( ; i < length; ++i ) + { + results[i] = getPathWeight(pEngine->filename, pEngine->suffix, pEngine->dirname, tasks[i].str, tasks[i].len); + } + } QUEUE_TASK_DONE(pEngine->task_queue); } @@ -474,7 +502,7 @@ static void delPatternContext(PyObject* obj) */ static PyObject* fuzzyEngine_initPattern(PyObject* self, PyObject* args) { - char* pattern; + const char* pattern; Py_ssize_t pattern_len; if ( !PyArg_ParseTuple(args, "s#:initPattern", &pattern, &pattern_len) ) @@ -1012,6 +1040,200 @@ static PyObject* fuzzyEngine_getHighlights(PyObject* self, PyObject* args, PyObj return res; } +/* sort in descending order */ +static int compare2(const void* a, const void* b) +{ + uint32_t wa = ((const FeResult*)a)->path_weight; + uint32_t wb = ((const FeResult*)b)->path_weight; + + return (int)wb - (int)wa; +} + +/** + * guessMatch(engine, source, filename, suffix, dirname, sort_results=True) + * + * e.g., /usr/src/example.tar.gz + * `filename` is "example.tar" + * `suffix` is ".gz" + * `dirname` is "/usr/src" + * + * return a tuple, (a list of corresponding weight, a sorted list of items from `source` that match `pattern`). 
+ */ +static PyObject* fuzzyEngine_guessMatch(PyObject* self, PyObject* args, PyObject* kwargs) +{ + PyObject* py_engine = NULL; + PyObject* py_source = NULL; + const char* filename = NULL; + const char* suffix = NULL; + const char* dirname = NULL; + uint8_t sort_results = 1; + static char* kwlist[] = {"engine", "source", "filename", "suffix", "dirname", "sort_results", NULL}; + + if ( !PyArg_ParseTupleAndKeywords(args, kwargs, "OOsss|b:guessMatch", kwlist, &py_engine, + &py_source, &filename, &suffix, &dirname, &sort_results) ) + return NULL; + + FuzzyEngine* pEngine = (FuzzyEngine*)PyCapsule_GetPointer(py_engine, NULL); + if ( !pEngine ) + return NULL; + + if ( !PyList_Check(py_source) ) + { + PyErr_SetString(PyExc_TypeError, "parameter `source` must be a list."); + return NULL; + } + + uint32_t source_size = (uint32_t)PyList_Size(py_source); + if ( source_size == 0 ) + { + return Py_BuildValue("([],[])"); + } + + pEngine->filename = filename; + pEngine->suffix = suffix; + pEngine->dirname = dirname; + + uint32_t max_task_count = MAX_TASK_COUNT(pEngine->cpu_count); + uint32_t chunk_size = (source_size + max_task_count - 1) / max_task_count; + uint32_t task_count = (source_size + chunk_size - 1) / chunk_size; + + pEngine->source = (FeString*)malloc(source_size * sizeof(FeString)); + if ( !pEngine->source ) + { + fprintf(stderr, "Out of memory at %s:%d\n", __FILE__, __LINE__); + return NULL; + } + + pEngine->tasks = (FeTaskItem*)malloc(task_count * sizeof(FeTaskItem)); + if ( !pEngine->tasks ) + { + free(pEngine->source); + fprintf(stderr, "Out of memory at %s:%d\n", __FILE__, __LINE__); + return NULL; + } + + pEngine->path_weights = (uint32_t*)malloc(source_size * sizeof(uint32_t)); + if ( !pEngine->path_weights ) + { + free(pEngine->source); + free(pEngine->tasks); + fprintf(stderr, "Out of memory at %s:%d\n", __FILE__, __LINE__); + return NULL; + } + + FeResult* results = (FeResult*)malloc(source_size * sizeof(FeResult)); + if ( !results ) + { + 
free(pEngine->source); + free(pEngine->tasks); + free(pEngine->path_weights); + fprintf(stderr, "Out of memory at %s:%d\n", __FILE__, __LINE__); + return NULL; + } + + if ( !pEngine->threads ) + { +#if defined(_MSC_VER) + pEngine->threads = (HANDLE*)malloc(pEngine->cpu_count * sizeof(HANDLE)); +#else + pEngine->threads = (pthread_t*)malloc(pEngine->cpu_count * sizeof(pthread_t)); +#endif + if ( !pEngine->threads ) + { + free(pEngine->source); + free(pEngine->tasks); + free(pEngine->path_weights); + free(results); + fprintf(stderr, "Out of memory at %s:%d\n", __FILE__, __LINE__); + return NULL; + } + + uint32_t i = 0; + for ( ; i < pEngine->cpu_count; ++i) + { +#if defined(_MSC_VER) + pEngine->threads[i] = CreateThread(NULL, 0, _worker, pEngine, 0, NULL); + if ( !pEngine->threads[i] ) +#else + int ret = pthread_create(&pEngine->threads[i], NULL, _worker, pEngine); + if ( ret != 0 ) +#endif + { + free(pEngine->source); + free(pEngine->tasks); + free(pEngine->path_weights); + free(results); + free(pEngine->threads); + fprintf(stderr, "pthread_create error!\n"); + return NULL; + } + } + } + +#if defined(_MSC_VER) + QUEUE_SET_TASK_COUNT(pEngine->task_queue, task_count); +#endif + + uint32_t i = 0; + for ( ; i < task_count; ++i ) + { + uint32_t offset = i * chunk_size; + uint32_t length = MIN(chunk_size, source_size - offset); + + pEngine->tasks[i].offset = offset; + pEngine->tasks[i].length = length; + pEngine->tasks[i].function = GETPATHWEIGHT; + + uint32_t j = 0; + for ( ; j < length; ++j ) + { + FeString *s = pEngine->source + offset + j; + PyObject* item = PyList_GET_ITEM(py_source, offset + j); + if ( pyObject_ToStringAndSize(item, &s->str, &s->len) < 0 ) + { + free(pEngine->source); + free(pEngine->tasks); + free(pEngine->path_weights); + free(results); + fprintf(stderr, "pyObject_ToStringAndSize error!\n"); + return NULL; + } + } + + QUEUE_PUT(pEngine->task_queue, pEngine->tasks + i); + } + + QUEUE_JOIN(pEngine->task_queue); /* blocks until all tasks have 
finished */ + + for ( i = 0; i < source_size; ++i ) + { + results[i].path_weight = pEngine->path_weights[i]; + results[i].index = i; + } + + if ( sort_results ) + { + qsort(results, source_size, sizeof(FeResult), compare2); + } + + PyObject* weight_list = PyList_New(source_size); + PyObject* text_list = PyList_New(source_size); + for ( i = 0; i < source_size; ++i ) + { + /* PyList_SET_ITEM() steals a reference to item. */ + /* PySequence_GetItem() return value: New reference. */ + PyList_SET_ITEM(weight_list, i, Py_BuildValue("I", results[i].path_weight)); + PyList_SET_ITEM(text_list, i, PySequence_GetItem(py_source, results[i].index)); + } + + free(pEngine->source); + free(pEngine->tasks); + free(pEngine->path_weights); + free(results); + + return Py_BuildValue("(NN)", weight_list, text_list); +} + static PyMethodDef fuzzyEngine_Methods[] = { { "createFuzzyEngine", (PyCFunction)fuzzyEngine_createFuzzyEngine, METH_VARARGS | METH_KEYWORDS, "" }, @@ -1020,6 +1242,7 @@ static PyMethodDef fuzzyEngine_Methods[] = { "fuzzyMatch", (PyCFunction)fuzzyEngine_fuzzyMatch, METH_VARARGS | METH_KEYWORDS, "" }, { "fuzzyMatchEx", (PyCFunction)fuzzyEngine_fuzzyMatchEx, METH_VARARGS | METH_KEYWORDS, "" }, { "getHighlights", (PyCFunction)fuzzyEngine_getHighlights, METH_VARARGS | METH_KEYWORDS, "" }, + { "guessMatch", (PyCFunction)fuzzyEngine_guessMatch, METH_VARARGS | METH_KEYWORDS, "" }, { NULL, NULL, 0, NULL } }; diff --git a/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.c b/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.c index 9adc69a6..f4616c39 100644 --- a/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.c +++ b/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.c @@ -142,7 +142,7 @@ static uint16_t valTable[64] = typedef struct TextContext { - char* text; + const char* text; uint64_t* text_mask; uint16_t text_len; uint16_t col_num; @@ -156,7 +156,7 @@ typedef struct ValueElements uint16_t end; }ValueElements; -PatternContext* initPattern(char* pattern, uint16_t pattern_len) +PatternContext* 
initPattern(const char* pattern, uint16_t pattern_len) { PatternContext* pPattern_ctxt = (PatternContext*)malloc(sizeof(PatternContext)); if ( !pPattern_ctxt ) @@ -200,7 +200,7 @@ ValueElements* evaluate_nameOnly(TextContext* pText_ctxt, uint16_t col_num = pText_ctxt->col_num; uint16_t j = pText_ctxt->offset; - char* pattern = pPattern_ctxt->pattern; + const char* pattern = pPattern_ctxt->pattern; uint16_t base_offset = pattern[k] * col_num; uint64_t x = text_mask[base_offset + (j >> 6)] >> (j & 63); uint16_t i = 0; @@ -242,7 +242,7 @@ ValueElements* evaluate_nameOnly(TextContext* pText_ctxt, uint16_t max_prefix_score = 0; float max_score = MIN_WEIGHT; - char* text = pText_ctxt->text; + const char* text = pText_ctxt->text; uint16_t text_len = pText_ctxt->text_len; uint16_t pattern_len = pPattern_ctxt->pattern_len - k; int64_t* pattern_mask = pPattern_ctxt->pattern_mask; @@ -403,7 +403,7 @@ ValueElements* evaluate(TextContext* pText_ctxt, uint16_t col_num = pText_ctxt->col_num; uint16_t j = pText_ctxt->offset; - char* pattern = pPattern_ctxt->pattern; + const char* pattern = pPattern_ctxt->pattern; uint16_t base_offset = pattern[k] * col_num; uint64_t x = text_mask[base_offset + (j >> 6)] >> (j & 63); uint16_t i = 0; @@ -445,7 +445,7 @@ ValueElements* evaluate(TextContext* pText_ctxt, uint16_t max_prefix_score = 0; float max_score = MIN_WEIGHT; - char* text = pText_ctxt->text; + const char* text = pText_ctxt->text; uint16_t text_len = pText_ctxt->text_len; uint16_t pattern_len = pPattern_ctxt->pattern_len - k; int64_t* pattern_mask = pPattern_ctxt->pattern_mask; @@ -453,7 +453,11 @@ ValueElements* evaluate(TextContext* pText_ctxt, uint16_t special = 0; if ( i == 0 ) special = 5; - else if ( text[i-1] == '/' || text[i-1] == '\\' ) +#if defined(_MSC_VER) + else if ( text[i-1] == '\\' || text[i-1] == '/' ) +#else + else if ( text[i-1] == '/' ) +#endif special = k == 0 ? 
5 : 3; else if ( isupper(text[i]) ) special = !isupper(text[i-1]) || (i+1 < text_len && islower(text[i+1])) ? 3 : 0; @@ -565,7 +569,11 @@ ValueElements* evaluate(TextContext* pText_ctxt, i += FM_CTZ(x); } - if ( text[i-1] == '/' || text[i-1] == '\\' ) +#if defined(_MSC_VER) + if ( text[i-1] == '\\' || text[i-1] == '/' ) +#else + if ( text[i-1] == '/' ) +#endif special = k == 0 ? 5 : 3; else if ( isupper(text[i]) ) special = !isupper(text[i-1]) || (i+1 < text_len && islower(text[i+1])) ? 3 : 0; @@ -607,7 +615,7 @@ ValueElements* evaluate(TextContext* pText_ctxt, return val + k; } -float getWeight(char* text, uint16_t text_len, +float getWeight(const char* text, uint16_t text_len, PatternContext* pPattern_ctxt, uint8_t is_name_only) { @@ -617,7 +625,7 @@ float getWeight(char* text, uint16_t text_len, uint16_t j = 0; uint16_t col_num = 0; uint64_t* text_mask = NULL; - char* pattern = pPattern_ctxt->pattern; + const char* pattern = pPattern_ctxt->pattern; uint16_t pattern_len = pPattern_ctxt->pattern_len; int64_t* pattern_mask = pPattern_ctxt->pattern_mask; char first_char = pattern[0]; @@ -863,7 +871,7 @@ HighlightGroup* evaluateHighlights_nameOnly(TextContext* pText_ctxt, uint64_t* text_mask = pText_ctxt->text_mask; uint16_t col_num = pText_ctxt->col_num; - char* pattern = pPattern_ctxt->pattern; + const char* pattern = pPattern_ctxt->pattern; uint16_t base_offset = pattern[k] * col_num; uint64_t x = text_mask[base_offset + (j >> 6)] >> (j & 63); uint16_t i = 0; @@ -911,7 +919,7 @@ HighlightGroup* evaluateHighlights_nameOnly(TextContext* pText_ctxt, HighlightGroup cur_highlights; memset(&cur_highlights, 0, sizeof(HighlightGroup)); - char* text = pText_ctxt->text; + const char* text = pText_ctxt->text; uint16_t text_len = pText_ctxt->text_len; uint16_t pattern_len = pPattern_ctxt->pattern_len - k; int64_t* pattern_mask = pPattern_ctxt->pattern_mask; @@ -1085,7 +1093,7 @@ HighlightGroup* evaluateHighlights(TextContext* pText_ctxt, uint64_t* text_mask = 
pText_ctxt->text_mask; uint16_t col_num = pText_ctxt->col_num; - char* pattern = pPattern_ctxt->pattern; + const char* pattern = pPattern_ctxt->pattern; uint16_t base_offset = pattern[k] * col_num; uint64_t x = text_mask[base_offset + (j >> 6)] >> (j & 63); uint16_t i = 0; @@ -1133,7 +1141,7 @@ HighlightGroup* evaluateHighlights(TextContext* pText_ctxt, HighlightGroup cur_highlights; memset(&cur_highlights, 0, sizeof(HighlightGroup)); - char* text = pText_ctxt->text; + const char* text = pText_ctxt->text; uint16_t text_len = pText_ctxt->text_len; uint16_t pattern_len = pPattern_ctxt->pattern_len - k; int64_t* pattern_mask = pPattern_ctxt->pattern_mask; @@ -1141,7 +1149,11 @@ HighlightGroup* evaluateHighlights(TextContext* pText_ctxt, uint16_t special = 0; if ( i == 0 ) special = 5; - else if ( text[i-1] == '/' || text[i-1] == '\\' ) +#if defined(_MSC_VER) + else if ( text[i-1] == '\\' || text[i-1] == '/' ) +#else + else if ( text[i-1] == '/' ) +#endif special = k == 0 ? 5 : 3; else if ( isupper(text[i]) ) special = !isupper(text[i-1]) || (i+1 < text_len && islower(text[i+1])) ? 3 : 0; @@ -1262,7 +1274,11 @@ HighlightGroup* evaluateHighlights(TextContext* pText_ctxt, i += FM_CTZ(x); } - if ( text[i-1] == '/' || text[i-1] == '\\' ) +#if defined(_MSC_VER) + if ( text[i-1] == '\\' || text[i-1] == '/' ) +#else + if ( text[i-1] == '/' ) +#endif special = k == 0 ? 5 : 3; else if ( isupper(text[i]) ) special = !isupper(text[i-1]) || (i+1 < text_len && islower(text[i+1])) ? 3 : 0; @@ -1309,7 +1325,7 @@ HighlightGroup* evaluateHighlights(TextContext* pText_ctxt, * is the length of the highlight in bytes. * e.g., [ [2,3], [6,2], [10,4], ... 
] */ -HighlightGroup* getHighlights(char* text, +HighlightGroup* getHighlights(const char* text, uint16_t text_len, PatternContext* pPattern_ctxt, uint8_t is_name_only) @@ -1319,7 +1335,7 @@ HighlightGroup* getHighlights(char* text, uint16_t col_num = 0; uint64_t* text_mask = NULL; - char* pattern = pPattern_ctxt->pattern; + const char* pattern = pPattern_ctxt->pattern; uint16_t pattern_len = pPattern_ctxt->pattern_len; int64_t* pattern_mask = pPattern_ctxt->pattern_mask; char first_char = pattern[0]; @@ -1552,6 +1568,163 @@ HighlightGroup* getHighlights(char* text, return pGroup; } +/** + * e.g., /usr/src/example.tar.gz + * `dirname` is "/usr/src" + * `basename` is "example.tar.gz" + * `filename` is "example.tar", `suffix` is ".gz" + */ +uint32_t getPathWeight(const char* filename, + const char* suffix, + const char* dirname, + const char* path, uint32_t path_len) +{ + uint32_t filename_lcp = 0; + uint32_t filename_prefix = 0; + uint32_t dirname_lcp = 0; + uint32_t is_suffix_diff = 0; + uint32_t is_basename_same = 0; + uint32_t is_dirname_same = 0; + + const char* filename_start = path; + const char* p = path + path_len; + const char* p1 = NULL; + + while ( p >= path ) + { +#if defined(_MSC_VER) + if ( *p == '\\' || *p == '/' ) +#else + if ( *p == '/' ) +#endif + { + filename_start = p + 1; + break; + } + --p; + } + + if ( *suffix != '\0' ) + { + p = filename_start; + p1 = filename; + while ( *p != '\0' && *p == *p1 ) + { + ++filename_lcp; + ++p; + ++p1; + } + + filename_prefix = filename_lcp; + + if ( filename_lcp > 0 && ((*p >= 'a' && *p <= 'z') || (*p >= '0' && *p <= '9') + || (*p1 >= 'a' && *p1 <= 'z') || (*p1 >= '0' && *p1 <= '9')) ) + { + --p; + while ( p > filename_start ) + { + if ( *p >= 'a' && *p <= 'z' ) + { + --p; + } + else + { + break; + } + } + filename_prefix = p - filename_start; + } + + p = path + path_len - 1; + while ( p > filename_start ) + { + if ( *p == '.' 
) + { + if ( strcmp(suffix, p) != 0 ) + { + if ( filename_lcp > 0 ) + is_suffix_diff = 1; + } + else if ( *p1 == '\0' && filename_lcp == p - filename_start ) + { + is_basename_same = 1; + } + break; + } + --p; + } + } + else + { + is_basename_same = strcmp(filename, filename_start) == 0; + } + + p = path; + p1 = dirname; +#if defined(_MSC_VER) + while ( p < filename_start ) + { + if ( *p1 == '\\' ) + { + if ( *p == '\\' || *p == '/' ) + { + ++dirname_lcp; + } + else + { + break; + } + } + else if ( *p != *p1 ) + { + break; + } + ++p; + ++p1; + } +#else + while ( p < filename_start && *p == *p1 ) + { + if ( *p == '/' ) + { + ++dirname_lcp; + } + ++p; + ++p1; + } +#endif + /** + * e.g., dirname = "abc" , path = "abc/test.h" + * p1 != dirname is to avoid such a case: + * e.g., buffer name is "aaa.h", path is "/abc/def.h" + */ +#if defined(_MSC_VER) + if ( *p1 == '\0' && p1 != dirname && (*p == '\\' || *p == '/') ) +#else + if ( *p1 == '\0' && p1 != dirname && *p == '/' ) +#endif + { + ++dirname_lcp; + } + + /* if dirname is empty, filename_start == p */ + is_dirname_same = filename_start - p < 2; + + /* dirname/filename+suffix is the same as path */ + if ( is_basename_same && is_dirname_same ) + { + return 0; + } + + if ( filename_start == path && *dirname == '\0') + { + dirname_lcp = 1; + } + + return (((filename_prefix + 1) << 24) | (dirname_lcp << 12) | (is_dirname_same << 11) + | filename_lcp) + (is_suffix_diff << 2) - path_len; +} + static void delPatternContext(PyObject* obj) { free(PyCapsule_GetPointer(obj, NULL)); @@ -1559,7 +1732,7 @@ static void delPatternContext(PyObject* obj) static PyObject* fuzzyMatchC_initPattern(PyObject* self, PyObject* args) { - char* pattern; + const char* pattern; Py_ssize_t pattern_len; if ( !PyArg_ParseTuple(args, "s#:initPattern", &pattern, &pattern_len) ) @@ -1572,7 +1745,7 @@ static PyObject* fuzzyMatchC_initPattern(PyObject* self, PyObject* args) static PyObject* fuzzyMatchC_getWeight(PyObject* self, PyObject* args, PyObject* 
kwargs) { - char* text; + const char* text; Py_ssize_t text_len; PyObject* py_patternCtxt; uint8_t is_name_only; @@ -1591,7 +1764,7 @@ static PyObject* fuzzyMatchC_getWeight(PyObject* self, PyObject* args, PyObject* static PyObject* fuzzyMatchC_getHighlights(PyObject* self, PyObject* args, PyObject* kwargs) { - char* text; + const char* text; Py_ssize_t text_len; PyObject* py_patternCtxt; uint8_t is_name_only; diff --git a/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.h b/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.h index e4ea2996..0e5c1bbe 100644 --- a/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.h +++ b/autoload/leaderf/fuzzyMatch_C/fuzzyMatch.h @@ -23,7 +23,7 @@ typedef struct PatternContext { - char* pattern; + const char* pattern; int64_t pattern_mask[256]; uint16_t pattern_len; uint8_t is_lower; @@ -48,11 +48,16 @@ typedef struct HighlightGroup extern "C" { #endif -PatternContext* initPattern(char* pattern, uint16_t pattern_len); +PatternContext* initPattern(const char* pattern, uint16_t pattern_len); -float getWeight(char* text, uint16_t text_len, PatternContext* pPattern_ctxt, uint8_t is_name_only); +float getWeight(const char* text, uint16_t text_len, PatternContext* pPattern_ctxt, uint8_t is_name_only); -HighlightGroup* getHighlights(char* text, uint16_t text_len, PatternContext* pPattern_ctxt, uint8_t is_name_only); +HighlightGroup* getHighlights(const char* text, uint16_t text_len, PatternContext* pPattern_ctxt, uint8_t is_name_only); + +uint32_t getPathWeight(const char* filename, + const char* suffix, + const char* dirname, + const char* path, uint32_t path_len); #ifdef __cplusplus } diff --git a/autoload/leaderf/python/leaderf/fuzzyMatch.py b/autoload/leaderf/python/leaderf/fuzzyMatch.py index ac529c9d..6a843ff2 100644 --- a/autoload/leaderf/python/leaderf/fuzzyMatch.py +++ b/autoload/leaderf/python/leaderf/fuzzyMatch.py @@ -16,6 +16,7 @@ # -*- coding: utf-8 -*- import sys +import os.path if sys.version_info >= (3, 0): def Unicode(str, encoding): @@ -663,3 
+664,90 @@ def getHighlights(self, text): return highlights + # e.g., /usr/src/example.tar.gz + # `dirname` is "/usr/src" + # `basename` is "example.tar.gz" + # `filename` is "example.tar", `suffix` is ".gz" + @staticmethod + def getPathWeight(filename, suffix, dirname, path): + filename_lcp = 0 + filename_prefix = 0 + dirname_lcp = 0 + is_suffix_diff = 0 + is_basename_same = 0 + is_dirname_same = 0 + + filename_start = 0 + path_len = len(path) + + for i, c in enumerate(reversed(path)): + if c in '/\\': + filename_start = path_len - i + break + + if suffix != "": + i = 0 + min_len = min(path_len - filename_start, len(filename)) + while i < min_len: + if path[filename_start+i] == filename[i]: + filename_lcp += 1 + i += 1 + else: + break + + filename_prefix = filename_lcp + + if i == path_len - filename_start: + p = '.' + else: + p = path[filename_start+i] + + if i == len(filename): + p1 = '.' + else: + p1 = filename[i] + + if filename_lcp > 0 and ('a' <= p <= 'z' or '0' <= p <= '9' + or 'a' <= p1 <= 'z' or '0' <= p1 <= '9'): + filename_prefix -= 1 + while filename_prefix > 0: + if 'a' <= filename[filename_prefix] <= 'z': + filename_prefix -= 1 + else: + break + + if filename_lcp > 0: + root, path_suffix = os.path.splitext(path) + if path_suffix != suffix: + is_suffix_diff = 1 + if is_suffix_diff == 0 and filename_lcp == len(filename) == len(root) - filename_start: + is_basename_same = 1 + else: + is_basename_same = 1 if filename == path[filename_start:] else 0 + + i = 0 + min_len = min(filename_start, len(dirname)) + while i < min_len: + if dirname[i] in '/\\': + if path[i] in '/\\': + dirname_lcp += 1 + else: + break + elif dirname[i] != path[i]: + break + i += 1 + + if i > 0 and i == len(dirname) and i < path_len and path[i] in '/\\': + dirname_lcp += 1 + + if filename_start - i < 2: + is_dirname_same = 1 + + if is_dirname_same and is_basename_same: + return 0 + + if filename_start == 0 and dirname == "": + dirname_lcp = 1 + + return (((filename_prefix + 1) << 24) 
| (dirname_lcp << 12) | (is_dirname_same << 11) + | filename_lcp) + (is_suffix_diff << 2) - path_len diff --git a/autoload/leaderf/python/leaderf/manager.py b/autoload/leaderf/python/leaderf/manager.py index d12d660e..9d136f8f 100644 --- a/autoload/leaderf/python/leaderf/manager.py +++ b/autoload/leaderf/python/leaderf/manager.py @@ -88,6 +88,7 @@ def __init__(self): self._highlight_method = lambda : None self._orig_cwd = None self._cursorline_dict = {} + self._empty_query = lfEval("get(g:, 'Lf_EmptyQuery', 1)") == '1' self._getExplClass() #************************************************************** @@ -203,6 +204,7 @@ def _setStlMode(self, **kwargs): def _beforeEnter(self): self._resetAutochdir() + self._cur_buffer = vim.current.buffer def _afterEnter(self): if "--nowrap" in self._arguments: @@ -232,7 +234,8 @@ def _afterExit(self): pass def _bangEnter(self): - pass + if self._cli.pattern: + self._search(self._content) def _bangReadFinished(self): pass @@ -359,7 +362,15 @@ def _createHelpHint(self): self._getInstance().buffer.options['modifiable'] = True self._getInstance().buffer.append(help[::-1]) self._getInstance().buffer.options['modifiable'] = False - self._getInstance().window.height = len(self._getInstance().buffer) + buffer_len = len(self._getInstance().buffer) + if buffer_len < self._initial_count: + if "--nowrap" not in self._arguments: + self._getInstance().window.height = min(self._initial_count, + self._getInstance()._actualLength(self._getInstance().buffer)) + else: + self._getInstance().window.height = buffer_len + elif self._getInstance().window.height < self._initial_count: + self._getInstance().window.height = self._initial_count lfCmd("normal! 
Gzb") self._getInstance().window.cursor = (orig_row, 0) else: @@ -474,6 +485,7 @@ def _search(self, content, is_continue=False, step=0): self._clearHighlights() self._clearHighlightsPos() self._cli.highlightMatches() + if not self._cli.pattern: # e.g., when or is typed self._getInstance().setBuffer(content[:self._initial_count]) self._getInstance().setStlResultsCount(len(content)) @@ -503,11 +515,11 @@ def _filter(self, step, filter_method, content, is_continue, length = len(content) if self._index == 0: self._cb_content = [] - self._result_content = content + self._result_content = [] self._index = min(step, length) cur_content = content[:self._index] else: - if not is_continue and not self._getInstance().empty(): + if not is_continue and self._result_content: self._cb_content += self._result_content if len(self._cb_content) >= step: @@ -857,6 +869,47 @@ def _fuzzySearch(self, content, is_continue, step): self._highlight_method = highlight_method self._highlight_method() + def _guessFilter(self, filename, suffix, dirname, iterable): + """ + return a list, each item is a pair (weight, line) + """ + return ((FuzzyMatch.getPathWeight(filename, suffix, dirname, line), line) for line in iterable) + + def _guessSearch(self, content, is_continue=False, step=0): + if self._cur_buffer.name == '' or self._cur_buffer.options["buftype"] != b'': + self._getInstance().setBuffer(content[:self._initial_count]) + self._getInstance().setStlResultsCount(len(content)) + self._result_content = [] + return + + buffer_name = os.path.normpath(lfDecode(self._cur_buffer.name)) + if lfEval("g:Lf_ShowRelativePath") == '1': + buffer_name = os.path.relpath(buffer_name) + + buffer_name = lfEncode(buffer_name) + dirname, basename = os.path.split(buffer_name) + filename, suffix = os.path.splitext(basename) + if self._fuzzy_engine: + filter_method = partial(fuzzyEngine.guessMatch, engine=self._fuzzy_engine, filename=filename, + suffix=suffix, dirname=dirname, sort_results=not is_continue) + step = 
len(content) + + pair = self._filter(step, filter_method, content, is_continue, True) + if is_continue: # result is not sorted + pairs = sorted(zip(*pair), key=operator.itemgetter(0), reverse=True) + self._result_content = self._getList(pairs) + else: + self._result_content = pair[1] + else: + step = len(content) + filter_method = partial(self._guessFilter, filename, suffix, dirname) + pairs = self._filter(step, filter_method, content, is_continue) + pairs.sort(key=operator.itemgetter(0), reverse=True) + self._result_content = self._getList(pairs) + + self._getInstance().setBuffer(self._result_content[:self._initial_count]) + self._getInstance().setStlResultsCount(len(self._result_content)) + def _highlight_and_mode(self, highlight_methods): self._clearHighlights() for i, highlight_method in enumerate(highlight_methods): @@ -1403,8 +1456,8 @@ def startExplorer(self, win_pos, *args, **kwargs): else: lfCmd("normal! gg") self._index = 0 - self._pattern = kwargs.get("pattern", "") or arguments_dict.get("--input", [""])[0] - self._cli.setPattern(self._pattern) + pattern = kwargs.get("pattern", "") or arguments_dict.get("--input", [""])[0] + self._cli.setPattern(pattern) self._start_time = time.time() self._bang_start_time = self._start_time @@ -1414,17 +1467,21 @@ def startExplorer(self, win_pos, *args, **kwargs): self._read_content_exception = None if isinstance(content, list): self._is_content_list = True + self._read_finished = 1 + if len(content[0]) == len(content[0].rstrip("\r\n")): self._content = content else: self._content = [line.rstrip("\r\n") for line in content] self._getInstance().setStlTotal(len(self._content)//self._getUnit()) - self._result_content = self._content + self._result_content = [] + self._cb_content = [] self._getInstance().setStlResultsCount(len(self._content)) if lfEval("g:Lf_RememberLastSearch") == '1' and self._launched and self._cli.pattern: pass else: - self._getInstance().setBuffer(self._content[:self._initial_count]) + if not 
(self._empty_query and self._getExplorer().getStlCategory() in ["File"]): + self._getInstance().setBuffer(self._content[:self._initial_count]) if lfEval("has('nvim')") == '1': lfCmd("redrawstatus") @@ -1436,9 +1493,26 @@ def startExplorer(self, win_pos, *args, **kwargs): lfCmd("echo") self._getInstance().buffer.options['modifiable'] = False self._bangEnter() + + if self._empty_query and self._getExplorer().getStlCategory() in ["File"]: + self._guessSearch(self._content) + if self._result_content: # self._result_content is [] only if + # self._cur_buffer.name == '' or self._cur_buffer.options["buftype"] != b'': + self._getInstance().appendBuffer(self._result_content[self._initial_count:]) + else: + self._getInstance().appendBuffer(self._content[self._initial_count:]) + + if self._timer_id is not None: + lfCmd("call timer_stop(%s)" % self._timer_id) + self._timer_id = None + + self._bangReadFinished() + + lfCmd("echohl WarningMsg | redraw | echo ' Done!' | echohl NONE") elif isinstance(content, AsyncExecutor.Result): self._is_content_list = False self._result_content = [] + self._cb_content = [] self._callback = self._workInIdle if lfEval("get(g:, 'Lf_NoAsync', 0)") == '1': self._content = self._getInstance().initBuffer(content, self._getUnit(), self._getExplorer().setContent) @@ -1475,6 +1549,7 @@ def startExplorer(self, win_pos, *args, **kwargs): else: self._is_content_list = False self._result_content = [] + self._cb_content = [] self._callback = partial(self._workInIdle, content) if lfEval("get(g:, 'Lf_NoAsync', 0)") == '1': self._content = self._getInstance().initBuffer(content, self._getUnit(), self._getExplorer().setContent) @@ -1550,6 +1625,22 @@ def _workInIdle(self, content=None, bang=False): if self._cli.pattern: self._getInstance().setStlResultsCount(len(self._result_content)) + elif self._empty_query and self._getExplorer().getStlCategory() in ["File"]: + self._guessSearch(self._content) + if bang: + if self._result_content: # self._result_content is [] 
only if + # self._cur_buffer.name == '' or self._cur_buffer.options["buftype"] != b'': + self._getInstance().appendBuffer(self._result_content[self._initial_count:]) + else: + self._getInstance().appendBuffer(self._content[self._initial_count:]) + + if self._timer_id is not None: + lfCmd("call timer_stop(%s)" % self._timer_id) + self._timer_id = None + + self._bangReadFinished() + + lfCmd("echohl WarningMsg | redraw | echo ' Done!' | echohl NONE") else: if bang: if self._getInstance().empty(): @@ -1576,14 +1667,18 @@ def _workInIdle(self, content=None, bang=False): self._getInstance().setStlRunning(False) lfCmd("redrawstatus") - if self._cli.pattern and (self._index < len(self._content) or len(self._cb_content) > 0): - if self._fuzzy_engine: - step = 10000 * cpu_count - elif is_fuzzyMatch_C: - step = 10000 - else: - step = 2000 - self._search(self._content, True, step) + if self._cli.pattern: + if self._index < len(self._content) or len(self._cb_content) > 0: + if self._fuzzy_engine: + step = 10000 * cpu_count + elif is_fuzzyMatch_C: + step = 10000 + else: + step = 2000 + self._search(self._content, True, step) + + if bang: + self._getInstance().appendBuffer(self._result_content[self._initial_count:]) else: cur_len = len(self._content) if time.time() - self._start_time > 0.1: @@ -1635,8 +1730,11 @@ def input(self): self._hideHelp() self._resetHighlights() - if self._pattern: - self._search(self._content) + if self._cli.pattern: # --input xxx or from normal mode to input mode + if self._index == 0: # --input xxx + self._search(self._content) + elif self._empty_query and self._getExplorer().getStlCategory() in ["File"]: + self._guessSearch(self._content) for cmd in self._cli.input(self._callback): cur_len = len(self._content) diff --git a/doc/leaderf.txt b/doc/leaderf.txt index e082418b..fa4a89c7 100644 --- a/doc/leaderf.txt +++ b/doc/leaderf.txt @@ -617,6 +617,13 @@ g:Lf_Gtagslabel *g:Lf_Gtagslabel* set this option as "native-pygments". Default value is "default". 
+g:Lf_EmptyQuery *g:Lf_EmptyQuery* + This option specifies whether to enable the empty query, i.e., when no + pattern has been entered, sort the results by how closely they match the + current buffer's name. + + Default value is 1. + ============================================================================== USAGE *leaderf-usage*