diff --git a/CHANGELOG.md b/CHANGELOG.md index 1db32814b..ca0691916 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ CHANGELOG - Print a note about Rust nightly is requred for building the Python dynamic module. - Refine the syntax of `Clap lines` with `ClapLinesBufname` and `ClapLinesNumber` group added. - [perf] Use const table instead of `lazy_static` for the icons, [more info](https://github.com/liuchengxu/vim-clap/pull/354#discussion_r395975392). Thanks to @ImmemorConsultrixContrarie . +- [perf] Major improvement :tada: support dynamic filtering, contributed by @ImmemorConsultrixContrarie. ([#364](https://github.com/liuchengxu/vim-clap/pull/364)) ### Fixed diff --git a/autoload/clap.vim b/autoload/clap.vim index 7183fa222..6fa9d4d1c 100644 --- a/autoload/clap.vim +++ b/autoload/clap.vim @@ -145,22 +145,6 @@ function! clap#_init() abort call g:clap.display.setbufvar('&filetype', '') endfunction -function! s:unlet_vars(vars) abort - for var in a:vars - if exists(var) - execute 'unlet' var - endif - endfor -endfunction - -function! s:remove_provider_tmp_vars(vars) abort - for var in a:vars - if has_key(g:clap.provider, var) - call remove(g:clap.provider, var) - endif - endfor -endfunction - function! clap#_exit() abort call g:clap.provider.jobstop() call clap#forerunner#stop() @@ -178,20 +162,10 @@ function! clap#_exit() abort call g:clap.input.clear() call g:clap.display.clear() - call s:remove_provider_tmp_vars([ - \ 'args', - \ 'source_tempfile', - \ 'should_switch_to_async', - \ ]) - - call s:unlet_vars([ - \ 'g:__clap_fuzzy_matched_indices', - \ 'g:__clap_forerunner_result', - \ 'g:__clap_lines_truncated_map', - \ ]) - call clap#sign#reset() + call clap#state#clear_post() + call map(g:clap.tmps, 'delete(v:val)') let g:clap.tmps = [] endfunction @@ -268,7 +242,7 @@ function! 
s:try_register_is_ok(provider_id) abort try let registration_info = g:clap#provider#{provider_id}# catch /^Vim\%((\a\+)\)\=:E121/ - call clap#helper#echo_error('Fail to load the provider: '.provider_id) + call clap#helper#echo_error('Fail to load provider: '.provider_id.', E:'.v:exception) return v:false endtry endif @@ -283,21 +257,6 @@ function! s:try_register_is_ok(provider_id) abort return s:validate_provider(registration_info) endfunction -function! s:clear_state() abort - call s:unlet_vars([ - \ 'g:__clap_provider_cwd', - \ 'g:__clap_raw_source', - \ 'g:__clap_initial_source_size', - \ ]) - - if exists('g:__clap_forerunner_tempfile') - if filereadable(g:__clap_forerunner_tempfile) - call delete(g:__clap_forerunner_tempfile) - endif - unlet g:__clap_forerunner_tempfile - endif -endfunction - function! clap#for(provider_id_or_alias) abort if has_key(s:provider_alias, a:provider_id_or_alias) let provider_id = s:provider_alias[a:provider_id_or_alias] @@ -314,6 +273,9 @@ function! clap#for(provider_id_or_alias) abort return endif + call clap#state#clear_pre() + + " g:__clap_provider_cwd can be set during this process, so this needs to be executed after clap#state#clear_pre() if has_key(g:clap.provider._(), 'source') if has_key(g:clap.provider._(), 'source_type') let g:clap.provider.source_type = g:clap.provider._().source_type @@ -323,8 +285,6 @@ function! clap#for(provider_id_or_alias) abort endif endif - call s:clear_state() - call clap#handler#init() call g:clap.open_win() diff --git a/autoload/clap/api.vim b/autoload/clap/api.vim index e7bfe2514..731073c93 100644 --- a/autoload/clap/api.vim +++ b/autoload/clap/api.vim @@ -442,25 +442,6 @@ function! s:init_provider() abort return cmd endfunction - " Return the cached source tmp file, - " otherwise write the source list into a temp file and then return it. - function! 
s:into_source_tmp_file(source_list) abort - if has_key(g:clap.provider, 'source_tempfile') - let tmp = g:clap.provider.source_tempfile - return tmp - else - let tmp = tempname() - if writefile(a:source_list, tmp) == 0 - call add(g:clap.tmps, tmp) - let g:clap.provider.source_tempfile = tmp - return tmp - else - call g:clap.abort('Fail to write source to a temp file') - return '' - endif - endif - endfunction - function! provider.source_async_or_default() abort if has_key(self._(), 'source_async') return self._().source_async() @@ -472,7 +453,9 @@ function! s:init_provider() abort return s:wrap_async_cmd(Source) elseif self.source_type == g:__t_func_string return s:wrap_async_cmd(Source()) - elseif self.source_type == g:__t_list + endif + + if self.source_type == g:__t_list let lines = copy(Source) " This optimization has been moved to on_typed_async_impl() " elseif self.id ==# 'blines' @@ -484,7 +467,7 @@ function! s:init_provider() abort let ext_filter_cmd = clap#filter#async#external#get_cmd_or_default() - let tmp = s:into_source_tmp_file(lines) + let tmp = clap#state#into_tempfile(lines) let cmd = s:read_from_file_or_pipe(ext_filter_cmd, tmp) return cmd diff --git a/autoload/clap/filter/async/dyn.vim b/autoload/clap/filter/async/dyn.vim new file mode 100644 index 000000000..4ffb073b1 --- /dev/null +++ b/autoload/clap/filter/async/dyn.vim @@ -0,0 +1,43 @@ +" Author: liuchengxu +" Description: Dynamic update version of maple filter. + +let s:save_cpo = &cpoptions +set cpoptions&vim + +function! s:handle_message(msg) abort + if !g:clap.display.win_is_valid() + \ || g:clap.input.get() !=# s:last_query + return + endif + + call clap#state#handle_message(a:msg) +endfunction + +function! clap#filter#async#dyn#start_directly(maple_cmd) abort + let s:last_query = g:clap.input.get() + call clap#job#stdio#start_service(function('s:handle_message'), a:maple_cmd) +endfunction + +function! 
clap#filter#async#dyn#start(cmd) abort + let s:last_query = g:clap.input.get() + call clap#job#stdio#start_dyn_filter_service(function('s:handle_message'), a:cmd) +endfunction + +function! clap#filter#async#dyn#from_tempfile(tempfile) abort + let s:last_query = g:clap.input.get() + if g:clap_enable_icon && index(['files', 'git_files'], g:clap.provider.id) > -1 + let enable_icon_opt = '--enable-icon' + else + let enable_icon_opt = '' + endif + let filter_cmd = printf('%s --number 100 --winwidth %d filter "%s" --input %s', + \ enable_icon_opt, + \ winwidth(g:clap.display.winid), + \ g:clap.input.get(), + \ a:tempfile + \ ) + call clap#job#stdio#start_service(function('s:handle_message'), clap#maple#build_cmd(filter_cmd)) +endfunction + +let &cpoptions = s:save_cpo +unlet s:save_cpo diff --git a/autoload/clap/filter/async/external.vim b/autoload/clap/filter/async/external.vim index d21478c54..f30656e55 100644 --- a/autoload/clap/filter/async/external.vim +++ b/autoload/clap/filter/async/external.vim @@ -48,7 +48,7 @@ endfunction function! s:cmd_of(ext_filter) abort if a:ext_filter ==# 'maple' - return clap#maple#filter_subcommand(g:clap.input.get()) + return clap#maple#sync_filter_subcommand(g:clap.input.get()) else return printf(s:ext_cmd[a:ext_filter], g:clap.input.get()) endif diff --git a/autoload/clap/forerunner.vim b/autoload/clap/forerunner.vim index 4d4011e3a..1220d6516 100644 --- a/autoload/clap/forerunner.vim +++ b/autoload/clap/forerunner.vim @@ -13,7 +13,7 @@ function! 
s:on_complete_common(lines, initial_size) abort endif let g:clap.display.initial_size = a:initial_size - call clap#state#refresh_matches_count(string(a:initial_size)) + call clap#state#refresh_matches_count_on_forerunner_done() let g:__clap_current_forerunner_status = g:clap_forerunner_status_sign.done call clap#spinner#refresh() diff --git a/autoload/clap/impl.vim b/autoload/clap/impl.vim index 4593f86b2..7ac0b01df 100644 --- a/autoload/clap/impl.vim +++ b/autoload/clap/impl.vim @@ -100,15 +100,27 @@ function! s:on_typed_async_impl() abort let source_file = expand('#'.g:clap.start.bufnr.':p') let blines_cmd = clap#maple#blines_subcommand(g:clap.input.get()) let maple_cmd = printf('%s %s', blines_cmd, source_file) - call clap#rooter#run(function('clap#maple#job_start'), maple_cmd) - else - let cmd = g:clap.provider.source_async_or_default() + call clap#filter#async#dyn#start_directly(maple_cmd) + return + endif - if clap#filter#async#external#using_maple() - call clap#rooter#run(function('clap#maple#job_start'), cmd) - else - call clap#rooter#run(function('clap#dispatcher#job_start'), cmd) + if clap#filter#async#external#using_maple() + if exists('g:__clap_forerunner_tempfile') + call clap#filter#async#dyn#from_tempfile(g:__clap_forerunner_tempfile) + return + endif + if g:clap.provider.source_type == g:__t_string + call clap#filter#async#dyn#start(g:clap.provider._().source) + return + elseif g:clap.provider.source_type == g:__t_func_string + call clap#filter#async#dyn#start(g:clap.provider._().source()) + return endif + let cmd = g:clap.provider.source_async_or_default() + call clap#rooter#run(function('clap#maple#job_start'), cmd) + else + let cmd = g:clap.provider.source_async_or_default() + call clap#rooter#run(function('clap#dispatcher#job_start'), cmd) endif call clap#spinner#set_busy() @@ -173,10 +185,12 @@ function! 
clap#impl#on_typed() abort call s:on_typed_async_impl() return endif + if exists('g:__clap_forerunner_result') call s:on_typed_sync_impl() return endif + if g:clap.provider.can_async() && \ (get(g:clap.context, 'async') is v:true || s:should_switch_to_async()) call s:on_typed_async_impl() diff --git a/autoload/clap/maple.vim b/autoload/clap/maple.vim index 6f7ecf69f..13bf750a5 100644 --- a/autoload/clap/maple.vim +++ b/autoload/clap/maple.vim @@ -54,7 +54,12 @@ function! s:on_complete() abort return endif - let decoded = json_decode(s:chunks[0]) + try + let decoded = json_decode(s:chunks[0]) + catch + echoerr '[maple]decoded on_complete:'.string(s:chunks) + return + endtry if has_key(decoded, 'error') call g:clap.display.set_lines([ \ 'The external job runs into some issue:', @@ -196,14 +201,15 @@ function! clap#maple#forerunner_exec_subcommand(cmd) abort return printf('%s %s %s', s:maple_bin, global_opt, subcommand) endfunction -function! clap#maple#filter_subcommand(query) abort +" Returns the filtered results after the input stream is complete. +function! clap#maple#sync_filter_subcommand(query) abort let global_opt = '--number '.g:clap.display.preload_capacity.' --winwidth '.winwidth(g:clap.display.winid) if g:clap.provider.id ==# 'files' && g:clap_enable_icon let global_opt .= ' --enable-icon' endif - let cmd = printf('%s %s filter "%s"', s:maple_bin, global_opt, a:query) + let cmd = printf('%s %s filter "%s" --sync', s:maple_bin, global_opt, a:query) return cmd endfunction diff --git a/autoload/clap/state.vim b/autoload/clap/state.vim index 966427279..160f3ffa5 100644 --- a/autoload/clap/state.vim +++ b/autoload/clap/state.vim @@ -8,6 +8,7 @@ set cpoptions&vim " may run into some erratic read-only error. function! clap#state#refresh_matches_count(cnt_str) abort let l:matches_cnt = a:cnt_str + let s:current_matches = a:cnt_str if get(g:clap.display, 'initial_size', -1) > 0 let l:matches_cnt .= '/'.g:clap.display.initial_size @@ -17,6 +18,12 @@ function! 
clap#state#refresh_matches_count(cnt_str) abort call clap#sign#reset_to_first_line() endfunction +function! clap#state#refresh_matches_count_on_forerunner_done() abort + if exists('s:current_matches') + call clap#indicator#set_matches(printf('[%s/%s]', s:current_matches, g:clap.display.initial_size)) + endif +endfunction + function! clap#state#handle_message(msg) abort let decoded = json_decode(a:msg) @@ -41,5 +48,72 @@ function! clap#state#handle_message(msg) abort endif endfunction +" Returns the cached source tmp file. +" +" Write the providers whose `source` is list-style into a tempfile. +function! clap#state#into_tempfile(source_list) abort + if has_key(g:clap.provider, 'source_tempfile') + let tmp = g:clap.provider.source_tempfile + return tmp + else + let tmp = tempname() + if writefile(a:source_list, tmp) == 0 + call add(g:clap.tmps, tmp) + let g:clap.provider.source_tempfile = tmp + return tmp + else + call g:clap.abort('Fail to write source to a temp file') + return '' + endif + endif +endfunction + +function! s:unlet_vars(vars) abort + for var in a:vars + if exists(var) + execute 'unlet' var + endif + endfor +endfunction + +function! s:remove_provider_tmp_vars(vars) abort + for var in a:vars + if has_key(g:clap.provider, var) + call remove(g:clap.provider, var) + endif + endfor +endfunction + +" Clear the previous temp state when invoking a new provider. +function! clap#state#clear_pre() abort + call s:unlet_vars([ + \ 'g:__clap_raw_source', + \ 'g:__clap_provider_cwd', + \ 'g:__clap_initial_source_size', + \ ]) + + if exists('g:__clap_forerunner_tempfile') + if filereadable(g:__clap_forerunner_tempfile) + call delete(g:__clap_forerunner_tempfile) + endif + unlet g:__clap_forerunner_tempfile + endif +endfunction + +" Clear temp state on clap#_exit() +function! 
clap#state#clear_post() abort + call s:remove_provider_tmp_vars([ + \ 'args', + \ 'source_tempfile', + \ 'should_switch_to_async', + \ ]) + + call s:unlet_vars([ + \ 'g:__clap_fuzzy_matched_indices', + \ 'g:__clap_forerunner_result', + \ 'g:__clap_lines_truncated_map', + \ ]) +endfunction + let &cpoptions = s:save_cpo unlet s:save_cpo diff --git a/crates/maple_cli/src/cmd/filter/dynamic.rs b/crates/maple_cli/src/cmd/filter/dynamic.rs new file mode 100644 index 000000000..d0afe4fe8 --- /dev/null +++ b/crates/maple_cli/src/cmd/filter/dynamic.rs @@ -0,0 +1,460 @@ +use super::*; +use extracted_fzy::match_and_score_with_positions; +use fuzzy_filter::FuzzyMatchedLineInfo; +use fuzzy_matcher::skim::fuzzy_indices; +use rayon::slice::ParallelSliceMut; +use std::io::{self, BufRead}; +use std::time::{Duration, Instant}; + +/// The constant to define the length of `top_` queues. +const ITEMS_TO_SHOW: usize = 100; + +const MAX_IDX: usize = ITEMS_TO_SHOW - 1; + +/// Refresh the top filtered results per 200 ms. +const UPDATE_INTERVAL: Duration = Duration::from_millis(200); + +trait Insert { + fn pop_and_insert(&mut self, idx: usize, value: T); +} + +impl Insert for [T; ITEMS_TO_SHOW] { + fn pop_and_insert(&mut self, idx: usize, value: T) { + if idx < MAX_IDX { + self.copy_within(idx..MAX_IDX, idx + 1); + self[idx] = value; + } else { + self[MAX_IDX] = value; + } + } +} + +/// Combine json and println macro. +/// +/// Neovim needs Content-length info when using stdio-based communication. +macro_rules! print_json_with_length { + ( $( $field:expr ),+ ) => { + { + let msg = serde_json::json!({ $(stringify!($field): $field,)* }); + if let Ok(s) = serde_json::to_string(&msg) { + println!("Content-length: {}\n\n{}", s.len(), s); + } + } + } +} + +/// This macro is a special thing for [`dyn_collect_all`] and [`dyn_collect_number`]. +macro_rules! 
insert_both { + // This macro pushes all things into buffer, pops one worst item from each top queue + // and then inserts all things into `top_` queues. + (pop; $index:expr, $score:expr, $text:expr, $indices:expr => $buffer:expr, $top_results:expr, $top_scores:expr) => {{ + match $index { + // If index is last possible, then the worst item is better than this we want to push in, + // and we do nothing. + Some(MAX_IDX) => $buffer.push(($text, $score, $indices)), + // Else, one item gets popped from the queue + // and other is inserted. + Some(idx) => { + insert_both!(idx + 1, $score, $text, $indices => $buffer, $top_results, $top_scores); + } + None => { + insert_both!(0, $score, $text, $indices => $buffer, $top_results, $top_scores); + } + } + }}; + + // This macro pushes all things into buffer and inserts all things into + // `top_` queues. + ($index:expr, $score:expr, $text:expr, $indices:expr => $buffer:expr, $top_results:expr, $top_scores:expr) => {{ + $buffer.push(($text, $score, $indices)); + $top_results.pop_and_insert($index, $buffer.len() - 1); + $top_scores.pop_and_insert($index, $score); + }}; +} + +type SelectedTopItemsInfo = (usize, [i64; ITEMS_TO_SHOW], [usize; ITEMS_TO_SHOW]); + +/// Returns Ok if all items in the iterator has been processed. +/// +/// First, let's try to produce `ITEMS_TO_SHOW` items to fill the topscores. +fn select_top_items_to_show( + buffer: &mut Vec, + iter: &mut impl Iterator, +) -> std::result::Result { + let mut top_scores: [i64; ITEMS_TO_SHOW] = [i64::min_value(); ITEMS_TO_SHOW]; + let mut top_results: [usize; ITEMS_TO_SHOW] = [usize::min_value(); ITEMS_TO_SHOW]; + + let mut total = 0; + let res = iter.try_for_each(|(text, score, indices)| { + let idx = match find_best_score_idx(&top_scores, score) { + Some(idx) => idx + 1, + None => 0, + }; + + insert_both!(idx, score, text, indices => buffer, top_results, top_scores); + + // Stop iterating after `ITEMS_TO_SHOW` iterations. 
+ total += 1; + if total == ITEMS_TO_SHOW { + Err(()) + } else { + Ok(()) + } + }); + + if res.is_ok() { + Ok(total) + } else { + Err((total, top_scores, top_results)) + } +} + +/// Returns the index of the last entry in `top_scores` whose score is strictly greater than `score`, or `None` if there is no such entry. +/// +/// Best results are stored in front, the bigger the better. +#[inline] +fn find_best_score_idx(top_scores: &[i64; ITEMS_TO_SHOW], score: i64) -> Option { + top_scores + .iter() + .enumerate() + .rev() // .rev(), because worse items are at the end. + .find(|&(_, &other_score)| other_score > score) + .map(|(idx, _)| idx) +} + +/// Returns the refreshed timestamp if the current top scored items were sent to the client, `Err(())` otherwise. +/// +/// Printing to stdout is to send the printed content to the client. +fn try_notify_top_results( + enable_icon: bool, + total: usize, + past: &Instant, + top_results_len: usize, + top_results: &[usize; ITEMS_TO_SHOW], + buffer: &[FuzzyMatchedLineInfo], +) -> std::result::Result { + if total % 16 == 0 { + let now = Instant::now(); + if now > *past + UPDATE_INTERVAL { + let mut indices = Vec::with_capacity(top_results_len); + let mut lines = Vec::with_capacity(top_results_len); + for &idx in top_results.iter() { + let (text, _, idxs) = std::ops::Index::index(buffer, idx); + indices.push(idxs); + let text = if enable_icon { + prepend_icon(&text) + } else { + text.clone() + }; + lines.push(text); + } + + print_json_with_length!(total, lines, indices); + + return Ok(now); + } + } + Err(()) +} + +/// To get dynamic updates, not so much should be changed, actually. +/// First: instead of collecting iterator into vector, this iterator +/// should be `for_each`ed or something like this. +/// Second: while iterator is `for_each`ed, its results are collected +/// into some collection, and `total` is increased by one for each iteration. +/// At some points of iteration that collection gets printed and voila! 
+/// +/// Though it sounds easy, there's one pitfall: +/// `par_iter` iteration should use atomic `total`, because, well, it's parallel. +/// And some rough edges: if there are too many results, sorting and json+print +/// could take too much time. Same problem for too big `number`. +/// +/// So, to get dynamic results, I'm gonna use VecDeque with little constant space. +/// But there's a problem with `par_iter` again, as there should be mutexed access to the +/// VecDeque for this iterator. +/// +/// So, this particular function won't work in parallel context at all. +fn dyn_collect_all( + mut iter: impl Iterator, + enable_icon: bool, +) -> Vec { + let mut buffer = Vec::with_capacity({ + let (low, high) = iter.size_hint(); + high.unwrap_or(low) + }); + + let should_return = select_top_items_to_show(&mut buffer, &mut iter); + + let (mut total, mut top_scores, mut top_results) = match should_return { + Ok(_) => return buffer, + Err((t, top_scores, top_results)) => (t, top_scores, top_results), + }; + + // Now we have the full queue and can just pair `.pop_back()` with `.insert()` to keep + // the queue with best results the same size. + let mut past = std::time::Instant::now(); + iter.for_each(|(text, score, indices)| { + let idx = find_best_score_idx(&top_scores, score); + + insert_both!(pop; idx, score, text, indices => buffer, top_results, top_scores); + + total = total.wrapping_add(1); + + if let Ok(now) = try_notify_top_results( + enable_icon, + total, + &past, + top_results.len(), + &top_results, + &buffer, + ) { + past = now; + } + }); + + buffer +} + +/// If you only need a `number` of elements, then you don't need to collect all +/// items produced by the iterator. +/// +/// # Returns +/// +/// Tuple of `(total_number_of_iterations: usize, Vec<_>)`. +/// The vector is not sorted nor truncated. +// +// Even though the current implementation isn't the most effective thing to do it, +// I think, it's just good enough. 
And should be more effective than full +// `collect()` into Vec on big numbers of iterations. +fn dyn_collect_number( + mut iter: impl Iterator, + enable_icon: bool, + number: usize, +) -> (usize, Vec) { + // To not have problems with queues after sorting and truncating the buffer, + // buffer has the lowest bound of `ITEMS_TO_SHOW * 2`, not `number * 2`. + let mut buffer = Vec::with_capacity(2 * std::cmp::max(ITEMS_TO_SHOW, number)); + + let should_return = select_top_items_to_show(&mut buffer, &mut iter); + + let (mut total, mut top_scores, mut top_results) = match should_return { + Ok(t) => return (t, buffer), + Err((t, top_scores, top_results)) => (t, top_scores, top_results), + }; + + // Now we have the full queue and can just pair `.pop_back()` with `.insert()` to keep + // the queue with best results the same size. + let mut past = std::time::Instant::now(); + iter.for_each(|(text, score, indices)| { + let idx = find_best_score_idx(&top_scores, score); + + insert_both!(pop; idx, score, text, indices => buffer, top_results, top_scores); + + total += 1; + + if let Ok(now) = try_notify_top_results( + enable_icon, + total, + &past, + top_results.len(), + &top_results, + &buffer, + ) { + past = now; + } + + if buffer.len() == buffer.capacity() { + buffer.par_sort_unstable_by(|(_, v1, _), (_, v2, _)| v2.partial_cmp(&v1).unwrap()); + + for (idx, (_, score, _)) in buffer[..ITEMS_TO_SHOW].iter().enumerate() { + top_scores[idx] = *score; + top_results[idx] = idx; + } + + let half = buffer.len() / 2; + buffer.truncate(half); + } + }); + + (total, buffer) +} + +/// Returns the ranked results after applying fuzzy filter given the query string and a list of candidates. 
+pub fn dyn_fuzzy_filter_and_rank>( + query: &str, + source: Source, + algo: Option, + number: Option, + enable_icon: bool, + winwidth: Option, +) -> Result<()> { + let algo = algo.unwrap_or(Algo::Fzy); + + let scorer = |line: &str| match algo { + Algo::Skim => fuzzy_indices(line, query), + Algo::Fzy => match_and_score_with_positions(query, line) + .map(|(score, indices)| (score as i64, indices)), + }; + + if let Some(number) = number { + let (total, filtered) = match source { + Source::Stdin => dyn_collect_number( + io::stdin().lock().lines().filter_map(|lines_iter| { + lines_iter.ok().and_then(|line| { + scorer(&line).map(|(score, indices)| (line, score, indices)) + }) + }), + enable_icon, + number, + ), + Source::Exec(exec) => dyn_collect_number( + std::io::BufReader::new(exec.stream_stdout()?) + .lines() + .filter_map(|lines_iter| { + lines_iter.ok().and_then(|line| { + scorer(&line).map(|(score, indices)| (line, score, indices)) + }) + }), + enable_icon, + number, + ), + Source::File(fpath) => dyn_collect_number( + std::fs::read_to_string(fpath)?.lines().filter_map(|line| { + scorer(&line).map(|(score, indices)| (line.into(), score, indices)) + }), + enable_icon, + number, + ), + Source::List(list) => dyn_collect_number( + list.filter_map(|line| { + scorer(&line).map(|(score, indices)| (line, score, indices)) + }), + enable_icon, + number, + ), + }; + let (lines, indices, truncated_map) = process_top_items( + number, + filtered.into_iter().take(number), + winwidth.unwrap_or(62), + enable_icon, + ); + + if truncated_map.is_empty() { + print_json_with_length!(total, lines, indices); + } else { + print_json_with_length!(total, lines, indices, truncated_map); + } + } else { + let mut filtered = match source { + Source::Stdin => dyn_collect_all( + io::stdin().lock().lines().filter_map(|lines_iter| { + lines_iter.ok().and_then(|line| { + scorer(&line).map(|(score, indices)| (line, score, indices)) + }) + }), + enable_icon, + ), + Source::Exec(exec) => 
dyn_collect_all( + std::io::BufReader::new(exec.stream_stdout()?) + .lines() + .filter_map(|lines_iter| { + lines_iter.ok().and_then(|line| { + scorer(&line).map(|(score, indices)| (line, score, indices)) + }) + }), + enable_icon, + ), + Source::File(fpath) => dyn_collect_all( + std::fs::read_to_string(fpath)?.lines().filter_map(|line| { + scorer(line).map(|(score, indices)| (line.into(), score, indices)) + }), + enable_icon, + ), + Source::List(list) => dyn_collect_all( + list.filter_map(|line| { + scorer(&line).map(|(score, indices)| (line, score, indices)) + }), + enable_icon, + ), + }; + + filtered.par_sort_unstable_by(|(_, v1, _), (_, v2, _)| v2.partial_cmp(&v1).unwrap()); + + let ranked = filtered; + + for (text, _, indices) in ranked.iter() { + println_json!(text, indices); + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + // This is a very time-consuming test, + // results of which can be verified only by inspecting stdout. + // Definitely not something you want to run with `cargo test`. + #[ignore] + fn dynamic_results() { + use std::time::{SystemTime, UNIX_EPOCH}; + + const ALPHABET: [u8; 32] = [ + b'q', b'w', b'e', b'r', b't', b'y', b'u', b'i', b'o', b'p', b'[', b']', b'a', b's', + b'd', b'f', b'g', b'h', b'j', b'k', b'l', b';', b'z', b'x', b'c', b'v', b'b', b'n', + b'm', b',', b'.', b' ', + ]; + + // To mock the endless randomized text, we need three numbers: + // 1. A number of letters to change. + // then for each such number + // 2. Alphabet index to get a new letter, + // 3. Changing text index to write a new letter. 
+ let now = SystemTime::now(); + let mut bytes: usize = now + .duration_since(UNIX_EPOCH) + .unwrap_or_else(|_| UNIX_EPOCH.duration_since(now).unwrap()) + .as_secs() as usize; + + let mut changing_text: [u8; 16] = [ALPHABET[31]; 16]; + let mut total_lines_created: usize = 0; + dyn_fuzzy_filter_and_rank( + "abc", + Source::List( + std::iter::repeat_with(|| { + bytes = bytes.reverse_bits().rotate_right(3).wrapping_add(1); + + let mut n = bytes; + // Number of letter to change. + let i = (n % 4) + 1; + n /= 4; + for _ in 0..i { + let text_idx = n % 16; + n /= 16; + let ab_idx = n % 32; + n /= 32; + + changing_text[text_idx] = ALPHABET[ab_idx]; + } + + total_lines_created += 1; + if total_lines_created % 99999_usize.next_power_of_two() == 0 { + println!("Total lines created: {}", total_lines_created) + } + + String::from_utf8(changing_text.as_ref().to_owned()).unwrap() + }) + .take(usize::max_value() >> 8), + ), + Some(Algo::Fzy), + Some(100), + false, + None, + ) + .unwrap() + } +} diff --git a/crates/maple_cli/src/cmd/filter.rs b/crates/maple_cli/src/cmd/filter/mod.rs similarity index 94% rename from crates/maple_cli/src/cmd/filter.rs rename to crates/maple_cli/src/cmd/filter/mod.rs index 638f1638d..77110378e 100644 --- a/crates/maple_cli/src/cmd/filter.rs +++ b/crates/maple_cli/src/cmd/filter/mod.rs @@ -1,3 +1,7 @@ +pub mod dynamic; + +pub use dynamic::dyn_fuzzy_filter_and_rank as dyn_run; + use std::collections::HashMap; use std::path::Path; @@ -69,7 +73,7 @@ pub fn blines( number: Option, winwidth: Option, ) -> Result<()> { - run( + crate::cmd::filter::dynamic::dyn_fuzzy_filter_and_rank( query, Source::List( std::fs::read_to_string(&input)? 
diff --git a/crates/maple_cli/src/cmd/mod.rs b/crates/maple_cli/src/cmd/mod.rs index 222a3c1ff..7b82ed8a0 100644 --- a/crates/maple_cli/src/cmd/mod.rs +++ b/crates/maple_cli/src/cmd/mod.rs @@ -26,6 +26,18 @@ pub enum Cmd { #[structopt(short, long, possible_values = &Algo::variants(), case_insensitive = true)] algo: Option, + /// Shell command to produce the whole dataset that query is applied on. + #[structopt(short, long)] + cmd: Option, + + /// Working directory of shell command. + #[structopt(short, long)] + cmd_dir: Option, + + /// Synchronous filtering, returns after the input stream is complete. + #[structopt(short, long)] + sync: bool, + /// Read input from a file instead of stdin, only absolute file path is supported. #[structopt(long = "input", parse(from_os_str))] input: Option, diff --git a/crates/maple_cli/src/lib.rs b/crates/maple_cli/src/lib.rs index fae15c828..63406ec5f 100644 --- a/crates/maple_cli/src/lib.rs +++ b/crates/maple_cli/src/lib.rs @@ -8,7 +8,11 @@ macro_rules! 
println_json { } pub mod cmd; -pub use {anyhow::Result, fuzzy_filter::Source, structopt::StructOpt}; +pub use { + anyhow::Result, + fuzzy_filter::{subprocess, Source}, + structopt::StructOpt, +}; mod error; mod light_command; diff --git a/src/main.rs b/src/main.rs index f4be7b756..21c33f2ee 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use maple_cli::{ cmd::{Cmd, Maple}, - Result, Source, StructOpt, + subprocess, Result, Source, StructOpt, }; pub mod built_info { @@ -28,18 +28,44 @@ fn run(maple: Maple) -> Result<()> { Cmd::RPC => { maple_cli::cmd::rpc::run_forever(std::io::BufReader::new(std::io::stdin())); } - Cmd::Filter { query, input, algo } => { - let source = input - .map(Into::into) - .unwrap_or(Source::>::Stdin); - maple_cli::cmd::filter::run( - &query, - source, - algo, - maple.number, - maple.enable_icon, - maple.winwidth, - )?; + Cmd::Filter { + query, + input, + algo, + cmd, + cmd_dir, + sync, + } => { + let source = if let Some(cmd_str) = cmd { + if let Some(dir) = cmd_dir { + subprocess::Exec::shell(cmd_str).cwd(dir).into() + } else { + subprocess::Exec::shell(cmd_str).into() + } + } else { + input + .map(Into::into) + .unwrap_or(Source::>::Stdin) + }; + if sync { + maple_cli::cmd::filter::run( + &query, + source, + algo, + maple.number, + maple.enable_icon, + maple.winwidth, + )?; + } else { + maple_cli::cmd::filter::dyn_run( + &query, + source, + algo, + maple.number, + maple.enable_icon, + maple.winwidth, + )?; + } } Cmd::Blines { query, input } => { maple_cli::cmd::filter::blines(&query, &input, maple.number, maple.winwidth)?;