diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs new file mode 100644 index 000000000000..243a1374332f --- /dev/null +++ b/helix-core/src/chars.rs @@ -0,0 +1,41 @@ +/// Determine whether a character is a line break. +pub fn char_is_linebreak(c: char) -> bool { + matches!( + c, + '\u{000A}' | // LineFeed + '\u{000B}' | // VerticalTab + '\u{000C}' | // FormFeed + '\u{000D}' | // CarriageReturn + '\u{0085}' | // NextLine + '\u{2028}' | // Line Separator + '\u{2029}' // ParagraphSeparator + ) +} + +/// Determine whether a character qualifies as (non-line-break) +/// whitespace. +pub fn char_is_whitespace(c: char) -> bool { + // TODO: this is a naive binary categorization of whitespace + // characters. For display, word wrapping, etc. we'll need a better + // categorization based on e.g. breaking vs non-breaking spaces + // and whether they're zero-width or not. + match c { + //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace) + '\u{0009}' | // Character Tabulation + '\u{0020}' | // Space + '\u{00A0}' | // No-break Space + '\u{180E}' | // Mongolian Vowel Separator + '\u{202F}' | // Narrow No-break Space + '\u{205F}' | // Medium Mathematical Space + '\u{3000}' | // Ideographic Space + '\u{FEFF}' // Zero Width No-break Space + => true, + + // En Quad, Em Quad, En Space, Em Space, Three-per-em Space, + // Four-per-em Space, Six-per-em Space, Figure Space, + // Punctuation Space, Thin Space, Hair Space, Zero Width Space. + c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true, + + _ => false, + } +} diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index 79a22547c90a..b11faeab0892 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -1,5 +1,6 @@ #![allow(unused)] pub mod auto_pairs; +pub mod chars; pub mod comment; pub mod diagnostic; pub mod graphemes; diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index b5f3756311bf..fb254432d63e 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -9,7 +9,7 @@ use helix_core::{ }; use helix_view::{ - document::Mode, + document::{IndentStyle, Mode}, view::{View, PADDING}, Document, DocumentId, Editor, ViewId, }; @@ -979,6 +979,26 @@ mod cmd { doc.format(view.id) } + fn set_indent_style(editor: &mut Editor, args: &[&str], event: PromptEvent) { + use IndentStyle::*; + + let style = match args.get(0) { + Some(arg) if "tabs".starts_with(&arg.to_lowercase()) => Some(Tabs), + Some(&"0") => Some(Tabs), + Some(arg) => arg + .parse::() + .ok() + .filter(|n| (1..=8).contains(n)) + .map(Spaces), + _ => None, + }; + + if let Some(s) = style { + let (_, doc) = editor.current(); + doc.indent_style = s; + } + } + fn earlier(editor: &mut Editor, args: &[&str], event: PromptEvent) { let uk = match args.join(" ").parse::() { Ok(uk) => uk, @@ -1143,6 +1163,13 @@ mod cmd { fun: format, completer: None, }, + Command { + name: "indent-style", + alias: None, + doc: "Set the indentation style for editing. ('t' for tabs or 1-8 for number of spaces.)", + fun: set_indent_style, + completer: None, + }, Command { name: "earlier", alias: Some("ear"), diff --git a/helix-term/src/ui/editor.rs b/helix-term/src/ui/editor.rs index dd385ac9a7ba..838684ba3aa4 100644 --- a/helix-term/src/ui/editor.rs +++ b/helix-term/src/ui/editor.rs @@ -11,7 +11,10 @@ use helix_core::{ syntax::{self, HighlightEvent}, Position, Range, }; -use helix_view::{document::Mode, Document, Editor, Theme, View}; +use helix_view::{ + document::{IndentStyle, Mode}, + Document, Editor, Theme, View, +}; use std::borrow::Cow; use crossterm::{ @@ -455,6 +458,10 @@ impl EditorView { theme: &Theme, is_focused: bool, ) { + //------------------------------- + // Left side of the status line. + //------------------------------- + let mode = match doc.mode() { Mode::Insert => "INS", Mode::Select => "SEL", @@ -487,24 +494,41 @@ impl EditorView { ); } - surface.set_stringn( - viewport.x + viewport.width.saturating_sub(15), - viewport.y, - format!("{}", doc.diagnostics().len()), - 4, - text_color, - ); - - // render line:col - let pos = coords_at_pos(doc.text().slice(..), doc.selection(view.id).cursor()); - - let text = format!("{}:{}", pos.row + 1, pos.col + 1); // convert to 1-indexing - let len = text.len(); + //------------------------------- + // Right side of the status line. + //------------------------------- + + // Compute the individual info strings. + let diag_count = format!("{}", doc.diagnostics().len()); + // let indent_info = match doc.indent_style { + // IndentStyle::Tabs => "tabs", + // IndentStyle::Spaces(1) => "spaces:1", + // IndentStyle::Spaces(2) => "spaces:2", + // IndentStyle::Spaces(3) => "spaces:3", + // IndentStyle::Spaces(4) => "spaces:4", + // IndentStyle::Spaces(5) => "spaces:5", + // IndentStyle::Spaces(6) => "spaces:6", + // IndentStyle::Spaces(7) => "spaces:7", + // IndentStyle::Spaces(8) => "spaces:8", + // _ => "indent:ERROR", + // }; + let position_info = { + let pos = coords_at_pos(doc.text().slice(..), doc.selection(view.id).cursor()); + format!("{}:{}", pos.row + 1, pos.col + 1) // convert to 1-indexing + }; + // Render them to the status line together. + let right_side_text = format!( + "{} {} ", + &diag_count[..diag_count.len().min(4)], + // indent_info, + position_info + ); + let text_len = right_side_text.len() as u16; surface.set_string( - viewport.x + viewport.width.saturating_sub(len as u16 + 1), + viewport.x + viewport.width.saturating_sub(text_len), viewport.y, - text, + right_side_text, text_color, ); } diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index e44759380c23..a1c4b40735e1 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -5,6 +5,7 @@ use std::path::{Component, Path, PathBuf}; use std::sync::Arc; use helix_core::{ + chars::{char_is_linebreak, char_is_whitespace}, history::History, syntax::{LanguageConfiguration, LOADER}, ChangeSet, Diagnostic, Rope, Selection, State, Syntax, Transaction, @@ -21,6 +22,12 @@ pub enum Mode { Insert, } +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum IndentStyle { + Tabs, + Spaces(u8), +} + pub struct Document { // rope + selection pub(crate) id: DocumentId, @@ -33,6 +40,9 @@ pub struct Document { pub mode: Mode, pub restore_cursor: bool, + /// Current indent style. + pub indent_style: IndentStyle, + syntax: Option, // /// Corresponding language scope name. Usually `source.`. pub(crate) language: Option>, @@ -149,6 +159,7 @@ impl Document { path: None, text, selections: HashMap::default(), + indent_style: IndentStyle::Spaces(4), mode: Mode::Normal, restore_cursor: false, syntax: None, @@ -182,6 +193,7 @@ impl Document { let mut doc = Self::new(doc); // set the path and try detecting the language doc.set_path(&path)?; + doc.detect_indent_style(); Ok(doc) } @@ -265,6 +277,132 @@ impl Document { } } + fn detect_indent_style(&mut self) { + // Build a histogram of the indentation *increases* between + // subsequent lines, ignoring lines that are all whitespace. + // + // Index 0 is for tabs, the rest are 1-8 spaces. + let histogram: [usize; 9] = { + let mut histogram = [0; 9]; + let mut prev_line_is_tabs = false; + let mut prev_line_leading_count = 0usize; + + // Loop through the lines, checking for and recording indentation + // increases as we go. + 'outer: for line in self.text.lines().take(1000) { + let mut c_iter = line.chars(); + + // Is first character a tab or space? + let is_tabs = match c_iter.next() { + Some('\t') => true, + Some(' ') => false, + + // Ignore blank lines. + Some(c) if char_is_linebreak(c) => continue, + + _ => { + prev_line_is_tabs = false; + prev_line_leading_count = 0; + continue; + } + }; + + // Count the line's total leading tab/space characters. + let mut leading_count = 1; + let mut count_is_done = false; + for c in c_iter { + match c { + '\t' if is_tabs && !count_is_done => leading_count += 1, + ' ' if !is_tabs && !count_is_done => leading_count += 1, + + // We stop counting if we hit whitespace that doesn't + // qualify as indent or doesn't match the leading + // whitespace, but we don't exit the loop yet because + // we still want to determine if the line is blank. + c if char_is_whitespace(c) => count_is_done = true, + + // Ignore blank lines. + c if char_is_linebreak(c) => continue 'outer, + + _ => break, + } + + // Bound the worst-case execution time for weird text files. + if leading_count > 256 { + continue 'outer; + } + } + + // If there was an increase in indentation over the previous + // line, update the histogram with that increase. + if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0) + && prev_line_leading_count < leading_count + { + if is_tabs { + histogram[0] += 1; + } else { + let amount = leading_count - prev_line_leading_count; + if amount <= 8 { + histogram[amount] += 1; + } + } + } + + // Store this line's leading whitespace info for use with + // the next line. + prev_line_is_tabs = is_tabs; + prev_line_leading_count = leading_count; + } + + // Give more weight to tabs, because their presence is a very + // strong indicator. + histogram[0] *= 2; + + histogram + }; + + // Find the most frequent indent, its frequency, and the frequency of + // the next-most frequent indent. + let indent = histogram + .iter() + .enumerate() + .max_by_key(|kv| kv.1) + .unwrap() + .0; + let indent_freq = histogram[indent]; + let indent_freq_2 = *histogram + .iter() + .enumerate() + .filter(|kv| kv.0 != indent) + .map(|kv| kv.1) + .max() + .unwrap(); + + // Use the auto-detected result if we're confident enough in its + // accuracy, based on some heuristics. Otherwise fall back to + // the language-based setting. + if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 { + // Use the auto-detected setting. + self.indent_style = match indent { + 0 => IndentStyle::Tabs, + _ => IndentStyle::Spaces(indent as u8), + }; + } else { + // Fall back to language-based setting. + let indent = self + .language + .as_ref() + .and_then(|config| config.indent.as_ref()) + .map_or(" ", |config| config.unit.as_str()); // fallback to 2 spaces + + self.indent_style = if indent.starts_with(' ') { + IndentStyle::Spaces(indent.len() as u8) + } else { + IndentStyle::Tabs + }; + } + } + pub fn set_path(&mut self, path: &Path) -> Result<(), std::io::Error> { let path = canonicalize_path(path)?; @@ -507,13 +645,25 @@ impl Document { } /// Returns a string containing a single level of indentation. - pub fn indent_unit(&self) -> &str { - self.language - .as_ref() - .and_then(|config| config.indent.as_ref()) - .map_or(" ", |config| config.unit.as_str()) // fallback to 2 spaces - - // " ".repeat(TAB_WIDTH) + /// + /// TODO: we might not need this function anymore, since the information + /// is conveniently available in `Document::indent_style` now. + pub fn indent_unit(&self) -> &'static str { + match self.indent_style { + IndentStyle::Tabs => "\t", + IndentStyle::Spaces(1) => " ", + IndentStyle::Spaces(2) => " ", + IndentStyle::Spaces(3) => " ", + IndentStyle::Spaces(4) => " ", + IndentStyle::Spaces(5) => " ", + IndentStyle::Spaces(6) => " ", + IndentStyle::Spaces(7) => " ", + IndentStyle::Spaces(8) => " ", + + // Unsupported indentation style. This should never happen, + // but just in case fall back to two spaces. + _ => " ", + } } #[inline]