line-col-0.2.1/.cargo_vcs_info.json0000644000000001121371462452600126400ustar00{ "git": { "sha1": "fcfa5e4c446f058fe8687c2af5af6ef07d2eb712" } } line-col-0.2.1/.gitignore000064400000000000000000000000231370270607600133750ustar0000000000000000/target Cargo.lock line-col-0.2.1/Cargo.toml0000644000000021151371462452600106430ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "line-col" version = "0.2.1" authors = ["Nicholas Fleck "] description = "Convert string slice indices into line/column numbers" readme = "README.md" keywords = ["str", "line", "column", "parsing", "lookup"] categories = ["algorithms", "caching"] license = "MIT" repository = "https://github.com/TheBerkin/line-col-rs" [package.metadata.docs.rs] all-features = true [dependencies.unicode-segmentation] version = "1.6.0" optional = true [features] default = [] grapheme-clusters = ["unicode-segmentation"] line-col-0.2.1/Cargo.toml.orig000064400000000000000000000013341371462435000142770ustar0000000000000000[package] name = "line-col" description = "Convert string slice indices into line/column numbers" repository = "https://github.com/TheBerkin/line-col-rs" version = "0.2.1" readme = "README.md" keywords = ["str", "line", "column", "parsing", "lookup"] categories = ["algorithms", "caching"] authors = ["Nicholas Fleck "] license = "MIT" edition = "2018" # Make sure docs.rs shows grapheme-clusters stuff [package.metadata.docs.rs] all-features = true # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html [features] grapheme-clusters = ["unicode-segmentation"] default = [] [dependencies] unicode-segmentation = { version = "1.6.0", optional = true }line-col-0.2.1/LICENSE000064400000000000000000000021041370273271500124130ustar0000000000000000MIT License Copyright (c) 2020 Nicholas Fleck Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. line-col-0.2.1/README.md000064400000000000000000000035651371462450100126750ustar0000000000000000# line-col [![Crates.io version shield](https://img.shields.io/crates/v/line-col)](https://crates.io/crates/line-col) [![Docs.rs shield](https://docs.rs/line-col/badge.svg)](https://docs.rs/line-col) [![Crates.io license shield](https://img.shields.io/crates/l/line-col)](https://github.com/TheBerkin/line-col-rs/blob/master/LICENSE) ![Crates.io downloads shield](https://img.shields.io/crates/d/line-col) A simple crate for calculating 1-based line/col numbers for a string slice. 
## Example ```rust use line_col::LineColLookup; let text = "One\nTwo"; let lookup = LineColLookup::new(text); assert_eq!(lookup.get(0), (1, 1)); // 'O' (line 1, col 1) assert_eq!(lookup.get(1), (1, 2)); // 'n' (line 1, col 2) assert_eq!(lookup.get(2), (1, 3)); // 'e' (line 1, col 3) assert_eq!(lookup.get(4), (2, 1)); // 'T' (line 2, col 1) assert_eq!(lookup.get(5), (2, 2)); // 'w' (line 2, col 2) assert_eq!(lookup.get(6), (2, 3)); // 'o' (line 2, col 3) assert_eq!(lookup.get(7), (2, 4)); // end of input (line 2, col 4) ``` ## Column calculation methods This crate offers two methods for calculating the column number. The `LineColLookup::get` method counts the number of bytes from the start of the line (plus one). However, this does not account for grapheme clusters (e.g. complex accented characters, emoji, etc.). If you would like to calculate the column based on the number of grapheme clusters instead, enable the `grapheme-clusters` feature and use `LineColLookup::get_by_cluster`. ## Changelog ### 0.2.1 * `LineColLookup` now defers line head table generation until first lookup ### 0.2.0 * Move cluster-specific `LineColLookup::get` implementation into its own method, `LineColLookup::get_by_cluster` * Remove unnecessary feature filters on some tests ### 0.1.1 * Fixed documentation typos * Use extended grapheme clustering ### 0.1.0 Initial versionline-col-0.2.1/src/lib.rs000064400000000000000000000145751371462425300133300ustar0000000000000000#[cfg(feature = "grapheme-clusters")] use unicode_segmentation::UnicodeSegmentation; use std::{cell::{Ref, RefCell}}; /// Pre-cached line/column lookup table for a string slice. pub struct LineColLookup<'source> { src: &'source str, line_heads: RefCell<Option<Vec<usize>>>, } impl<'source> LineColLookup<'source> { /// Creates a new line/col lookup table. The `src` parameter provides the input string used to calculate lines and columns. /// /// Construction itself does not scan `src`. The starting positions of all lines are computed and cached on the first lookup, which makes that first lookup an O(n) operation.
pub fn new(src: &'source str) -> Self { Self { src, line_heads: RefCell::new(None), } } fn heads(&self) -> Ref<'_, Option>> { if self.line_heads.borrow().is_none() { let line_heads: Vec = std::iter::once(0) .chain(self.src .char_indices() .filter_map(|(i, c)| Some(i + 1).filter(|_| c == '\n'))) .collect(); self.line_heads.replace(Some(line_heads)); } self.line_heads.borrow() } /// Looks up the 1-based line and column numbers of the specified byte index. /// /// Returns a tuple with the line number first, then column number. /// /// # Example /// ```rust /// use line_col::*; /// let text = "One\nTwo"; /// let lookup = LineColLookup::new(text); /// assert_eq!(lookup.get(0), (1, 1)); // 'O' (line 1, col 1) /// assert_eq!(lookup.get(1), (1, 2)); // 'n' (line 1, col 2) /// assert_eq!(lookup.get(2), (1, 3)); // 'e' (line 1, col 3) /// assert_eq!(lookup.get(4), (2, 1)); // 'T' (line 2, col 1) /// assert_eq!(lookup.get(5), (2, 2)); // 'w' (line 2, col 2) /// assert_eq!(lookup.get(6), (2, 3)); // 'o' (line 2, col 3) /// assert_eq!(lookup.get(7), (2, 4)); // (line 2, col 4) /// ``` /// /// # Panics /// /// Panics if `index` is greater than the length of the input `&str`. /// /// # Notes /// This function uses a binary search to locate the line on which `index` resides. /// This means that it runs in approximately O(log n) time. pub fn get(&self, index: usize) -> (usize, usize) { if index > self.src.len() { panic!("Index cannot be greater than the length of the input slice."); } if let Some(heads) = self.heads().as_ref() { // Perform a binary search to locate the line on which `index` resides let mut line_range = 0..heads.len(); while line_range.end - line_range.start > 1 { let range_middle = line_range.start + (line_range.end - line_range.start) / 2; let (left, right) = (line_range.start..range_middle, range_middle..line_range.end); // Check which line window contains our character index if (heads[left.start] .. 
heads[left.end]).contains(&index) { line_range = left; } else { line_range = right; } } let line_start_index = heads[line_range.start]; let line = line_range.start + 1; let col = index - line_start_index + 1; return (line, col) } unreachable!() } /// Looks up the 1-based line and column numbers of the specified byte index. /// The column number correlates to the number of grapheme clusters up to and at the specified index. /// /// Returns a tuple with the line number first, then column number. /// /// # Panics /// /// Panics if `index` is greater than the length of the input `&str`. /// /// # Notes /// This function uses a binary search to locate the line on which `index` resides. /// This means that it runs in approximately O(log n) time. #[cfg(feature = "grapheme-clusters")] pub fn get_by_cluster(&self, index: usize) -> (usize, usize) { if index > self.src.len() { panic!("Index cannot be greater than the length of the input slice."); } if let Some(heads) = self.heads().as_ref() { // Perform a binary search to locate the line on which `index` resides let mut line_range = 0..heads.len(); while line_range.end - line_range.start > 1 { let range_middle = line_range.start + (line_range.end - line_range.start) / 2; let (left, right) = (line_range.start..range_middle, range_middle..line_range.end); // Check which line window contains our character index if (heads[left.start] .. 
heads[left.end]).contains(&index) { line_range = left; } else { line_range = right; } } let line_start_index = heads[line_range.start]; let line = line_range.start + 1; let col = UnicodeSegmentation::graphemes(&self.src[line_start_index..index], true).count() + 1; return (line, col) } unreachable!() } } #[cfg(test)] mod tests { use crate::*; #[test] fn empty_str() { let text = ""; let lookup = LineColLookup::new(text); assert_eq!(lookup.get(0), (1, 1)); } #[test] fn line_col_iter_by_codepoints() { let text = "a\nab\nabc"; let lookup = LineColLookup::new(text); assert_eq!(lookup.get(0), (1, 1)); assert_eq!(lookup.get(1), (1, 2)); assert_eq!(lookup.get(2), (2, 1)); assert_eq!(lookup.get(3), (2, 2)); assert_eq!(lookup.get(4), (2, 3)); assert_eq!(lookup.get(5), (3, 1)); assert_eq!(lookup.get(6), (3, 2)); assert_eq!(lookup.get(7), (3, 3)); assert_eq!(lookup.get(8), (3, 4)); } #[test] #[cfg(feature = "grapheme-clusters")] fn emoji_text_by_grapheme_clusters() { let text = "The 👨‍👩‍👦 emoji is made of 5 code points and 18 bytes in UTF-8."; let lookup = LineColLookup::new(text); assert_eq!(lookup.get_by_cluster(4), (1, 5)); assert_eq!(lookup.get_by_cluster(22), (1, 6)); } #[test] fn emoji_text_by_codepoints() { let text = "The 👨‍👩‍👦 emoji is made of 5 code points and 18 bytes in UTF-8."; let lookup = LineColLookup::new(text); assert_eq!(lookup.get(4), (1, 5)); assert_eq!(lookup.get(22), (1, 23)); } }