1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
//! Caches information about linebreaks that allows us to reconstruct the line //! number and character column in O(log N). This makes sense as we only //! request line numbers in a few cases (e.g. on error). //! //! ``` //! use asciifile::LineNumberCache; //! //! let input = "asd\nqwer\nZU\nfgfh"; //! let positions = input //! .chars() //! .enumerate() //! .filter_map( //! |(offset, chr)| { //! if chr == '\n' { //! Some(offset) //! } else { //! None //! } //! }, //! ) //! .collect(); //! let cache = LineNumberCache::new(positions); //! //! let actual: Vec<_> = input //! .chars() //! .enumerate() //! .map(|(offset, chr)| (chr, cache.row_and_column(offset))) //! .collect(); //! //! let expected = vec![ //! ('a', (0, 0)), //! ('s', (0, 1)), //! ('d', (0, 2)), //! ('\n', (0, 3)), //! ('q', (1, 0)), //! ('w', (1, 1)), //! ('e', (1, 2)), //! ('r', (1, 3)), //! ('\n', (1, 4)), //! ('Z', (2, 0)), //! ('U', (2, 1)), //! ('\n', (2, 2)), //! ('f', (3, 0)), //! ('g', (3, 1)), //! ('f', (3, 2)), //! ('h', (3, 3)), //! ]; //! //! assert_eq!(actual, expected); //! ``` #[derive(Debug)] pub struct LineNumberCache { /// byte index of linebreaks (\n) in the file. The /// index of the newline within the sorted list is /// the row, which starts at 0. /// The line number is the row plus 1. linebreaks: Vec<usize>, } impl LineNumberCache { /// Return a `LineNumberCache` with UNIX-style line breaks (single `\n` /// byte) at byte offsets given in `linebreaks`. pub fn new(linebreaks: Vec<usize>) -> Self { Self { linebreaks } } /// Return the line number at `byte_offset`. /// Identical to `row() + 1`. /// /// This does not detect errors caused by `byte_offset`s that /// are out of range. Prefer the safe API of the `Position` struct /// instead. pub fn line_number(&self, byte_offset: usize) -> usize { self.row(byte_offset) + 1 } /// Return the column of the characters position within the file. /// /// The column is zero based, meaning the first characters of /// a line/row is positioned at column `0`. /// /// This does not detect errors caused by `byte_offset`s that /// are out of range. Prefer the safe API of the `Position` struct /// instead. pub fn column(&self, byte_offset: usize) -> usize { self.row_and_column(byte_offset).1 } /// Return the row of the character's position within the file. /// /// This does not detect errors caused by `byte_offset`s that /// are out of range. Prefer the safe API of the `Position` struct /// instead. pub fn row(&self, byte_offset: usize) -> usize { // // - binary_search always gives the index of the smaller number if the fiven // offset lies between two newlines. // - binary_search returns len() if the number is bigger than everything seen // => the returned index is always the row-number. match self.linebreaks.binary_search(&byte_offset) { Err(row) => row, Ok(row) => { // we position newlines at the end of the line. Meaning // the `line break` is performed after the `newline` // character row } } } /// Get row and column at once. Equivalent to calling /// `row(&self, byte_offset: usize)` and `column(&self, byte_offset: /// usize)`. /// /// This method exists because most of the time you want both. /// And it's more efficient to look up both at once. /// /// This does not detect errors caused by `byte_offset`s that /// are out of range. Prefer the safe API of the `Position` struct /// instead. pub fn row_and_column(&self, byte_offset: usize) -> (usize, usize) { let row = self.row(byte_offset); let col = if row == 0 { // no linebreak yet => first line byte_offset } else { // line that is last line or line in-between // we have to `-1` since we want to look up the // position of the line break before our current line. // // we have to `+1` since we want to look up // the position of the first character of the // line, which is one character after the newline. let line_start_offset = self.linebreaks[row - 1] + 1; byte_offset - line_start_offset }; (row, col) } } #[cfg(test)] mod tests { use super::*; #[test] fn empty_linebreaks() { let linebreaks = vec![]; let c = LineNumberCache::new(linebreaks); assert_eq!((0, 23), c.row_and_column(23)); assert_eq!(0, c.row(23)); assert_eq!(23, c.column(23)); assert_eq!(1, c.line_number(23)); } }