1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
use super::{LineNumberCache, Position, PositionIterator};
use failure::Fail;
use std::ops::Deref;
/// A borrowed byte buffer that has been checked to contain only ASCII bytes,
/// together with a cache of its line-break offsets.
///
/// Instances are created through [`AsciiFile::new`], which rejects any input
/// containing a non-ASCII byte.
#[derive(Debug)]
pub struct AsciiFile<'m> {
/// The raw input bytes, borrowed for `'m`.
///
/// NOTE(review): this field is `pub`, while the `Deref` and `Into<&str>`
/// impls in this file reinterpret it as `str` without re-validating. Those
/// conversions assume it is never reassigned to bytes that did not go
/// through [`AsciiFile::new`] - confirm no caller does that.
pub mapping: &'m [u8],
/// Line-number cache built in [`AsciiFile::new`] from the offsets of every
/// `\n` byte in `mapping`.
line_cache: LineNumberCache,
}
/// Error produced by [`AsciiFile::new`] when the input is not pure ASCII.
#[derive(Debug, Fail)]
pub enum EncodingError {
/// A non-ASCII byte was encountered.
///
/// * `offset` - index of the offending byte within the input slice
///   (counted from 0).
/// * `position` - the same location rendered as `row:column`, where both
///   numbers are the line cache's `row_and_column` result plus one. This is
///   the text shown in the error's display message.
#[fail(display = "input contains non-ascii character at {}", position)]
NotAscii { offset: usize, position: String },
}
impl<'m> AsciiFile<'m> {
    /// Returns a reference to the line-number cache built by [`AsciiFile::new`].
    pub fn lookup_cache(&self) -> &LineNumberCache {
        &self.line_cache
    }

    /// Validates `mapping` and wraps it in an `AsciiFile`.
    ///
    /// The input is scanned once, front to back. The offset of every `\n`
    /// byte is recorded and handed to [`LineNumberCache::new`].
    ///
    /// # Errors
    ///
    /// Returns [`EncodingError::NotAscii`] for the first non-ASCII byte found.
    /// The error carries the byte offset and a `row:column` string computed
    /// from the line breaks seen before that byte (each component is the
    /// cache's `row_and_column` value plus one).
    pub fn new(mapping: &'m [u8]) -> Result<AsciiFile<'m>, EncodingError> {
        let mut newline_offsets = Vec::new();

        for (offset, &byte) in mapping.iter().enumerate() {
            match byte {
                b'\n' => newline_offsets.push(offset),
                other if other.is_ascii() => {}
                _ => {
                    // Only the line breaks preceding the bad byte are known
                    // here, which is all that is needed to locate it.
                    let partial_cache = LineNumberCache::new(newline_offsets);
                    let (row, col) = partial_cache.row_and_column(offset);
                    let position = format!("{}:{}", row + 1, col + 1);
                    return Err(EncodingError::NotAscii { offset, position });
                }
            }
        }

        newline_offsets.shrink_to_fit();
        let line_cache = LineNumberCache::new(newline_offsets);
        Ok(AsciiFile {
            mapping,
            line_cache,
        })
    }

    /// Creates a [`PositionIterator`] that begins at the start of this file.
    pub fn iter(&self) -> PositionIterator<'_> {
        let start = Position::at_file_start(self);
        PositionIterator::new(start)
    }
}
impl<'m> Deref for AsciiFile<'m> {
    type Target = str;

    /// Exposes the underlying bytes as a `str` without re-validating them.
    fn deref(&self) -> &str {
        // SAFETY: `AsciiFile::new` only returns `Ok` after every byte passed
        // `is_ascii`, and ASCII is always valid UTF-8.
        // NOTE(review): `mapping` is a `pub` field, so this also relies on it
        // not having been reassigned to unchecked bytes afterwards.
        unsafe { std::str::from_utf8_unchecked(self.mapping) }
    }
}
impl<'m, 'a> Into<&'m str> for &'a AsciiFile<'m> {
fn into(self) -> &'m str {
unsafe { std::str::from_utf8_unchecked(&self.mapping) }
}
}
#[cfg(test)]
// The debug `println!` below is deliberate: it shows the error in test output.
#[allow(clippy::print_stdout, clippy::use_debug)]
mod tests {
    use super::*;

    /// Pure ASCII input is accepted and dereferences to the same text.
    #[test]
    fn works_with_ascii() {
        let string = "ABCDEFG\n\t";
        let file = AsciiFile::new(string.as_bytes()).unwrap();
        let contents: &str = &file;
        assert_eq!(string, contents);
    }

    /// Valid UTF-8 containing a non-ASCII character is rejected, and the error
    /// points at the first offending byte.
    #[test]
    fn returns_err_on_non_ascii() {
        let input = "one💩two";
        let e = AsciiFile::new(input.as_bytes()).unwrap_err();
        println!("{:?}", e);
        let EncodingError::NotAscii { offset, position } = e;
        assert_eq!(offset, 3);
        assert_eq!(position, "1:4");
    }

    /// Input that is neither ASCII nor valid UTF-8 is rejected as well.
    #[test]
    fn returns_err_on_non_ascii_non_utf8() {
        // Serialize the UTF-16 code units explicitly as little-endian so the
        // byte sequence (and therefore the expected offset) is the same on
        // every target, without reinterpreting memory through `unsafe`.
        let mut input: Vec<u8> = Vec::new();
        for unit in "ä".encode_utf16() {
            input.extend_from_slice(&unit.to_le_bytes());
        }
        assert_eq!(input.len(), 2);
        assert!(std::str::from_utf8(&input).is_err());

        let EncodingError::NotAscii { offset, .. } = AsciiFile::new(&input).unwrap_err();
        assert_eq!(offset, 0);
    }
}