compose_codespan_reporting/files.rs
1//! Source file support for diagnostic reporting.
2//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
5//! with the [`term::emit`] function.
6//!
//! Simple implementations of this trait are provided:
8//!
9//! - [`SimpleFile`]: For single-file use-cases
10//! - [`SimpleFiles`]: For multi-file use-cases
11//!
12//! These data structures provide a pretty minimal API, however,
13//! so end-users are encouraged to create their own implementations for their
14//! own specific use-cases, such as an implementation that accesses the file
15//! system directly (and caches the line start locations), or an implementation
16//! using an incremental compilation library like [`salsa`].
17//!
18//! [`term::emit`]: crate::term::emit
19//! [`Diagnostics`]: crate::diagnostic::Diagnostic
20//! [`Files`]: Files
21//! [`SimpleFile`]: SimpleFile
22//! [`SimpleFiles`]: SimpleFiles
23//!
24//! [`salsa`]: https://crates.io/crates/salsa
25
26use std::ops::{Bound, Range};
27
/// The ways in which looking up a file, or a piece of content inside a file, can fail.
#[non_exhaustive]
#[derive(Debug)]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge {
        /// The byte index that was specified.
        given: usize,
        /// The maximum byte index, reported next to `given` in the error message.
        max: usize,
    },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge {
        /// The line index that was specified.
        given: usize,
        /// The maximum line index, reported next to `given` in the error message.
        max: usize,
    },
    /// The file is present and contains the specified line index, but the line
    /// does not contain the specified column index.
    ColumnTooLarge {
        /// The column index that was specified.
        given: usize,
        /// The maximum column index, reported next to `given` in the error message.
        max: usize,
    },
    /// The given index is contained in the file, but is not a boundary of a
    /// UTF-8 code point.
    InvalidCharBoundary {
        /// The byte index that was specified.
        given: usize,
    },
    /// The given slice is contained in the file, but is not a valid UTF-8
    /// string due to its boundaries.
    InvalidSlice {
        /// The start and end bounds of the slice that was specified.
        given: (Bound<usize>, Bound<usize>),
    },
    /// There was an error while doing IO.
    Io(std::io::Error),
}
48
49impl From<std::io::Error> for Error {
50 fn from(err: std::io::Error) -> Error {
51 Error::Io(err)
52 }
53}
54
55impl std::fmt::Display for Error {
56 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57 match self {
58 Error::FileMissing => write!(f, "file missing"),
59 Error::IndexTooLarge { given, max } => {
60 write!(f, "invalid index {}, maximum index is {}", given, max)
61 }
62 Error::LineTooLarge { given, max } => {
63 write!(f, "invalid line {}, maximum line is {}", given, max)
64 }
65 Error::ColumnTooLarge { given, max } => {
66 write!(f, "invalid column {}, maximum column {}", given, max)
67 }
68 Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
69 Error::InvalidSlice { .. } => write!(f, "slice endpoint is not a code point boundary"),
70 Error::Io(err) => write!(f, "{}", err),
71 }
72 }
73}
74
75impl std::error::Error for Error {
76 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
77 match &self {
78 Error::Io(err) => Some(err),
79 _ => None,
80 }
81 }
82}
83
84/// A minimal interface for accessing source files when rendering diagnostics.
85///
86/// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference.
87/// This is to workaround the lack of higher kinded lifetime parameters.
88/// This can be ignored if this is not needed, however.
89pub trait Files<'a> {
90 /// A unique identifier for files in the file provider. This will be used
91 /// for rendering `diagnostic::Label`s in the corresponding source files.
92 type FileId: 'a + Copy + PartialEq;
93 /// The user-facing name of a file, to be displayed in diagnostics.
94 type Name: 'a + std::fmt::Display;
95 /// The source code of a file.
96 type Source: 'a + AsRef<str>;
97
98 /// The user-facing name of a file.
99 fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;
100
101 /// The source code of a file.
102 fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;
103
104 /// The index of the line at the given byte index.
105 /// If the byte index is past the end of the file, returns the maximum line index in the file.
106 /// This means that this function only fails if the file is not present.
107 ///
108 /// # Note for trait implementors
109 ///
110 /// This can be implemented efficiently by performing a binary search over
111 /// a list of line starts that was computed by calling the [`line_starts`]
112 /// function that is exported from the [`files`] module. It might be useful
113 /// to pre-compute and cache these line starts.
114 ///
115 /// [`line_starts`]: crate::files::line_starts
116 /// [`files`]: crate::files
117 fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;
118
119 /// The user-facing line number at the given line index.
120 /// It is not necessarily checked that the specified line index
121 /// is actually in the file.
122 ///
123 /// # Note for trait implementors
124 ///
125 /// This is usually 1-indexed from the beginning of the file, but
126 /// can be useful for implementing something like the
127 /// [C preprocessor's `#line` macro][line-macro].
128 ///
129 /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
130 #[allow(unused_variables)]
131 fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
132 Ok(line_index + 1)
133 }
134
135 /// The user-facing column number at the given line index and byte index.
136 ///
137 /// # Note for trait implementors
138 ///
139 /// This is usually 1-indexed from the the start of the line.
140 /// A default implementation is provided, based on the [`column_index`]
141 /// function that is exported from the [`files`] module.
142 ///
143 /// [`files`]: crate::files
144 /// [`column_index`]: crate::files::column_index
145 fn column_number(
146 &'a self,
147 id: Self::FileId,
148 line_index: usize,
149 byte_index: usize,
150 ) -> Result<usize, Error> {
151 let source = self.source(id)?;
152 let line_range = self.line_range(id, line_index)?;
153 let column_index = column_index(source.as_ref(), line_range, byte_index);
154
155 Ok(column_index + 1)
156 }
157
158 /// Convenience method for returning line and column number at the given
159 /// byte index in the file.
160 fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
161 let line_index = self.line_index(id, byte_index)?;
162
163 Ok(Location {
164 line_number: self.line_number(id, line_index)?,
165 column_number: self.column_number(id, line_index, byte_index)?,
166 })
167 }
168
169 /// The byte range of line in the source of the file.
170 fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
171}
172
/// A position in a source file, expressed the way it is presented to users.
///
/// This is the value returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Location {
    /// The line number, as shown to the user.
    pub line_number: usize,
    /// The column number, as shown to the user.
    pub column_number: usize,
}
185
/// The column index at the given byte index in the source file.
/// This is the number of characters from the start of the line up to the
/// given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
/// A byte index that falls inside a multi-byte character only counts the
/// characters that end at or before it.
///
/// # Example
///
/// ```rust
/// use compose_codespan_reporting::files;
///
/// // The middle line holds three characters that take 4, 3 and 4 bytes in
/// // UTF-8. They are written as escapes so that the example does not depend
/// // on the encoding of this file.
/// let source = "\n\n\u{1F5FB}\u{2208}\u{1F30F}\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never look past the end of the line or the end of the source.
    let line_end = line_range.end.min(source.len());
    let end_index = byte_index.min(line_end);

    // A character ends wherever the following byte offset is a char boundary,
    // so counting those offsets counts the characters that end at or before
    // `end_index`. The range is empty when `byte_index` precedes the line.
    (line_range.start..end_index)
        .filter(|&offset| source.is_char_boundary(offset + 1))
        .count()
}
217
/// Yield the byte index at which each line of the source string starts.
///
/// The first line always starts at `0`; every further line starts right after
/// a `'\n'`.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use compose_codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // "\n"
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts(source: &str) -> impl '_ + Iterator<Item = usize> {
    // Each newline is a single byte, so the next line starts one byte after it.
    let after_newlines = source
        .char_indices()
        .filter(|&(_, ch)| ch == '\n')
        .map(|(newline_index, _)| newline_index + 1);

    std::iter::once(0).chain(after_newlines)
}
258
/// A file database holding exactly one source file.
///
/// Because there is only a single file in this database, `()` is used as the
/// [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Clone, Debug)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The byte index at which each line of the source code starts.
    line_starts: Vec<usize>,
}
276
277impl<Name, Source> SimpleFile<Name, Source>
278where
279 Name: std::fmt::Display,
280 Source: AsRef<str>,
281{
282 /// Create a new source file.
283 pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
284 SimpleFile {
285 name,
286 line_starts: line_starts(source.as_ref()).collect(),
287 source,
288 }
289 }
290
291 /// Return the name of the file.
292 pub fn name(&self) -> &Name {
293 &self.name
294 }
295
296 /// Return the source of the file.
297 pub fn source(&self) -> &Source {
298 &self.source
299 }
300
301 /// Return the starting byte index of the line with the specified line index.
302 /// Convenience method that already generates errors if necessary.
303 fn line_start(&self, line_index: usize) -> Result<usize, Error> {
304 use std::cmp::Ordering;
305
306 match line_index.cmp(&self.line_starts.len()) {
307 Ordering::Less => Ok(self
308 .line_starts
309 .get(line_index)
310 .cloned()
311 .expect("failed despite previous check")),
312 Ordering::Equal => Ok(self.source.as_ref().len()),
313 Ordering::Greater => Err(Error::LineTooLarge {
314 given: line_index,
315 max: self.line_starts.len() - 1,
316 }),
317 }
318 }
319}
320
321impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
322where
323 Name: 'a + std::fmt::Display + Clone,
324 Source: 'a + AsRef<str>,
325{
326 type FileId = ();
327 type Name = Name;
328 type Source = &'a str;
329
330 fn name(&self, (): ()) -> Result<Name, Error> {
331 Ok(self.name.clone())
332 }
333
334 fn source(&self, (): ()) -> Result<&str, Error> {
335 Ok(self.source.as_ref())
336 }
337
338 fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
339 Ok(self
340 .line_starts
341 .binary_search(&byte_index)
342 .unwrap_or_else(|next_line| next_line - 1))
343 }
344
345 fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
346 let line_start = self.line_start(line_index)?;
347 let next_line_start = self.line_start(line_index + 1)?;
348
349 Ok(line_start..next_line_start)
350 }
351}
352
353/// A file database that can store multiple source files.
354///
355/// This is useful for simple language tests, but it might be worth creating a
356/// custom implementation when a language scales beyond a certain size.
357/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
358#[derive(Debug, Default, Clone)]
359pub struct SimpleFiles<Name, Source> {
360 files: Vec<SimpleFile<Name, Source>>,
361}
362
363impl<Name, Source> SimpleFiles<Name, Source>
364where
365 Name: std::fmt::Display,
366 Source: AsRef<str>,
367{
368 /// Create a new files database.
369 pub fn new() -> SimpleFiles<Name, Source> {
370 SimpleFiles { files: Vec::new() }
371 }
372
373 /// Add a file to the database, returning the handle that can be used to
374 /// refer to it again.
375 pub fn add(&mut self, name: Name, source: Source) -> usize {
376 let file_id = self.files.len();
377 self.files.push(SimpleFile::new(name, source));
378 file_id
379 }
380
381 /// Get the file corresponding to the given id.
382 pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
383 self.files.get(file_id).ok_or(Error::FileMissing)
384 }
385}
386
387impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
388where
389 Name: 'a + std::fmt::Display + Clone,
390 Source: 'a + AsRef<str>,
391{
392 type FileId = usize;
393 type Name = Name;
394 type Source = &'a str;
395
396 fn name(&self, file_id: usize) -> Result<Name, Error> {
397 Ok(self.get(file_id)?.name().clone())
398 }
399
400 fn source(&self, file_id: usize) -> Result<&str, Error> {
401 Ok(self.get(file_id)?.source().as_ref())
402 }
403
404 fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
405 self.get(file_id)?.line_index((), byte_index)
406 }
407
408 fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
409 self.get(file_id)?.line_range((), line_index)
410 }
411}
412
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines: two `\n`-terminated, one `\r\n`-terminated and a final
    /// line without a terminator.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// Constructing a file records the byte index at which every line starts.
    #[test]
    fn line_starts() {
        let expected_starts: [usize; 4] = [
            0,  // "foo\n"
            4,  // "bar\r\n"
            9,  // "\n"
            10, // "baz"
        ];

        let file = SimpleFile::new("test", TEST_SOURCE);

        assert_eq!(file.line_starts, expected_starts);
    }

    /// Slicing the source by each line range yields the lines, including
    /// their terminators.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let line_range = file.line_range((), line).unwrap();
            line_sources.push(&file.source[line_range]);
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}
447}