compose_codespan_reporting/
files.rs

1//! Source file support for diagnostic reporting.
2//!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
5//! with the [`term::emit`] function.
6//!
//! Two simple implementations of this trait are provided:
8//!
9//! - [`SimpleFile`]: For single-file use-cases
10//! - [`SimpleFiles`]: For multi-file use-cases
11//!
12//! These data structures provide a pretty minimal API, however,
13//! so end-users are encouraged to create their own implementations for their
14//! own specific use-cases, such as an implementation that accesses the file
15//! system directly (and caches the line start locations), or an implementation
16//! using an incremental compilation library like [`salsa`].
17//!
18//! [`term::emit`]: crate::term::emit
19//! [`Diagnostics`]: crate::diagnostic::Diagnostic
20//! [`Files`]: Files
21//! [`SimpleFile`]: SimpleFile
22//! [`SimpleFiles`]: SimpleFiles
23//!
24//! [`salsa`]: https://crates.io/crates/salsa
25
26use std::ops::{Bound, Range};
27
/// The ways in which looking up a file, or a piece of content inside a file, can fail.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    IndexTooLarge {
        /// The byte index that was asked for.
        given: usize,
        /// The maximum byte index.
        max: usize,
    },
    /// The file is present, but does not contain the specified line index.
    LineTooLarge {
        /// The line index that was asked for.
        given: usize,
        /// The maximum line index.
        max: usize,
    },
    /// The file is present and contains the specified line index, but the line does not
    /// contain the specified column index.
    ColumnTooLarge {
        /// The column index that was asked for.
        given: usize,
        /// The maximum column index.
        max: usize,
    },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    InvalidCharBoundary {
        /// The offending byte index.
        given: usize,
    },
    /// The given slice is contained in the file, but is not a valid UTF-8 string due to its
    /// boundaries.
    InvalidSlice {
        /// The (start, end) bounds of the offending slice.
        given: (Bound<usize>, Bound<usize>),
    },
    /// There was an error while doing IO.
    Io(std::io::Error),
}
48
49impl From<std::io::Error> for Error {
50    fn from(err: std::io::Error) -> Error {
51        Error::Io(err)
52    }
53}
54
55impl std::fmt::Display for Error {
56    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57        match self {
58            Error::FileMissing => write!(f, "file missing"),
59            Error::IndexTooLarge { given, max } => {
60                write!(f, "invalid index {}, maximum index is {}", given, max)
61            }
62            Error::LineTooLarge { given, max } => {
63                write!(f, "invalid line {}, maximum line is {}", given, max)
64            }
65            Error::ColumnTooLarge { given, max } => {
66                write!(f, "invalid column {}, maximum column {}", given, max)
67            }
68            Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
69            Error::InvalidSlice { .. } => write!(f, "slice endpoint is not a code point boundary"),
70            Error::Io(err) => write!(f, "{}", err),
71        }
72    }
73}
74
75impl std::error::Error for Error {
76    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
77        match &self {
78            Error::Io(err) => Some(err),
79            _ => None,
80        }
81    }
82}
83
84/// A minimal interface for accessing source files when rendering diagnostics.
85///
86/// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference.
87/// This is to workaround the lack of higher kinded lifetime parameters.
88/// This can be ignored if this is not needed, however.
89pub trait Files<'a> {
90    /// A unique identifier for files in the file provider. This will be used
91    /// for rendering `diagnostic::Label`s in the corresponding source files.
92    type FileId: 'a + Copy + PartialEq;
93    /// The user-facing name of a file, to be displayed in diagnostics.
94    type Name: 'a + std::fmt::Display;
95    /// The source code of a file.
96    type Source: 'a + AsRef<str>;
97
98    /// The user-facing name of a file.
99    fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;
100
101    /// The source code of a file.
102    fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;
103
104    /// The index of the line at the given byte index.
105    /// If the byte index is past the end of the file, returns the maximum line index in the file.
106    /// This means that this function only fails if the file is not present.
107    ///
108    /// # Note for trait implementors
109    ///
110    /// This can be implemented efficiently by performing a binary search over
111    /// a list of line starts that was computed by calling the [`line_starts`]
112    /// function that is exported from the [`files`] module. It might be useful
113    /// to pre-compute and cache these line starts.
114    ///
115    /// [`line_starts`]: crate::files::line_starts
116    /// [`files`]: crate::files
117    fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;
118
119    /// The user-facing line number at the given line index.
120    /// It is not necessarily checked that the specified line index
121    /// is actually in the file.
122    ///
123    /// # Note for trait implementors
124    ///
125    /// This is usually 1-indexed from the beginning of the file, but
126    /// can be useful for implementing something like the
127    /// [C preprocessor's `#line` macro][line-macro].
128    ///
129    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
130    #[allow(unused_variables)]
131    fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
132        Ok(line_index + 1)
133    }
134
135    /// The user-facing column number at the given line index and byte index.
136    ///
137    /// # Note for trait implementors
138    ///
139    /// This is usually 1-indexed from the the start of the line.
140    /// A default implementation is provided, based on the [`column_index`]
141    /// function that is exported from the [`files`] module.
142    ///
143    /// [`files`]: crate::files
144    /// [`column_index`]: crate::files::column_index
145    fn column_number(
146        &'a self,
147        id: Self::FileId,
148        line_index: usize,
149        byte_index: usize,
150    ) -> Result<usize, Error> {
151        let source = self.source(id)?;
152        let line_range = self.line_range(id, line_index)?;
153        let column_index = column_index(source.as_ref(), line_range, byte_index);
154
155        Ok(column_index + 1)
156    }
157
158    /// Convenience method for returning line and column number at the given
159    /// byte index in the file.
160    fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
161        let line_index = self.line_index(id, byte_index)?;
162
163        Ok(Location {
164            line_number: self.line_number(id, line_index)?,
165            column_number: self.column_number(id, line_index, byte_index)?,
166        })
167    }
168
169    /// The byte range of line in the source of the file.
170    fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
171}
172
/// A position in a source file, expressed the way it is presented to users.
///
/// This is the value returned by [`Files::location`].
///
/// [`Files::location`]: Files::location
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Location {
    /// The user-facing line number.
    pub line_number: usize,
    /// The user-facing column number.
    pub column_number: usize,
}
185
/// The column index at the given byte index in the source file.
/// This is the number of characters between the start of the line and the given
/// byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// A byte index that falls inside a multi-byte character yields the column of that
/// character, as the example below shows.
///
/// # Example
///
/// ```rust
/// use compose_codespan_reporting::files;
///
/// let source = "\n\n🗻∈🌏\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never scan past the end of the line or the end of the source.
    let scan_limit = std::cmp::min(line_range.end, source.len());
    let end_index = std::cmp::min(byte_index, scan_limit);

    // A byte is counted when the position right after it is a character boundary,
    // i.e. when it is the final byte of a character. Each character that ends
    // before `end_index` therefore contributes exactly one to the count.
    (line_range.start..end_index)
        .filter(|&index| source.is_char_boundary(index + 1))
        .count()
}
217
/// Return the starting byte index of each line in the source string.
///
/// The first line always starts at index `0`; every further line starts on the
/// byte that follows a `'\n'`.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use compose_codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts(source: &str) -> impl '_ + Iterator<Item = usize> {
    let after_newlines = source
        .char_indices()
        .filter(|&(_, ch)| ch == '\n')
        .map(|(newline_at, _)| newline_at + 1);

    std::iter::once(0).chain(after_newlines)
}
258
/// A file database that contains a single source file.
///
/// Because there is only a single file in this database, `()` is used as the [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Clone, Debug)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The byte index at which each line of the source code starts.
    line_starts: Vec<usize>,
}
276
277impl<Name, Source> SimpleFile<Name, Source>
278where
279    Name: std::fmt::Display,
280    Source: AsRef<str>,
281{
282    /// Create a new source file.
283    pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
284        SimpleFile {
285            name,
286            line_starts: line_starts(source.as_ref()).collect(),
287            source,
288        }
289    }
290
291    /// Return the name of the file.
292    pub fn name(&self) -> &Name {
293        &self.name
294    }
295
296    /// Return the source of the file.
297    pub fn source(&self) -> &Source {
298        &self.source
299    }
300
301    /// Return the starting byte index of the line with the specified line index.
302    /// Convenience method that already generates errors if necessary.
303    fn line_start(&self, line_index: usize) -> Result<usize, Error> {
304        use std::cmp::Ordering;
305
306        match line_index.cmp(&self.line_starts.len()) {
307            Ordering::Less => Ok(self
308                .line_starts
309                .get(line_index)
310                .cloned()
311                .expect("failed despite previous check")),
312            Ordering::Equal => Ok(self.source.as_ref().len()),
313            Ordering::Greater => Err(Error::LineTooLarge {
314                given: line_index,
315                max: self.line_starts.len() - 1,
316            }),
317        }
318    }
319}
320
321impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
322where
323    Name: 'a + std::fmt::Display + Clone,
324    Source: 'a + AsRef<str>,
325{
326    type FileId = ();
327    type Name = Name;
328    type Source = &'a str;
329
330    fn name(&self, (): ()) -> Result<Name, Error> {
331        Ok(self.name.clone())
332    }
333
334    fn source(&self, (): ()) -> Result<&str, Error> {
335        Ok(self.source.as_ref())
336    }
337
338    fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
339        Ok(self
340            .line_starts
341            .binary_search(&byte_index)
342            .unwrap_or_else(|next_line| next_line - 1))
343    }
344
345    fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
346        let line_start = self.line_start(line_index)?;
347        let next_line_start = self.line_start(line_index + 1)?;
348
349        Ok(line_start..next_line_start)
350    }
351}
352
353/// A file database that can store multiple source files.
354///
355/// This is useful for simple language tests, but it might be worth creating a
356/// custom implementation when a language scales beyond a certain size.
357/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
358#[derive(Debug, Default, Clone)]
359pub struct SimpleFiles<Name, Source> {
360    files: Vec<SimpleFile<Name, Source>>,
361}
362
363impl<Name, Source> SimpleFiles<Name, Source>
364where
365    Name: std::fmt::Display,
366    Source: AsRef<str>,
367{
368    /// Create a new files database.
369    pub fn new() -> SimpleFiles<Name, Source> {
370        SimpleFiles { files: Vec::new() }
371    }
372
373    /// Add a file to the database, returning the handle that can be used to
374    /// refer to it again.
375    pub fn add(&mut self, name: Name, source: Source) -> usize {
376        let file_id = self.files.len();
377        self.files.push(SimpleFile::new(name, source));
378        file_id
379    }
380
381    /// Get the file corresponding to the given id.
382    pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
383        self.files.get(file_id).ok_or(Error::FileMissing)
384    }
385}
386
387impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
388where
389    Name: 'a + std::fmt::Display + Clone,
390    Source: 'a + AsRef<str>,
391{
392    type FileId = usize;
393    type Name = Name;
394    type Source = &'a str;
395
396    fn name(&self, file_id: usize) -> Result<Name, Error> {
397        Ok(self.get(file_id)?.name().clone())
398    }
399
400    fn source(&self, file_id: usize) -> Result<&str, Error> {
401        Ok(self.get(file_id)?.source().as_ref())
402    }
403
404    fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
405        self.get(file_id)?.line_index((), byte_index)
406    }
407
408    fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
409        self.get(file_id)?.line_range((), line_index)
410    }
411}
412
#[cfg(test)]
mod test {
    use super::*;

    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The line starts are computed when the file is created.
    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let expected_starts = vec![
            0,  // "foo\n"
            4,  // "bar\r\n"
            9,  // ""
            10, // "baz"
        ];

        assert_eq!(file.line_starts, expected_starts);
    }

    /// Slicing the source with each line range yields the lines, terminators included.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let line_range = file.line_range((), line).unwrap();
            line_sources.push(&file.source[line_range]);
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}
447}