Skip to content

Commit

Permalink
Make an enum for different input sources (tweag#1560)
Browse files Browse the repository at this point in the history
* Make an enum for different input sources

* Fix doc link

* Review comments

* Add a comment
  • Loading branch information
jneem authored and Ben committed Sep 17, 2023
1 parent 7b3baa1 commit 30ac61e
Show file tree
Hide file tree
Showing 12 changed files with 176 additions and 103 deletions.
168 changes: 110 additions & 58 deletions core/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@ impl InputFormat {
pub struct Cache {
/// The content of the program sources plus imports.
files: Files<String>,
file_paths: HashMap<FileId, SourcePath>,
/// The name-id table, holding file ids stored in the database indexed by source names.
file_ids: HashMap<OsString, NameIdEntry>,
file_ids: HashMap<SourcePath, NameIdEntry>,
/// Map containing for each FileId a list of files they import (directly).
imports: HashMap<FileId, HashSet<FileId>>,
/// Map containing for each FileId a list of files importing them (directly).
Expand Down Expand Up @@ -129,7 +130,7 @@ pub struct TermEntry {
/// are not auto-refreshed. If an in-memory buffer has a path that also exists in the
/// filesystem, we will not even check that file to see if it has changed.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
enum Source {
enum SourceKind {
Filesystem(SystemTime),
Memory,
}
Expand All @@ -150,7 +151,7 @@ enum Source {
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
pub struct NameIdEntry {
id: FileId,
source: Source,
source: SourceKind,
}

/// The state of an entry of the term cache.
Expand Down Expand Up @@ -221,6 +222,68 @@ impl<E> CacheError<E> {
}
}

/// Input data usually comes from files on the file system, but there are also
/// lots of cases where we want to synthesize other kinds of inputs.
///
/// Note that a `SourcePath` does not uniquely identify a cached input:
/// - Some functions (like [`Cache::add_file`]) add a new cached input unconditionally.
/// - [`Cache::get_or_add_file`] will add a new cached input at the same `SourcePath` if
/// the file on disk was updated.
///
/// The equality checking of `SourcePath` only affects [`Cache::replace_string`], which
/// overwrites any previous cached input with the same `SourcePath`.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum SourcePath {
/// A file at the given path.
///
/// Note that this does not need to be a real file on the filesystem: it could still
/// be loaded from memory by, e.g, [`Cache::add_string`].
///
/// This is the only `SourcePath` variant that can be resolved as the target
/// of an import statement.
Path(PathBuf),
/// A subrange of a file at the given path.
///
/// This is used by nls to analyze small parts of files that don't fully parse. The
/// original file path is preserved, because it's needed for resolving imports.
Snippet(PathBuf),
Std(StdlibModule),
Query,
ReplInput(usize),
ReplTypecheck,
ReplQuery,
Override(Vec<String>),
}

impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {
type Error = ();

fn try_from(value: &'a SourcePath) -> Result<Self, Self::Error> {
match value {
SourcePath::Path(p) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
_ => Err(()),
}
}
}

// [`Files`] needs to have an OsString for each file, so we synthesize names even for
// sources that don't have them. They don't need to be unique; they're just used for
// diagnostics.
impl From<SourcePath> for OsString {
fn from(source_path: SourcePath) -> Self {
match source_path {
SourcePath::Path(p) | SourcePath::Snippet(p) => p.into(),
SourcePath::Std(StdlibModule::Std) => "<stdlib/std.ncl>".into(),
SourcePath::Std(StdlibModule::Internals) => "<stdlib/internals.ncl>".into(),
SourcePath::Query => "<query>".into(),
SourcePath::ReplInput(idx) => format!("<repl-input-{idx}").into(),
SourcePath::ReplTypecheck => "<repl-typecheck>".into(),
SourcePath::ReplQuery => "<repl-query>".into(),
SourcePath::Override(path) => format!("<override {}>", path.join(".")).into(),
}
}
}

/// Return status indicating if an import has been resolved from a file (first encounter), or was
/// retrieved from the cache.
///
Expand All @@ -246,6 +309,7 @@ impl Cache {
Cache {
files: Files::new(),
file_ids: HashMap::new(),
file_paths: HashMap::new(),
terms: HashMap::new(),
wildcards: HashMap::new(),
imports: HashMap::new(),
Expand All @@ -258,29 +322,18 @@ impl Cache {
}
}

/// Load a file in the file database. Do not insert an entry in the name-id table.
fn load_file(&mut self, path: impl Into<OsString>) -> io::Result<FileId> {
let path = path.into();
let mut buffer = String::new();
fs::File::open(&path)
.and_then(|mut file| file.read_to_string(&mut buffer))
.map(|_| self.files.add(path, buffer))
}

/// Same as [Self::add_file], but assume that the path is already normalized, and take the
/// timestamp as a parameter.
fn add_file_(
&mut self,
path: impl Into<OsString>,
timestamp: SystemTime,
) -> io::Result<FileId> {
let path = path.into();
let file_id = self.load_file(path.clone())?;
fn add_file_(&mut self, path: PathBuf, timestamp: SystemTime) -> io::Result<FileId> {
let contents = std::fs::read_to_string(&path)?;
let file_id = self.files.add(&path, contents);
self.file_paths
.insert(file_id, SourcePath::Path(path.clone()));
self.file_ids.insert(
path,
SourcePath::Path(path),
NameIdEntry {
id: file_id,
source: Source::Filesystem(timestamp),
source: SourceKind::Filesystem(timestamp),
},
);
Ok(file_id)
Expand All @@ -303,7 +356,7 @@ impl Cache {
pub fn get_or_add_file(&mut self, path: impl Into<OsString>) -> io::Result<CacheOp<FileId>> {
let path = path.into();
let normalized = normalize_path(&path)?;
match self.id_or_new_timestamp_of(&path)? {
match self.id_or_new_timestamp_of(path.as_ref())? {
SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)),
SourceState::Stale(timestamp) => {
self.add_file_(normalized, timestamp).map(CacheOp::Done)
Expand All @@ -315,10 +368,9 @@ impl Cache {
///
/// Do not check if a source with the same name already exists: if it is the
/// case, this one will override the old entry in the name-id table.
pub fn add_source<T, S>(&mut self, source_name: S, mut source: T) -> io::Result<FileId>
pub fn add_source<T>(&mut self, source_name: SourcePath, mut source: T) -> io::Result<FileId>
where
T: Read,
S: Into<OsString>,
{
let mut buffer = String::new();
source.read_to_string(&mut buffer)?;
Expand All @@ -333,14 +385,14 @@ impl Cache {
///
/// Do not check if a source with the same name already exists: if it is the case, this one
/// will override the old entry in the name-id table but the old `FileId` will remain valid.
pub fn add_string(&mut self, source_name: impl Into<OsString>, s: String) -> FileId {
let source_name = source_name.into();
pub fn add_string(&mut self, source_name: SourcePath, s: String) -> FileId {
let id = self.files.add(source_name.clone(), s);
self.file_paths.insert(id, source_name.clone());
self.file_ids.insert(
source_name,
NameIdEntry {
id,
source: Source::Memory,
source: SourceKind::Memory,
},
);
id
Expand All @@ -353,19 +405,19 @@ impl Cache {
///
/// Used to store intermediate short-lived generated snippets that needs to have a
/// corresponding `FileId`, such as when querying or reporting errors.
pub fn replace_string(&mut self, source_name: impl Into<OsString>, s: String) -> FileId {
let source_name = source_name.into();
pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
if let Some(file_id) = self.id_of(&source_name) {
self.files.update(file_id, s);
self.terms.remove(&file_id);
file_id
} else {
let file_id = self.files.add(source_name.clone(), s);
self.file_paths.insert(file_id, source_name.clone());
self.file_ids.insert(
source_name,
NameIdEntry {
id: file_id,
source: Source::Memory,
source: SourceKind::Memory,
},
);
file_id
Expand Down Expand Up @@ -878,10 +930,13 @@ impl Cache {
///
/// Note that files added via [Self::add_file] are indexed by their full normalized path (cf
/// [normalize_path]).
pub fn id_of(&self, name: impl AsRef<OsStr>) -> Option<FileId> {
match self.id_or_new_timestamp_of(name).ok()? {
SourceState::UpToDate(id) => Some(id),
SourceState::Stale(_) => None,
pub fn id_of(&self, name: &SourcePath) -> Option<FileId> {
match name {
SourcePath::Path(p) => match self.id_or_new_timestamp_of(p).ok()? {
SourceState::UpToDate(id) => Some(id),
SourceState::Stale(_) => None,
},
name => Some(self.file_ids.get(name)?.id),
}
}

Expand All @@ -893,13 +948,12 @@ impl Cache {
///
/// The main point of this awkward signature is to minimize I/O operations: if we accessed
/// the timestamp, keep it around.
fn id_or_new_timestamp_of(&self, name: impl AsRef<OsStr>) -> io::Result<SourceState> {
let name = name.as_ref();
match self.file_ids.get(name) {
fn id_or_new_timestamp_of(&self, name: &Path) -> io::Result<SourceState> {
match self.file_ids.get(&SourcePath::Path(name.to_owned())) {
None => Ok(SourceState::Stale(timestamp(name)?)),
Some(NameIdEntry {
id,
source: Source::Filesystem(ts),
source: SourceKind::Filesystem(ts),
}) => {
let new_timestamp = timestamp(name)?;
if ts == &new_timestamp {
Expand All @@ -910,7 +964,7 @@ impl Cache {
}
Some(NameIdEntry {
id,
source: Source::Memory,
source: SourceKind::Memory,
}) => Ok(SourceState::UpToDate(*id)),
}
}
Expand Down Expand Up @@ -1023,11 +1077,10 @@ impl Cache {
let file_ids: HashMap<StdlibModule, FileId> = nickel_stdlib::modules()
.into_iter()
.map(|module| {
let name = module.file_name();
let content = module.content();
(
module,
self.add_string(OsString::from(name), String::from(content)),
self.add_string(SourcePath::Std(module), String::from(content)),
)
})
.collect();
Expand Down Expand Up @@ -1205,24 +1258,25 @@ pub trait ImportResolver {
fn resolve(
&mut self,
path: &OsStr,
parent: Option<PathBuf>,
parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError>;

/// Get a resolved import from the term cache.
fn get(&self, file_id: FileId) -> Option<RichTerm>;
/// Return the (potentially normalized) file path corresponding to the ID of a resolved import.
fn get_path(&self, file_id: FileId) -> &OsStr;
fn get_path(&self, file_id: FileId) -> Option<&OsStr>;
}

impl ImportResolver for Cache {
fn resolve(
&mut self,
path: &OsStr,
parent: Option<PathBuf>,
parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
let path_buf = with_parent(path, parent.clone());
let parent_path = parent.and_then(|p| self.get_path(p)).map(PathBuf::from);
let path_buf = with_parent(path, parent_path);
let format = InputFormat::from_path(&path_buf).unwrap_or(InputFormat::Nickel);
let id_op = self.get_or_add_file(&path_buf).map_err(|err| {
ImportError::IOError(
Expand All @@ -1237,12 +1291,8 @@ impl ImportResolver for Cache {
};

if let Some(parent) = parent {
let parent_id = self.id_of(parent).unwrap();
self.imports.entry(parent_id).or_default().insert(file_id);
self.rev_imports
.entry(file_id)
.or_default()
.insert(parent_id);
self.imports.entry(parent).or_default().insert(file_id);
self.rev_imports.entry(file_id).or_default().insert(parent);
}

self.parse_multi(file_id, format)
Expand All @@ -1260,8 +1310,10 @@ impl ImportResolver for Cache {
})
}

fn get_path(&self, file_id: FileId) -> &OsStr {
self.files.name(file_id)
fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
self.file_paths
.get(&file_id)
.and_then(|p| p.try_into().ok())
}
}

Expand Down Expand Up @@ -1340,7 +1392,7 @@ pub mod resolvers {
fn resolve(
&mut self,
_path: &OsStr,
_parent: Option<PathBuf>,
_parent: Option<FileId>,
_pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
panic!("cache::resolvers: dummy resolver should not have been invoked");
Expand All @@ -1350,7 +1402,7 @@ pub mod resolvers {
panic!("cache::resolvers: dummy resolver should not have been invoked");
}

fn get_path(&self, _file_id: FileId) -> &OsStr {
fn get_path(&self, _file_id: FileId) -> Option<&OsStr> {
panic!("cache::resolvers: dummy resolver should not have been invoked");
}
}
Expand Down Expand Up @@ -1381,7 +1433,7 @@ pub mod resolvers {
fn resolve(
&mut self,
path: &OsStr,
_parent: Option<PathBuf>,
_parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
let file_id = self
Expand Down Expand Up @@ -1417,8 +1469,8 @@ pub mod resolvers {
self.term_cache.get(&file_id).cloned()
}

fn get_path(&self, file_id: FileId) -> &OsStr {
self.files.name(file_id)
fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
Some(self.files.name(file_id))
}
}
}
9 changes: 5 additions & 4 deletions core/src/program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use crate::{

use codespan::FileId;
use codespan_reporting::term::termcolor::Ansi;
use std::path::PathBuf;

use std::{
ffi::OsString,
Expand Down Expand Up @@ -66,8 +67,7 @@ impl QueryPath {
grammar::FieldPathParser, lexer::Lexer, utils::FieldPathElem, ErrorTolerantParser,
};

let format_name = "query-path";
let input_id = cache.replace_string(format_name, input);
let input_id = cache.replace_string(SourcePath::Query, input);
let s = cache.source(input_id);

let parser = FieldPathParser::new();
Expand Down Expand Up @@ -166,7 +166,8 @@ impl<EC: EvalCache> Program<EC> {
S: Into<OsString> + Clone,
{
let mut cache = Cache::new(ErrorTolerance::Strict);
let main_id = cache.add_source(source_name, source)?;
let path = PathBuf::from(source_name.into());
let main_id = cache.add_source(SourcePath::Path(path), source)?;
let vm = VirtualMachine::new(cache, trace);

Ok(Self {
Expand Down Expand Up @@ -242,7 +243,7 @@ impl<EC: EvalCache> Program<EC> {
let value_file_id = self
.vm
.import_resolver_mut()
.add_string(format!("<override {}>", ovd.path.join(".")), ovd.value);
.add_string(SourcePath::Override(ovd.path.clone()), ovd.value);
self.vm.prepare_eval(value_file_id)?;
record = record
.path(ovd.path)
Expand Down
Loading

0 comments on commit 30ac61e

Please sign in to comment.