Make an enum for different input sources #1560

Merged (4 commits), Sep 1, 2023
168 changes: 110 additions & 58 deletions core/src/cache.rs
@@ -66,8 +66,9 @@ impl InputFormat {
pub struct Cache {
/// The content of the program sources plus imports.
files: Files<String>,
file_paths: HashMap<FileId, SourcePath>,
/// The name-id table, holding file ids stored in the database indexed by source names.
file_ids: HashMap<OsString, NameIdEntry>,
file_ids: HashMap<SourcePath, NameIdEntry>,
/// Map containing for each FileId a list of files they import (directly).
imports: HashMap<FileId, HashSet<FileId>>,
/// Map containing for each FileId a list of files importing them (directly).
@@ -129,7 +130,7 @@ pub struct TermEntry {
/// are not auto-refreshed. If an in-memory buffer has a path that also exists in the
/// filesystem, we will not even check that file to see if it has changed.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
enum Source {
enum SourceKind {
Filesystem(SystemTime),
Memory,
}
@@ -150,7 +151,7 @@ enum Source {
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
pub struct NameIdEntry {
id: FileId,
source: Source,
source: SourceKind,
}

/// The state of an entry of the term cache.
@@ -221,6 +222,68 @@ impl<E> CacheError<E> {
}
}

/// Input data usually comes from files on the file system, but there are also
/// lots of cases where we want to synthesize other kinds of inputs.
///
/// Note that a `SourcePath` does not uniquely identify a cached input:
/// - Some functions (like [`Cache::add_file`]) add a new cached input unconditionally.
/// - [`Cache::get_or_add_file`] will add a new cached input at the same `SourcePath` if
/// the file on disk was updated.
///
/// The equality checking of `SourcePath` only affects [`Cache::replace_string`], which
/// overwrites any previous cached input with the same `SourcePath`.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum SourcePath {
/// A file at the given path.
///
/// Note that this does not need to be a real file on the filesystem: it could still
/// be loaded from memory by, e.g., [`Cache::add_string`].
///
/// This is the only `SourcePath` variant that can be resolved as the target
/// of an import statement.
Path(PathBuf),
/// A subrange of a file at the given path.
///
/// This is used by nls to analyze small parts of files that don't fully parse. The
/// original file path is preserved, because it's needed for resolving imports.
Snippet(PathBuf),
Std(StdlibModule),
Query,
ReplInput(usize),
ReplTypecheck,
ReplQuery,
Override(Vec<String>),
}
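
For illustration only, a minimal sketch (not part of the patch) of how these variants flow into the cache entry points changed below. `Cache::new(ErrorTolerance::Strict)`, `add_string`, and `replace_string` are taken from this diff; the crate path and the wrapper function are assumptions.

```rust
use nickel_lang_core::cache::{Cache, ErrorTolerance, SourcePath};

fn sketch() {
    let mut cache = Cache::new(ErrorTolerance::Strict);

    // Each REPL input gets its own numbered variant instead of a formatted string name.
    let first = cache.add_string(SourcePath::ReplInput(0), String::from("1 + 1"));

    // Queries reuse a single slot: replace_string overwrites the previous entry
    // whose SourcePath compares equal, keeping the same FileId.
    let q1 = cache.replace_string(SourcePath::Query, String::from("foo.bar"));
    let q2 = cache.replace_string(SourcePath::Query, String::from("foo.baz"));
    assert_eq!(q1, q2);

    let _ = first;
}
```

The `assert_eq!` relies on `replace_string` reusing the existing `FileId` when an entry with an equal `SourcePath` is already present, as implemented further down in this file.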

impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {
type Error = ();

fn try_from(value: &'a SourcePath) -> Result<Self, Self::Error> {
match value {
SourcePath::Path(p) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
_ => Err(()),
}
}
}

// [`Files`] needs to have an OsString for each file, so we synthesize names even for
// sources that don't have them. They don't need to be unique; they're just used for
// diagnostics.
impl From<SourcePath> for OsString {
fn from(source_path: SourcePath) -> Self {
match source_path {
SourcePath::Path(p) | SourcePath::Snippet(p) => p.into(),
SourcePath::Std(StdlibModule::Std) => "<stdlib/std.ncl>".into(),
SourcePath::Std(StdlibModule::Internals) => "<stdlib/internals.ncl>".into(),
SourcePath::Query => "<query>".into(),
SourcePath::ReplInput(idx) => format!("<repl-input-{idx}>").into(),
SourcePath::ReplTypecheck => "<repl-typecheck>".into(),
SourcePath::ReplQuery => "<repl-query>".into(),
SourcePath::Override(path) => format!("<override {}>", path.join(".")).into(),
}
}
}
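
To make the synthesized names concrete, a hypothetical check of the two conversions above; the expected strings are read directly off the `From` impl, while the import paths and the test scaffolding are assumptions.

```rust
use std::ffi::{OsStr, OsString};

use nickel_lang_core::cache::SourcePath;

fn check_names() {
    // Synthetic sources get bracketed placeholder names, used only for diagnostics.
    assert_eq!(OsString::from(SourcePath::Query), OsString::from("<query>"));
    assert_eq!(
        OsString::from(SourcePath::Override(vec!["a".into(), "b".into()])),
        OsString::from("<override a.b>")
    );

    // Only Path and Snippet can be turned back into a real path; every other
    // variant fails, which is why get_path returns an Option further down.
    assert!(<&OsStr>::try_from(&SourcePath::Query).is_err());
    assert!(<&OsStr>::try_from(&SourcePath::Path("foo.ncl".into())).is_ok());
}
```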

/// Return status indicating if an import has been resolved from a file (first encounter), or was
/// retrieved from the cache.
///
@@ -246,6 +309,7 @@ impl Cache {
Cache {
files: Files::new(),
file_ids: HashMap::new(),
file_paths: HashMap::new(),
terms: HashMap::new(),
wildcards: HashMap::new(),
imports: HashMap::new(),
@@ -258,29 +322,18 @@
}
}

/// Load a file in the file database. Do not insert an entry in the name-id table.
fn load_file(&mut self, path: impl Into<OsString>) -> io::Result<FileId> {
let path = path.into();
let mut buffer = String::new();
fs::File::open(&path)
.and_then(|mut file| file.read_to_string(&mut buffer))
.map(|_| self.files.add(path, buffer))
}

/// Same as [Self::add_file], but assume that the path is already normalized, and take the
/// timestamp as a parameter.
fn add_file_(
&mut self,
path: impl Into<OsString>,
timestamp: SystemTime,
) -> io::Result<FileId> {
let path = path.into();
let file_id = self.load_file(path.clone())?;
fn add_file_(&mut self, path: PathBuf, timestamp: SystemTime) -> io::Result<FileId> {
let contents = std::fs::read_to_string(&path)?;
let file_id = self.files.add(&path, contents);
self.file_paths
.insert(file_id, SourcePath::Path(path.clone()));
self.file_ids.insert(
path,
SourcePath::Path(path),
NameIdEntry {
id: file_id,
source: Source::Filesystem(timestamp),
source: SourceKind::Filesystem(timestamp),
},
);
Ok(file_id)
@@ -303,7 +356,7 @@ impl Cache {
pub fn get_or_add_file(&mut self, path: impl Into<OsString>) -> io::Result<CacheOp<FileId>> {
let path = path.into();
let normalized = normalize_path(&path)?;
match self.id_or_new_timestamp_of(&path)? {
match self.id_or_new_timestamp_of(path.as_ref())? {
SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)),
SourceState::Stale(timestamp) => {
self.add_file_(normalized, timestamp).map(CacheOp::Done)
@@ -315,10 +368,9 @@
///
/// Do not check if a source with the same name already exists: if it is the
/// case, this one will override the old entry in the name-id table.
pub fn add_source<T, S>(&mut self, source_name: S, mut source: T) -> io::Result<FileId>
pub fn add_source<T>(&mut self, source_name: SourcePath, mut source: T) -> io::Result<FileId>
where
T: Read,
S: Into<OsString>,
{
let mut buffer = String::new();
source.read_to_string(&mut buffer)?;
@@ -333,14 +385,14 @@
///
/// Do not check if a source with the same name already exists: if it is the case, this one
/// will override the old entry in the name-id table but the old `FileId` will remain valid.
pub fn add_string(&mut self, source_name: impl Into<OsString>, s: String) -> FileId {
let source_name = source_name.into();
pub fn add_string(&mut self, source_name: SourcePath, s: String) -> FileId {
let id = self.files.add(source_name.clone(), s);
self.file_paths.insert(id, source_name.clone());
self.file_ids.insert(
source_name,
NameIdEntry {
id,
source: Source::Memory,
source: SourceKind::Memory,
},
);
id
@@ -353,19 +405,19 @@
///
/// Used to store intermediate short-lived generated snippets that need to have a
/// corresponding `FileId`, such as when querying or reporting errors.
pub fn replace_string(&mut self, source_name: impl Into<OsString>, s: String) -> FileId {
let source_name = source_name.into();
pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
if let Some(file_id) = self.id_of(&source_name) {
self.files.update(file_id, s);
self.terms.remove(&file_id);
file_id
} else {
let file_id = self.files.add(source_name.clone(), s);
self.file_paths.insert(file_id, source_name.clone());
self.file_ids.insert(
source_name,
NameIdEntry {
id: file_id,
source: Source::Memory,
source: SourceKind::Memory,
},
);
file_id
@@ -878,10 +930,13 @@ impl Cache {
///
/// Note that files added via [Self::add_file] are indexed by their full normalized path (cf
/// [normalize_path]).
pub fn id_of(&self, name: impl AsRef<OsStr>) -> Option<FileId> {
match self.id_or_new_timestamp_of(name).ok()? {
SourceState::UpToDate(id) => Some(id),
SourceState::Stale(_) => None,
pub fn id_of(&self, name: &SourcePath) -> Option<FileId> {
match name {
SourcePath::Path(p) => match self.id_or_new_timestamp_of(p).ok()? {
SourceState::UpToDate(id) => Some(id),
SourceState::Stale(_) => None,
},
name => Some(self.file_ids.get(name)?.id),
}
}

@@ -893,13 +948,12 @@
///
/// The main point of this awkward signature is to minimize I/O operations: if we accessed
/// the timestamp, keep it around.
fn id_or_new_timestamp_of(&self, name: impl AsRef<OsStr>) -> io::Result<SourceState> {
let name = name.as_ref();
match self.file_ids.get(name) {
fn id_or_new_timestamp_of(&self, name: &Path) -> io::Result<SourceState> {
match self.file_ids.get(&SourcePath::Path(name.to_owned())) {
None => Ok(SourceState::Stale(timestamp(name)?)),
Some(NameIdEntry {
id,
source: Source::Filesystem(ts),
source: SourceKind::Filesystem(ts),
}) => {
let new_timestamp = timestamp(name)?;
if ts == &new_timestamp {
@@ -910,7 +964,7 @@
}
Some(NameIdEntry {
id,
source: Source::Memory,
source: SourceKind::Memory,
}) => Ok(SourceState::UpToDate(*id)),
}
}
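
The staleness logic above boils down to: a filesystem-backed entry is reused only while the file's recorded modification time still matches, whereas in-memory entries are always considered up to date. A simplified, self-contained analogue of that check (names are illustrative, not from this patch):

```rust
use std::{fs, io, path::Path, time::SystemTime};

/// Returns true if `path` has been modified since `recorded` was taken,
/// i.e. a cached entry backed by this file should be reloaded.
fn is_stale(recorded: SystemTime, path: &Path) -> io::Result<bool> {
    let current = fs::metadata(path)?.modified()?;
    Ok(current != recorded)
}
```

In the real `id_or_new_timestamp_of`, the freshly read timestamp is also returned on the stale path so that `add_file_` can store it without a second metadata call.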
@@ -1023,11 +1077,10 @@ impl Cache {
let file_ids: HashMap<StdlibModule, FileId> = nickel_stdlib::modules()
.into_iter()
.map(|module| {
let name = module.file_name();
let content = module.content();
(
module,
self.add_string(OsString::from(name), String::from(content)),
self.add_string(SourcePath::Std(module), String::from(content)),
)
})
.collect();
@@ -1205,24 +1258,25 @@ pub trait ImportResolver {
fn resolve(
&mut self,
path: &OsStr,
parent: Option<PathBuf>,
parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError>;

/// Get a resolved import from the term cache.
fn get(&self, file_id: FileId) -> Option<RichTerm>;
/// Return the (potentially normalized) file path corresponding to the ID of a resolved import.
fn get_path(&self, file_id: FileId) -> &OsStr;
fn get_path(&self, file_id: FileId) -> Option<&OsStr>;
}
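
A hedged sketch of calling the updated trait: the parent is now identified by its `FileId` rather than a path, and `get_path` may return `None` for synthetic sources. Only `resolve` comes from the trait above; the crate paths and the wrapper function are assumptions.

```rust
use std::ffi::OsStr;

use codespan::FileId;
use nickel_lang_core::{cache::ImportResolver, error::ImportError, position::TermPos};

// Resolve `import` relative to the file identified by `parent`; the resolver
// looks the parent's path back up internally (when it has one).
fn resolve_child<R: ImportResolver>(
    resolver: &mut R,
    import: &OsStr,
    parent: FileId,
    pos: &TermPos,
) -> Result<FileId, ImportError> {
    let (_resolved, file_id) = resolver.resolve(import, Some(parent), pos)?;
    Ok(file_id)
}
```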

impl ImportResolver for Cache {
fn resolve(
&mut self,
path: &OsStr,
parent: Option<PathBuf>,
parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
let path_buf = with_parent(path, parent.clone());
let parent_path = parent.and_then(|p| self.get_path(p)).map(PathBuf::from);
let path_buf = with_parent(path, parent_path);
let format = InputFormat::from_path(&path_buf).unwrap_or(InputFormat::Nickel);
let id_op = self.get_or_add_file(&path_buf).map_err(|err| {
ImportError::IOError(
@@ -1237,12 +1291,8 @@ impl ImportResolver for Cache {
};

if let Some(parent) = parent {
let parent_id = self.id_of(parent).unwrap();
self.imports.entry(parent_id).or_default().insert(file_id);
self.rev_imports
.entry(file_id)
.or_default()
.insert(parent_id);
self.imports.entry(parent).or_default().insert(file_id);
self.rev_imports.entry(file_id).or_default().insert(parent);
}

self.parse_multi(file_id, format)
@@ -1260,8 +1310,10 @@ impl ImportResolver for Cache {
})
}

fn get_path(&self, file_id: FileId) -> &OsStr {
self.files.name(file_id)
fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
self.file_paths
.get(&file_id)
.and_then(|p| p.try_into().ok())
}
}

@@ -1340,7 +1392,7 @@ pub mod resolvers {
fn resolve(
&mut self,
_path: &OsStr,
_parent: Option<PathBuf>,
_parent: Option<FileId>,
_pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
panic!("cache::resolvers: dummy resolver should not have been invoked");
Expand All @@ -1350,7 +1402,7 @@ pub mod resolvers {
panic!("cache::resolvers: dummy resolver should not have been invoked");
}

fn get_path(&self, _file_id: FileId) -> &OsStr {
fn get_path(&self, _file_id: FileId) -> Option<&OsStr> {
panic!("cache::resolvers: dummy resolver should not have been invoked");
}
}
Expand Down Expand Up @@ -1381,7 +1433,7 @@ pub mod resolvers {
fn resolve(
&mut self,
path: &OsStr,
_parent: Option<PathBuf>,
_parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
let file_id = self
Expand Down Expand Up @@ -1417,8 +1469,8 @@ pub mod resolvers {
self.term_cache.get(&file_id).cloned()
}

fn get_path(&self, file_id: FileId) -> &OsStr {
self.files.name(file_id)
fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
Some(self.files.name(file_id))
}
}
}
9 changes: 5 additions & 4 deletions core/src/program.rs
@@ -35,6 +35,7 @@ use crate::{

use codespan::FileId;
use codespan_reporting::term::termcolor::Ansi;
use std::path::PathBuf;

use std::{
ffi::OsString,
@@ -66,8 +67,7 @@ impl QueryPath {
grammar::FieldPathParser, lexer::Lexer, utils::FieldPathElem, ErrorTolerantParser,
};

let format_name = "query-path";
let input_id = cache.replace_string(format_name, input);
let input_id = cache.replace_string(SourcePath::Query, input);
let s = cache.source(input_id);

let parser = FieldPathParser::new();
@@ -166,7 +166,8 @@ impl<EC: EvalCache> Program<EC> {
S: Into<OsString> + Clone,
{
let mut cache = Cache::new(ErrorTolerance::Strict);
let main_id = cache.add_source(source_name, source)?;
let path = PathBuf::from(source_name.into());
let main_id = cache.add_source(SourcePath::Path(path), source)?;
let vm = VirtualMachine::new(cache, trace);

Ok(Self {
@@ -242,7 +243,7 @@ impl<EC: EvalCache> Program<EC> {
let value_file_id = self
.vm
.import_resolver_mut()
.add_string(format!("<override {}>", ovd.path.join(".")), ovd.value);
.add_string(SourcePath::Override(ovd.path.clone()), ovd.value);
self.vm.prepare_eval(value_file_id)?;
record = record
.path(ovd.path)