perf: Optimizing parser performance
Signed-off-by: he1pa <18012015693@163.com>
He1pa committed Nov 4, 2024
1 parent a6381df commit aa920fc
Showing 3 changed files with 87 additions and 89 deletions.
2 changes: 1 addition & 1 deletion kclvm/api/src/service/service_impl.rs
@@ -191,7 +191,7 @@ impl KclvmServiceImpl {
             deps: result
                 .deps
                 .iter()
-                .map(|p| p.path.to_str().unwrap().to_string())
+                .map(|p| p.get_path().to_str().unwrap().to_string())
                 .collect(),
             errors: result.errors.into_iter().map(|e| e.into_error()).collect(),
         })
18 changes: 13 additions & 5 deletions kclvm/parser/src/file_graph.rs
@@ -2,22 +2,30 @@ use std::{collections::HashMap, path::PathBuf};
 
 use indexmap::IndexMap;
 use kclvm_ast::ast::Module;
+use kclvm_utils::path::PathPrefix;
 use petgraph::{prelude::StableDiGraph, visit::EdgeRef};
 use std::hash::Hash;
 /// File with package info
 #[derive(Debug, Clone, Hash, Eq, PartialEq)]
 pub struct PkgFile {
-    pub path: PathBuf,
+    path: PathBuf,
     pub pkg_path: String,
 }
 
 impl PkgFile {
-    pub fn canonicalize(&self) -> PathBuf {
-        match self.path.canonicalize() {
-            Ok(p) => p.clone(),
-            _ => self.path.clone(),
+    pub fn new(path: PathBuf, pkg_path: String) -> PkgFile {
+        match path.canonicalize() {
+            Ok(p) => PkgFile {
+                path: PathBuf::from(p.adjust_canonicalization()),
+                pkg_path,
+            },
+            Err(_) => PkgFile { path, pkg_path },
         }
     }
+
+    pub fn get_path(&self) -> &PathBuf {
+        &self.path
+    }
 }
 
 #[derive(Debug, Clone, Hash, Eq, PartialEq)]
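
This hunk is the heart of the change: `Path::canonicalize` does real filesystem work (it resolves symlinks), and the old `PkgFile::canonicalize` method repeated that work on every cache access. The new constructor canonicalizes once, stores the result in a now-private field, and `get_path` hands out the cached value; the `adjust_canonicalization` call (from `kclvm_utils`) additionally normalizes the `\\?\`-prefixed form that `canonicalize` produces on Windows. A self-contained sketch of the pattern, with the Windows adjustment omitted and simplified types (this is illustrative, not the kclvm code):

use std::collections::HashMap;
use std::path::PathBuf;

#[derive(Debug, Clone, Hash, Eq, PartialEq)]
struct PkgFile {
    // Private: guaranteed canonical after construction, so it can be used
    // directly as a cache key without re-canonicalizing on every lookup.
    path: PathBuf,
    pkg_path: String,
}

impl PkgFile {
    fn new(path: PathBuf, pkg_path: String) -> PkgFile {
        // canonicalize() hits the filesystem, so do it exactly once here.
        match path.canonicalize() {
            Ok(p) => PkgFile { path: p, pkg_path },
            Err(_) => PkgFile { path, pkg_path }, // e.g. the file does not exist yet
        }
    }

    fn get_path(&self) -> &PathBuf {
        &self.path
    }
}

fn main() {
    let file = PkgFile::new(PathBuf::from("src/lib.rs"), "main".to_string());
    // Lookups now key on the pre-computed path: no syscalls per access.
    let mut ast_cache: HashMap<PathBuf, String> = HashMap::new();
    ast_cache.insert(file.get_path().clone(), "parsed module".to_string());
    assert!(ast_cache.contains_key(file.get_path()));
}
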
156 changes: 73 additions & 83 deletions kclvm/parser/src/lib.rs
@@ -129,10 +129,7 @@ pub fn parse_single_file(filename: &str, code: Option<String>) -> Result<ParseFi
             ))
         }
     };
-    let file = PkgFile {
-        path: PathBuf::from(filename),
-        pkg_path: MAIN_PKG.to_string(),
-    };
+    let file = PkgFile::new(PathBuf::from(filename), MAIN_PKG.to_string());
     let deps = if file_graph.contains_file(&file) {
         file_graph.dependencies_of(&file).into_iter().collect()
     } else {
@@ -690,28 +687,28 @@ pub fn parse_file(
     let src = match src {
         Some(src) => Some(src),
         None => match &module_cache.read() {
-            Ok(cache) => cache.source_code.get(&file.canonicalize()),
+            Ok(cache) => cache.source_code.get(file.get_path()),
             Err(_) => None,
         }
         .cloned(),
     };
-    let m = parse_file_with_session(sess.clone(), file.path.to_str().unwrap(), src)?;
+    let m = parse_file_with_session(sess.clone(), file.get_path().to_str().unwrap(), src)?;
     let deps = get_deps(&file, &m, pkgs, pkgmap, opts, sess)?;
     let dep_files = deps.keys().map(|f| f.clone()).collect();
     pkgmap.extend(deps.clone());
     match &mut module_cache.write() {
         Ok(module_cache) => {
             module_cache
                 .ast_cache
-                .insert(file.canonicalize(), Arc::new(RwLock::new(m)));
-            match module_cache.file_pkg.get_mut(&file.canonicalize()) {
+                .insert(file.get_path().clone(), Arc::new(RwLock::new(m)));
+            match module_cache.file_pkg.get_mut(&file.get_path().clone()) {
                 Some(s) => {
                     s.insert(file.clone());
                 }
                 None => {
                     let mut s = HashSet::new();
                     s.insert(file.clone());
-                    module_cache.file_pkg.insert(file.canonicalize(), s);
+                    module_cache.file_pkg.insert(file.get_path().clone(), s);
                 }
             }
             module_cache.dep_cache.insert(file.clone(), deps);
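
For context, `file_pkg` maps a canonical path to the set of packages the file has been parsed under, since one `.k` file can be imported by several packages. The Some/None match above is the usual insert-or-update dance; a minimal sketch with simplified key and value types (Strings rather than the kclvm structs) showing the equivalent `entry()` form:

use std::collections::{HashMap, HashSet};

// Returns true if this (path, pkg) pair was not recorded before.
fn record_pkg(file_pkg: &mut HashMap<String, HashSet<String>>, path: &str, pkg: &str) -> bool {
    file_pkg
        .entry(path.to_string())
        .or_default()
        .insert(pkg.to_string())
}

fn main() {
    let mut file_pkg = HashMap::new();
    assert!(record_pkg(&mut file_pkg, "a.k", "main"));
    assert!(record_pkg(&mut file_pkg, "a.k", "sub")); // same file, new package
    assert!(!record_pkg(&mut file_pkg, "a.k", "main")); // duplicate
}

The diff keeps the explicit match, which reads more naturally when the two arms diverge further, as they do in parse_entry below.
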
@@ -763,10 +760,7 @@ pub fn get_deps(
     }
 
     pkg_info.k_files.iter().for_each(|p| {
-        let file = PkgFile {
-            path: p.into(),
-            pkg_path: pkg_info.pkg_path.clone(),
-        };
+        let file = PkgFile::new(p.into(), pkg_info.pkg_path.clone());
         deps.insert(
             file.clone(),
             file_graph::Pkg {
@@ -815,16 +809,14 @@ pub fn parse_entry(
     pkgmap: &mut PkgMap,
     file_graph: FileGraphCache,
     opts: &LoadProgramOptions,
+    parsed_file: &mut HashSet<PkgFile>,
 ) -> Result<HashSet<PkgFile>> {
     let k_files = entry.get_k_files();
     let maybe_k_codes = entry.get_k_codes();
     let mut files = vec![];
     let mut new_files = HashSet::new();
     for (i, f) in k_files.iter().enumerate() {
-        let file = PkgFile {
-            path: f.adjust_canonicalization().into(),
-            pkg_path: MAIN_PKG.to_string(),
-        };
+        let file = PkgFile::new(f.adjust_canonicalization().into(), MAIN_PKG.to_string());
         files.push((file.clone(), maybe_k_codes.get(i).unwrap_or(&None).clone()));
         new_files.insert(file.clone());
         pkgmap.insert(
@@ -845,76 +837,73 @@ pub fn parse_entry(
         opts,
     )?;
     let mut unparsed_file: VecDeque<PkgFile> = dependent_paths.into();
-    let mut parsed_file: HashSet<PkgFile> = HashSet::new();
     while let Some(file) = unparsed_file.pop_front() {
-        if parsed_file.insert(file.clone()) {
-            match &mut module_cache.write() {
-                Ok(m_cache) => match m_cache.file_pkg.get_mut(&file.canonicalize()) {
-                    Some(s) => {
-                        // The module ast has been parsed, but does not belong to the same package
-                        if s.insert(file.clone()) {
-                            new_files.insert(file.clone());
-                        }
-                    }
-                    None => {
-                        let mut s = HashSet::new();
-                        s.insert(file.clone());
-                        m_cache.file_pkg.insert(file.canonicalize(), s);
-                        new_files.insert(file.clone());
-                    }
-                },
-                Err(e) => return Err(anyhow::anyhow!("Parse file failed: {e}")),
-            }
-
-            let module_cache_read = module_cache.read();
-            match &module_cache_read {
-                Ok(m_cache) => match m_cache.ast_cache.get(&file.canonicalize()) {
-                    Some(m) => {
-                        let deps = m_cache.dep_cache.get(&file).cloned().unwrap_or_else(|| {
-                            get_deps(&file, &m.read().unwrap(), pkgs, pkgmap, opts, sess.clone())
-                                .unwrap()
-                        });
-                        let dep_files: Vec<PkgFile> = deps.keys().map(|f| f.clone()).collect();
-                        pkgmap.extend(deps.clone());
-
-                        match &mut file_graph.write() {
-                            Ok(file_graph) => {
-                                file_graph.update_file(&file, &dep_files);
-
-                                for dep in dep_files {
-                                    if !parsed_file.contains(&dep) {
-                                        unparsed_file.push_back(dep.clone());
-                                    }
-                                }
-
-                                continue;
-                            }
-                            Err(e) => return Err(anyhow::anyhow!("Parse entry failed: {e}")),
-                        }
-                    }
-                    None => {
-                        new_files.insert(file.clone());
-                        drop(module_cache_read);
-                        let deps = parse_file(
-                            sess.clone(),
-                            file,
-                            None,
-                            module_cache.clone(),
-                            pkgs,
-                            pkgmap,
-                            file_graph.clone(),
-                            &opts,
-                        )?;
-                        for dep in deps {
-                            if !parsed_file.contains(&dep) {
-                                unparsed_file.push_back(dep.clone());
-                            }
-                        }
-                    }
-                },
-                Err(e) => return Err(anyhow::anyhow!("Parse entry failed: {e}")),
-            };
-        }
+        match &mut module_cache.write() {
+            Ok(m_cache) => match m_cache.file_pkg.get_mut(file.get_path()) {
+                Some(s) => {
+                    // The module ast has been parsed, but does not belong to the same package
+                    if s.insert(file.clone()) {
+                        new_files.insert(file.clone());
+                    }
+                }
+                None => {
+                    let mut s = HashSet::new();
+                    s.insert(file.clone());
+                    m_cache.file_pkg.insert(file.get_path().clone(), s);
+                    new_files.insert(file.clone());
+                }
+            },
+            Err(e) => return Err(anyhow::anyhow!("Parse file failed: {e}")),
+        }
+
+        let module_cache_read = module_cache.read();
+        match &module_cache_read {
+            Ok(m_cache) => match m_cache.ast_cache.get(file.get_path()) {
+                Some(m) => {
+                    let deps = m_cache.dep_cache.get(&file).cloned().unwrap_or_else(|| {
+                        get_deps(&file, &m.read().unwrap(), pkgs, pkgmap, opts, sess.clone())
+                            .unwrap()
+                    });
+                    let dep_files: Vec<PkgFile> = deps.keys().map(|f| f.clone()).collect();
+                    pkgmap.extend(deps.clone());
+
+                    match &mut file_graph.write() {
+                        Ok(file_graph) => {
+                            file_graph.update_file(&file, &dep_files);
+
+                            for dep in dep_files {
+                                if parsed_file.insert(dep.clone()) {
+                                    unparsed_file.push_back(dep.clone());
+                                }
+                            }
+
+                            continue;
+                        }
+                        Err(e) => return Err(anyhow::anyhow!("Parse entry failed: {e}")),
+                    }
+                }
+                None => {
+                    new_files.insert(file.clone());
+                    drop(module_cache_read);
+                    let deps = parse_file(
+                        sess.clone(),
+                        file,
+                        None,
+                        module_cache.clone(),
+                        pkgs,
+                        pkgmap,
+                        file_graph.clone(),
+                        &opts,
+                    )?;
+                    for dep in deps {
+                        if parsed_file.insert(dep.clone()) {
+                            unparsed_file.push_back(dep.clone());
+                        }
+                    }
+                }
+            },
+            Err(e) => return Err(anyhow::anyhow!("Parse entry failed: {e}")),
+        };
     }
     Ok(new_files)
 }
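
The restructuring of parse_entry above is the second half of the optimization: the visited check moves from pop time (the old `if parsed_file.insert(file.clone())` wrapper around the whole loop body) to enqueue time (`if parsed_file.insert(dep.clone())` before `push_back`), so a dependency shared by many files enters the queue at most once instead of being popped and re-checked repeatedly. A minimal, self-contained sketch of that worklist shape, with a toy node type and dependency function (not the kclvm API):

use std::collections::{HashSet, VecDeque};

fn walk_deps<F>(roots: Vec<u32>, visited: &mut HashSet<u32>, mut deps_of: F) -> usize
where
    F: FnMut(u32) -> Vec<u32>,
{
    let mut queue: VecDeque<u32> = roots.into();
    let mut processed = 0;
    while let Some(node) = queue.pop_front() {
        processed += 1;
        for dep in deps_of(node) {
            // insert() returns true only the first time a dep is seen,
            // so the queue never holds duplicates.
            if visited.insert(dep) {
                queue.push_back(dep);
            }
        }
    }
    processed
}

fn main() {
    let mut visited = HashSet::new();
    // A tiny diamond graph: 1 -> {2, 3}, 2 -> {4}, 3 -> {4}.
    let n = walk_deps(vec![1], &mut visited, |n| match n {
        1 => vec![2, 3],
        2 | 3 => vec![4],
        _ => vec![],
    });
    assert_eq!(n, 4); // node 4 is enqueued once, not twice
}
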
@@ -931,7 +920,7 @@ pub fn parse_program(
     let mut pkgs: HashMap<String, Vec<String>> = HashMap::new();
     let mut pkgmap = PkgMap::new();
     let mut new_files = HashSet::new();
-
+    let mut parsed_file: HashSet<PkgFile> = HashSet::new();
     for entry in compile_entries.iter() {
         new_files.extend(parse_entry(
             sess.clone(),
@@ -941,6 +930,7 @@
             &mut pkgmap,
             file_graph.clone(),
             &opts,
+            &mut parsed_file,
         )?);
     }
 
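Hoisting `parsed_file` into parse_program and threading it through as `&mut` is what makes the visited set span compile entries: files parsed while handling the first entry are skipped for later ones. A toy sketch of that shape (illustrative names only, not the kclvm functions):

use std::collections::HashSet;

fn process_entry(entry: &str, parsed: &mut HashSet<String>) -> usize {
    // Pretend each entry depends on its own file plus one shared file.
    let deps = vec![format!("{entry}.k"), "shared/lib.k".to_string()];
    // Count only the deps that actually need parsing this time around.
    deps.into_iter().filter(|d| parsed.insert(d.clone())).count()
}

fn main() {
    let mut parsed = HashSet::new();
    let first = process_entry("a", &mut parsed); // parses a.k and shared/lib.k
    let second = process_entry("b", &mut parsed); // shared/lib.k already parsed
    assert_eq!((first, second), (2, 1));
}
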
@@ -979,14 +969,14 @@
 
     let mut modules: HashMap<String, Arc<RwLock<Module>>> = HashMap::new();
     for file in files.iter() {
-        let filename = file.path.adjust_canonicalization();
+        let filename = file.get_path().to_str().unwrap().to_string();
         let m_ref = match module_cache.read() {
             Ok(module_cache) => module_cache
                 .ast_cache
-                .get(&file.canonicalize())
+                .get(file.get_path())
                 .expect(&format!(
                     "Module not found in module: {:?}",
-                    file.canonicalize()
+                    file.get_path()
                 ))
                 .clone(),
             Err(e) => return Err(anyhow::anyhow!("Parse program failed: {e}")),
Expand All @@ -999,7 +989,7 @@ pub fn parse_program(
modules.insert(filename.clone(), m_ref);
match pkgs.get_mut(&file.pkg_path) {
Some(pkg_modules) => {
pkg_modules.push(filename);
pkg_modules.push(filename.clone());
}
None => {
pkgs.insert(file.pkg_path.clone(), vec![filename]);
@@ -1016,6 +1006,6 @@
     Ok(LoadProgramResult {
         program,
         errors: sess.1.read().diagnostics.clone(),
-        paths: files.iter().map(|file| file.path.clone()).collect(),
+        paths: files.iter().map(|file| file.get_path().clone()).collect(),
     })
 }
