diff --git a/stage0/src/ast/func.rs b/stage0/src/ast/func.rs
index 6be174f..f0df8ef 100644
--- a/stage0/src/ast/func.rs
+++ b/stage0/src/ast/func.rs
@@ -1,5 +1,7 @@
 use super::{Attribute, Statement, Type};
-use crate::lexer::{FnKeyword, Identifier};
+use crate::codegen::{Codegen, LlvmFunc, LlvmType, LlvmVoid};
+use crate::lexer::{FnKeyword, Identifier, SyntaxError};
+use std::ffi::CString;
 
 /// A function.
 pub struct Function {
@@ -29,6 +31,64 @@ impl Function {
             body,
         }
     }
+
+    /// Declares this function in the LLVM module of `cx`.
+    ///
+    /// The symbol name is produced by [`Codegen::encode_name`] from `container` and the function
+    /// name. Returns a [`SyntaxError`] if a function with that symbol already exists or if any
+    /// parameter has the never type.
+    pub fn build<'a, 'b: 'a>(
+        &self,
+        cx: &'a Codegen<'b>,
+        container: &str,
+    ) -> Result<LlvmFunc<'a, 'b>, SyntaxError> {
+        // Check if function already exists.
+        let name = CString::new(cx.encode_name(container, self.name.value())).unwrap();
+
+        if LlvmFunc::get(cx, &name).is_some() {
+            return Err(SyntaxError::new(
+                self.name.span().clone(),
+                "multiple definition of the same name",
+            ));
+        }
+
+        // Get params.
+        let mut params = Vec::<LlvmType<'a, 'b>>::new();
+
+        for p in &self.params {
+            let ty = match p.ty.build(cx) {
+                Some(v) => v,
+                None => {
+                    return Err(SyntaxError::new(
+                        p.ty.name().span(),
+                        "function parameter cannot be a never type",
+                    ));
+                }
+            };
+
+            params.push(ty);
+        }
+
+        // Get return type.
+        // A never return type is declared as void; `never` records that but is not read yet.
+        let mut never = false;
+        let ret = match &self.ret {
+            Some(v) => match v.build(cx) {
+                Some(v) => v,
+                None => {
+                    never = true;
+                    LlvmType::Void(LlvmVoid::new(cx))
+                }
+            },
+            None => LlvmType::Void(LlvmVoid::new(cx)),
+        };
+
+        // Create a function.
+        let func = LlvmFunc::new(cx, name, &params, ret);
+
+        // TODO: Build function body.
+        Ok(func)
+    }
 }
 
 /// A function parameter.
diff --git a/stage0/src/ast/mod.rs b/stage0/src/ast/mod.rs
index 77cc1ee..5aac3fa 100644
--- a/stage0/src/ast/mod.rs
+++ b/stage0/src/ast/mod.rs
@@ -4,6 +4,7 @@ pub use self::expr::*;
 pub use self::func::*;
 pub use self::imp::*;
 pub use self::node::*;
+pub use self::path::*;
 pub use self::stmt::*;
 pub use self::struc::*;
 pub use self::ty::*;
@@ -21,6 +22,7 @@ mod expr;
 mod func;
 mod imp;
 mod node;
+mod path;
 mod stmt;
 mod struc;
 mod ty;
@@ -55,6 +57,10 @@ impl SourceFile {
         Ok(file)
     }
 
+    pub fn path(&self) -> &std::path::Path {
+        &self.path
+    }
+
     pub fn ty(&self) -> Option<&TypeDefinition> {
         self.ty.as_ref()
     }
@@ -843,7 +849,11 @@ impl SourceFile {
 
                 loop {
                     match lex.next()? {
-                        Some(Token::FullStop(_)) => fqtn.push(ident),
+                        Some(Token::FullStop(v)) => {
+                            // Keep source order: the identifier comes before its full stop.
+                            fqtn.push(Token::Identifier(ident));
+                            fqtn.push(Token::FullStop(v));
+                        }
                         Some(_) => {
                             lex.undo();
                             break;
@@ -865,9 +875,9 @@ impl SourceFile {
                     };
                 }
 
-                fqtn.push(ident);
+                fqtn.push(Token::Identifier(ident));
 
-                TypeName::Ident(fqtn)
+                TypeName::Ident(Path::new(fqtn))
             }
             t => return Err(SyntaxError::new(t.span().clone(), "invalid type")),
         };
diff --git a/stage0/src/ast/path.rs b/stage0/src/ast/path.rs
new file mode 100644
index 0000000..592b034
--- /dev/null
+++ b/stage0/src/ast/path.rs
@@ -0,0 +1,34 @@
+use crate::lexer::{Span, Token};
+
+/// A path of identifier (e.g. `foo.bar.Foo`).
+pub struct Path {
+    components: Vec<Token>,
+}
+
+impl Path {
+    pub fn new(components: Vec<Token>) -> Self {
+        assert!(!components.is_empty());
+        assert!(components.last().unwrap().is_identifier());
+
+        for i in 0..components.len() {
+            if i % 2 == 0 {
+                assert!(components[i].is_identifier());
+            } else {
+                assert!(components[i].is_full_stop());
+            }
+        }
+
+        Self { components }
+    }
+
+    pub fn span(&self) -> Span {
+        let mut iter = self.components.iter();
+        let mut span = iter.next().unwrap().span().clone();
+
+        for s in iter {
+            span = &span + s.span();
+        }
+
+        span
+    }
+}
diff --git a/stage0/src/ast/ty.rs b/stage0/src/ast/ty.rs
index 33fc450..f1457bd 100644
--- a/stage0/src/ast/ty.rs
+++ b/stage0/src/ast/ty.rs
@@ -1,5 +1,6 @@
+use super::Path;
 use crate::codegen::{Codegen, LlvmType, LlvmVoid};
-use crate::lexer::{Asterisk, CloseParenthesis, ExclamationMark, Identifier, OpenParenthesis};
+use crate::lexer::{Asterisk, CloseParenthesis, ExclamationMark, OpenParenthesis, Span};
 
 /// A type of something (e.g. variable).
 pub struct Type {
@@ -12,19 +13,39 @@ impl Type {
         Self { prefixes, name }
     }
 
-    pub fn build<'a>(&self, cx: &'a Codegen) -> Option<LlvmType<'a>> {
+    /// Returns the name of this type.
+    pub fn name(&self) -> &TypeName {
+        &self.name
+    }
+
+    /// Lowers this type to an LLVM type.
+    ///
+    /// Returns `None` for the never type. Identifier types are not supported yet and panic.
+    pub fn build<'a, 'b: 'a>(&self, cx: &'a Codegen<'b>) -> Option<LlvmType<'a, 'b>> {
         let mut ty = match &self.name {
             TypeName::Unit(_, _) => Some(LlvmType::Void(LlvmVoid::new(cx))),
             TypeName::Never(_) => None,
             TypeName::Ident(_) => todo!(),
         };
 
+        // TODO: Resolve pointers.
         ty
     }
 }
 
+/// Name of a [`Type`].
pub enum TypeName {
     Unit(OpenParenthesis, CloseParenthesis),
     Never(ExclamationMark),
-    Ident(Vec<Identifier>),
+    Ident(Path),
+}
+
+impl TypeName {
+    pub fn span(&self) -> Span {
+        match self {
+            TypeName::Unit(o, c) => o.span() + c.span(),
+            TypeName::Never(v) => v.span().clone(),
+            TypeName::Ident(v) => v.span(),
+        }
+    }
 }
diff --git a/stage0/src/codegen/func.rs b/stage0/src/codegen/func.rs
new file mode 100644
index 0000000..f574135
--- /dev/null
+++ b/stage0/src/codegen/func.rs
@@ -0,0 +1,59 @@
+use super::{Codegen, LlvmType};
+use llvm_sys::core::{LLVMAddFunction, LLVMFunctionType, LLVMGetNamedFunction};
+use llvm_sys::prelude::{LLVMTypeRef, LLVMValueRef};
+use std::ffi::CStr;
+use std::marker::PhantomData;
+
+/// A function.
+pub struct LlvmFunc<'a, 'b: 'a> {
+    value: LLVMValueRef,
+    phantom: PhantomData<&'a Codegen<'b>>,
+}
+
+impl<'a, 'b: 'a> LlvmFunc<'a, 'b> {
+    /// Looks up a function named `name` in the module of `cx`.
+    ///
+    /// Returns `None` if the module has no function with that name.
+    pub fn get<N: AsRef<CStr>>(cx: &'a Codegen<'b>, name: N) -> Option<Self> {
+        let name = name.as_ref();
+        // SAFETY: `cx.module` lives as long as `cx` and `name` is a NUL-terminated `CStr`.
+        let value = unsafe { LLVMGetNamedFunction(cx.module, name.as_ptr()) };
+
+        if value.is_null() {
+            None
+        } else {
+            Some(Self {
+                value,
+                phantom: PhantomData,
+            })
+        }
+    }
+
+    /// Adds a new function named `name` to the module of `cx`.
+    ///
+    /// `params` and `ret` give the parameter and return types; the function is not variadic.
+    pub fn new<N: AsRef<CStr>>(
+        cx: &'a Codegen<'b>,
+        name: N,
+        params: &[LlvmType<'a, 'b>],
+        ret: LlvmType<'a, 'b>,
+    ) -> Self {
+        let name = name.as_ref();
+        let mut params: Vec<LLVMTypeRef> = params.iter().map(|p| p.as_raw()).collect();
+        // SAFETY: `params` outlives the call and the length passed matches its element count.
+        let ty = unsafe {
+            LLVMFunctionType(
+                ret.as_raw(),
+                params.as_mut_ptr(),
+                params.len().try_into().unwrap(),
+                0,
+            )
+        };
+
+        Self {
+            // SAFETY: `cx.module` lives as long as `cx` and `name` is a NUL-terminated `CStr`.
+            value: unsafe { LLVMAddFunction(cx.module, name.as_ptr(), ty) },
+            phantom: PhantomData,
+        }
+    }
+}
diff --git a/stage0/src/codegen/mod.rs b/stage0/src/codegen/mod.rs
index 613e45a..b139a9f 100644
--- a/stage0/src/codegen/mod.rs
+++ b/stage0/src/codegen/mod.rs
@@ -1,31 +1,63 @@
+pub use self::func::*;
 pub use self::ty::*;
 
+use crate::pkg::PackageVersion;
 use llvm_sys::core::{
     LLVMContextCreate, LLVMContextDispose, LLVMDisposeModule, LLVMModuleCreateWithNameInContext,
 };
 use
llvm_sys::prelude::{LLVMContextRef, LLVMModuleRef};
 use std::ffi::CStr;
 
+mod func;
 mod ty;
 
 /// A context for code generation.
 ///
 /// Each [`Codegen`] can output only one binary.
-pub struct Codegen {
+pub struct Codegen<'a> {
     module: LLVMModuleRef,
     llvm: LLVMContextRef,
+    pkg: &'a str,
+    version: &'a PackageVersion,
 }
 
-impl Codegen {
-    pub fn new<M: AsRef<CStr>>(module: M) -> Self {
+impl<'a> Codegen<'a> {
+    pub fn new<M: AsRef<CStr>>(pkg: &'a str, version: &'a PackageVersion, module: M) -> Self {
         let llvm = unsafe { LLVMContextCreate() };
         let module = unsafe { LLVMModuleCreateWithNameInContext(module.as_ref().as_ptr(), llvm) };
 
-        Self { module, llvm }
+        Self {
+            module,
+            llvm,
+            pkg,
+            version,
+        }
+    }
+
+    pub fn encode_name(&self, container: &str, name: &str) -> String {
+        // TODO: Create a mangled name according to Itanium C++ ABI.
+        // https://itanium-cxx-abi.github.io/cxx-abi/abi.html might be useful.
+        if self.version.major() == 0 {
+            format!(
+                "{}::0.{}::{}.{}",
+                self.pkg,
+                self.version.minor(),
+                container,
+                name
+            )
+        } else {
+            format!(
+                "{}::{}::{}.{}",
+                self.pkg,
+                self.version.major(),
+                container,
+                name
+            )
+        }
     }
 }
 
-impl Drop for Codegen {
+impl<'a> Drop for Codegen<'a> {
     fn drop(&mut self) {
         unsafe { LLVMDisposeModule(self.module) };
         unsafe { LLVMContextDispose(self.llvm) };
diff --git a/stage0/src/codegen/ty.rs b/stage0/src/codegen/ty.rs
index 1992f02..bb394da 100644
--- a/stage0/src/codegen/ty.rs
+++ b/stage0/src/codegen/ty.rs
@@ -4,20 +4,30 @@ use llvm_sys::prelude::LLVMTypeRef;
 use std::marker::PhantomData;
 
 /// Encapsulate an LLVM type.
-pub enum LlvmType<'a> {
-    Void(LlvmVoid<'a>),
-    U8(LlvmU8<'a>),
-    Ptr(LlvmPtr<'a>),
+pub enum LlvmType<'a, 'b: 'a> {
+    Void(LlvmVoid<'a, 'b>),
+    U8(LlvmU8<'a, 'b>),
+    Ptr(LlvmPtr<'a, 'b>),
+}
+
+impl<'a, 'b: 'a> LlvmType<'a, 'b> {
+    pub fn as_raw(&self) -> LLVMTypeRef {
+        match self {
+            Self::Void(v) => v.ty,
+            Self::U8(v) => v.ty,
+            Self::Ptr(v) => v.ty,
+        }
+    }
 }
 
 /// An unit type.
-pub struct LlvmVoid<'a> { +pub struct LlvmVoid<'a, 'b: 'a> { ty: LLVMTypeRef, - phantom: PhantomData<&'a Codegen>, + phantom: PhantomData<&'a Codegen<'b>>, } -impl<'a> LlvmVoid<'a> { - pub fn new(cx: &'a Codegen) -> Self { +impl<'a, 'b: 'a> LlvmVoid<'a, 'b> { + pub fn new(cx: &'a Codegen<'b>) -> Self { Self { ty: unsafe { LLVMVoidTypeInContext(cx.llvm) }, phantom: PhantomData, @@ -26,13 +36,13 @@ impl<'a> LlvmVoid<'a> { } /// A `u8` type. -pub struct LlvmU8<'a> { +pub struct LlvmU8<'a, 'b: 'a> { ty: LLVMTypeRef, - phantom: PhantomData<&'a Codegen>, + phantom: PhantomData<&'a Codegen<'b>>, } -impl<'a> LlvmU8<'a> { - pub fn new(cx: &'a Codegen) -> Self { +impl<'a, 'b: 'a> LlvmU8<'a, 'b> { + pub fn new(cx: &'a Codegen<'b>) -> Self { Self { ty: unsafe { LLVMInt8TypeInContext(cx.llvm) }, phantom: PhantomData, @@ -41,14 +51,14 @@ impl<'a> LlvmU8<'a> { } /// A pointer to something. -pub struct LlvmPtr<'a> { +pub struct LlvmPtr<'a, 'b: 'a> { ty: LLVMTypeRef, - pointee: Box>, - phantom: PhantomData<&'a Codegen>, + pointee: Box>, + phantom: PhantomData<&'a Codegen<'b>>, } -impl<'a> LlvmPtr<'a> { - pub fn new(cx: &'a Codegen, pointee: LlvmType<'a>) -> Self { +impl<'a, 'b: 'a> LlvmPtr<'a, 'b> { + pub fn new(cx: &'a Codegen<'b>, pointee: LlvmType<'a, 'b>) -> Self { Self { ty: unsafe { LLVMPointerTypeInContext(cx.llvm, 0) }, pointee: Box::new(pointee), diff --git a/stage0/src/lexer/span.rs b/stage0/src/lexer/span.rs index 34c5135..abe7ce4 100644 --- a/stage0/src/lexer/span.rs +++ b/stage0/src/lexer/span.rs @@ -1,20 +1,25 @@ +use std::cmp::{max, min}; use std::fmt::{Display, Formatter}; +use std::ops::Add; use std::rc::Rc; /// A span in the source file. 
#[derive(Debug, Clone)] pub struct Span { source: Rc, - offset: usize, - length: usize, + begin: usize, + end: usize, } impl Span { pub fn new(source: Rc, offset: usize, length: usize) -> Self { + assert_ne!(*source.as_bytes().get(offset).unwrap(), b'\n'); + assert_ne!(length, 0); + Self { source, - offset, - length, + begin: offset, + end: offset + length, } } @@ -23,69 +28,127 @@ impl Span { } pub fn offset(&self) -> usize { - self.offset + self.begin + } + + fn create_indicator_line(target: &str, start: usize, end: usize) -> String { + let mut target = target.chars(); + let mut line = String::new(); + + for _ in 0..start { + target.next().unwrap(); + line.push(' '); + } + + for _ in start..end { + line.push(if target.next().unwrap().is_whitespace() { + ' ' + } else { + '^' + }); + } + + line + } +} + +impl Add for &Span { + type Output = Span; + + fn add(self, rhs: Self) -> Self::Output { + assert!(Rc::ptr_eq(&self.source, &rhs.source)); + + let source = self.source.clone(); + let begin = min(self.begin, rhs.begin); + let end = max(self.end, rhs.end); + + Span { source, begin, end } } } impl Display for Span { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - let head = &self.source[..self.offset]; - let span = &self.source[self.offset..(self.offset + self.length)]; - let tail = &self.source[(self.offset + self.length)..]; - let mut lines = vec![String::new()]; + let mut line = 0; let mut col = 0; + let mut lines = vec![(String::new(), Some(line))]; + let mut offset = 0; + let mut start = None; + let mut end = None; + let mut first = None; + let mut last = None; + + for ch in self.source.chars() { + if offset == self.begin { + start = Some(col); + first = Some(lines.len() - 1); + } else if offset == self.end { + end = Some(col); + } - // Load lines from head. - for ch in head.chars() { match ch { '\r' => {} '\n' => { - lines.push(String::new()); + if let Some(c) = start { + // Add an indicator line. 
+                        let l = lines.last().unwrap().0.as_str();
+                        let e = end.unwrap_or_else(|| l.chars().count()); // columns are chars
+                        let l = Self::create_indicator_line(l, c, e);
+
+                        if l.chars().any(|c| !c.is_whitespace()) {
+                            last = Some(lines.len());
+                            lines.push((l, None));
+                        }
+
+                        // Check for multi-line span.
+                        if end.is_some() {
+                            start = None;
+                            end = None;
+                        } else {
+                            start = Some(0);
+                        }
+                    }
+
+                    // Insert next source line.
+                    line += 1;
+                    lines.push((String::new(), Some(line)));
                     col = 0;
                 }
-                v => {
-                    lines.last_mut().unwrap().push(v);
+                _ => {
+                    lines.last_mut().unwrap().0.push(ch);
                     col += 1;
                 }
             }
-        }
 
-        // Push span content.
-        lines.last_mut().unwrap().push_str(span);
-
-        // Load remaining line.
-        let mut tail = tail.chars();
-
-        while let Some(ch) = tail.next() {
-            match ch {
-                '\r' => {}
-                '\n' => {
-                    lines.push(String::new());
-                    break;
-                }
-                v => lines.last_mut().unwrap().push(v),
-            }
+            offset += ch.len_utf8();
         }
 
-        // Push a cursor.
-        for _ in 0..col {
-            lines.last_mut().unwrap().push(' ');
-        }
+        if let Some(c) = start {
+            let l = lines.last().unwrap().0.as_str();
+            let e = l.chars().count(); // columns are chars, not bytes
+            let l = Self::create_indicator_line(l, c, e);
 
-        if self.length == 0 {
-            lines.last_mut().unwrap().push('^');
-        } else {
-            for _ in 0..self.length {
-                lines.last_mut().unwrap().push('^');
+            if l.chars().any(|c| !c.is_whitespace()) {
+                last = Some(lines.len());
+                lines.push((l, None));
             }
         }
 
         // Write.
-        for i in lines.len().checked_sub(10).unwrap_or(0)..(lines.len() - 1) {
-            writeln!(f, "{:>5} | {}", i + 1, lines[i])?;
-        }
+        let first = first.unwrap();
+        let last = last.unwrap();
+
+        for i in first..=last {
+            let l = &lines[i];
 
-        write!(f, "      | {}", lines.last().unwrap())?;
+            if let Some(n) = l.1 {
+                // A line from the source is never the last line.
+ writeln!(f, "{:>5} | {}", n + 1, l.0)?; + } else if i == last { + write!(f, " | {}", l.0)?; + } else { + writeln!(f, " | {}", l.0)?; + } + } Ok(()) } diff --git a/stage0/src/lexer/token.rs b/stage0/src/lexer/token.rs index bd0a910..226ecdc 100644 --- a/stage0/src/lexer/token.rs +++ b/stage0/src/lexer/token.rs @@ -30,6 +30,20 @@ pub enum Token { } impl Token { + pub fn is_full_stop(&self) -> bool { + match self { + Token::FullStop(_) => true, + _ => false, + } + } + + pub fn is_identifier(&self) -> bool { + match self { + Token::Identifier(_) => true, + _ => false, + } + } + pub fn span(&self) -> &Span { match self { Self::ExclamationMark(v) => &v.0, @@ -218,6 +232,10 @@ impl ExclamationMark { pub fn new(span: Span) -> Self { Self(span) } + + pub fn span(&self) -> &Span { + &self.0 + } } /// An `=` token. @@ -285,6 +303,10 @@ impl OpenParenthesis { pub fn new(span: Span) -> Self { Self(span) } + + pub fn span(&self) -> &Span { + &self.0 + } } /// An `)` token. diff --git a/stage0/src/pkg/meta.rs b/stage0/src/pkg/meta.rs index 698afc3..52b9c43 100644 --- a/stage0/src/pkg/meta.rs +++ b/stage0/src/pkg/meta.rs @@ -24,6 +24,20 @@ pub struct PackageVersion { patch: u32, } +impl PackageVersion { + pub fn major(&self) -> u32 { + self.major + } + + pub fn minor(&self) -> u32 { + self.minor + } + + pub fn patch(&self) -> u32 { + self.patch + } +} + impl<'a> Deserialize<'a> for PackageVersion { fn deserialize(deserializer: D) -> Result where diff --git a/stage0/src/project/meta.rs b/stage0/src/project/meta.rs index 7ceea62..de9e826 100644 --- a/stage0/src/project/meta.rs +++ b/stage0/src/project/meta.rs @@ -10,7 +10,7 @@ pub struct ProjectMeta { /// A package table of `.nitro` file. #[derive(Deserialize)] pub struct ProjectPackage { - pub name: String, + pub name: String, // TODO: Only allow a-z, 0-9 and . alphabets. 
#[serde(rename = "type")] pub ty: ProjectType, pub version: PackageVersion, diff --git a/stage0/src/project/mod.rs b/stage0/src/project/mod.rs index d254a98..33664ec 100644 --- a/stage0/src/project/mod.rs +++ b/stage0/src/project/mod.rs @@ -2,6 +2,7 @@ pub use self::meta::*; use crate::ast::{ParseError, SourceFile}; use crate::codegen::Codegen; +use crate::lexer::SyntaxError; use crate::pkg::{Arch, Package, PackageMeta}; use std::collections::{BTreeMap, VecDeque}; use std::ffi::CString; @@ -88,15 +89,28 @@ impl Project { } pub fn build(&mut self) -> Result { - // Compile the sources. + // Setup codegen context. let pkg = &self.meta.package; - let mut cx = Codegen::new(CString::new(pkg.name.as_str()).unwrap()); + let cx = Codegen::new( + &pkg.name, + &pkg.version, + CString::new(pkg.name.as_str()).unwrap(), + ); + + // Compile the sources. let mut bin = Arch::new(); let mut lib = Arch::new(); for (fqtn, src) in &self.sources { for im in src.impls() { - for func in im.functions() {} + for func in im.functions() { + let func = match func.build(&cx, &fqtn) { + Ok(v) => v, + Err(e) => { + return Err(ProjectBuildError::InvalidSyntax(src.path().to_owned(), e)); + } + }; + } } } @@ -176,4 +190,7 @@ pub enum ProjectLoadError { /// Represents an error when a [`Project`] is failed to build. #[derive(Debug, Error)] -pub enum ProjectBuildError {} +pub enum ProjectBuildError { + #[error("invalid syntax in {0}")] + InvalidSyntax(PathBuf, #[source] SyntaxError), +}