From b9c8113a8a80a464fd042f290553817d8024fdfc Mon Sep 17 00:00:00 2001 From: Teodoro Freund Date: Thu, 22 Aug 2024 21:27:15 +0100 Subject: [PATCH] Added bytes type and some inference (#13061) ## Summary This PR adds the `bytes` type to red-knot: - Added the `bytes` type - Added support for bytes literals - Support for the `+` operator Improves on #12701 Big TODO on supporting and normalizing r-prefixed bytestrings (`rb"hello\n"`) ## Test Plan Added a test for bytes literals, concatenation, and corner values --- Cargo.lock | 1 + crates/red_knot_python_semantic/Cargo.toml | 1 + crates/red_knot_python_semantic/src/types.rs | 12 +++++ .../src/types/display.rs | 11 ++++ .../src/types/infer.rs | 52 +++++++++++++++++-- crates/ruff_python_ast/src/nodes.rs | 2 +- 6 files changed, 73 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e5924fa28a699..2e3301b0c51cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1926,6 +1926,7 @@ dependencies = [ "ruff_db", "ruff_index", "ruff_python_ast", + "ruff_python_literal", "ruff_python_parser", "ruff_python_stdlib", "ruff_source_file", diff --git a/crates/red_knot_python_semantic/Cargo.toml b/crates/red_knot_python_semantic/Cargo.toml index bf6daaa8e588c..d0619955434ac 100644 --- a/crates/red_knot_python_semantic/Cargo.toml +++ b/crates/red_knot_python_semantic/Cargo.toml @@ -17,6 +17,7 @@ ruff_python_ast = { workspace = true } ruff_python_stdlib = { workspace = true } ruff_source_file = { workspace = true } ruff_text_size = { workspace = true } +ruff_python_literal = { workspace = true } anyhow = { workspace = true } bitflags = { workspace = true } diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index 173c957d1a28e..4f2d101bf4ffc 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -181,6 +181,8 @@ pub enum Type<'db> { IntLiteral(i64), /// A boolean literal, either `True` or `False`.
BooleanLiteral(bool), + /// A bytes literal + BytesLiteral(BytesLiteralType<'db>), // TODO protocols, callable types, overloads, generics, type vars } @@ -276,6 +278,10 @@ impl<'db> Type<'db> { Type::Unknown } Type::BooleanLiteral(_) => Type::Unknown, + Type::BytesLiteral(_) => { + // TODO defer to Type::Instance().member + Type::Unknown + } } } @@ -372,6 +378,12 @@ pub struct IntersectionType<'db> { negative: FxOrderSet<Type<'db>>, } +#[salsa::interned] +pub struct BytesLiteralType<'db> { + #[return_ref] + value: Box<[u8]>, +} + #[cfg(test)] mod tests { use anyhow::Context; diff --git a/crates/red_knot_python_semantic/src/types/display.rs b/crates/red_knot_python_semantic/src/types/display.rs index 7de3f9ebf7c88..4ce811ae5a624 100644 --- a/crates/red_knot_python_semantic/src/types/display.rs +++ b/crates/red_knot_python_semantic/src/types/display.rs @@ -2,6 +2,9 @@ use std::fmt::{Display, Formatter}; +use ruff_python_ast::str::Quote; +use ruff_python_literal::escape::AsciiEscape; + use crate::types::{IntersectionType, Type, UnionType}; use crate::Db; @@ -38,6 +41,14 @@ impl Display for DisplayType<'_> { Type::BooleanLiteral(boolean) => { write!(f, "Literal[{}]", if *boolean { "True" } else { "False" }) } + Type::BytesLiteral(bytes) => { + let escape = + AsciiEscape::with_preferred_quote(bytes.value(self.db).as_ref(), Quote::Double); + + f.write_str("Literal[")?; + escape.bytes_repr().write(f)?; + f.write_str("]") + } } } } diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 6f00e36b63029..45803a61ce8da 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -43,8 +43,8 @@ use crate::semantic_index::symbol::{FileScopeId, NodeWithScopeKind, NodeWithScop use crate::semantic_index::SemanticIndex; use crate::types::diagnostic::{TypeCheckDiagnostic, TypeCheckDiagnostics}; use crate::types::{ - builtins_symbol_ty_by_name, definitions_ty,
global_symbol_ty_by_name, ClassType, FunctionType, - Name, Type, UnionBuilder, + builtins_symbol_ty_by_name, definitions_ty, global_symbol_ty_by_name, BytesLiteralType, + ClassType, FunctionType, Name, Type, UnionBuilder, }; use crate::Db; @@ -1206,9 +1206,12 @@ impl<'db> TypeInferenceBuilder<'db> { } #[allow(clippy::unused_self)] - fn infer_bytes_literal_expression(&mut self, _literal: &ast::ExprBytesLiteral) -> Type<'db> { - // TODO - Type::Unknown + fn infer_bytes_literal_expression(&mut self, literal: &ast::ExprBytesLiteral) -> Type<'db> { + // TODO: ignoring r/R prefixes for now, should normalize bytes values + Type::BytesLiteral(BytesLiteralType::new( + self.db, + literal.value.bytes().collect(), + )) } fn infer_fstring_expression(&mut self, fstring: &ast::ExprFString) -> Type<'db> { @@ -1684,6 +1687,7 @@ impl<'db> TypeInferenceBuilder<'db> { let left_ty = self.infer_expression(left); let right_ty = self.infer_expression(right); + // TODO flatten the matches by matching on (left_ty, right_ty, op) match left_ty { Type::Any => Type::Any, Type::Unknown => Type::Unknown, @@ -1722,6 +1726,22 @@ impl<'db> TypeInferenceBuilder<'db> { _ => Type::Unknown, // TODO } } + Type::BytesLiteral(lhs) => { + match right_ty { + Type::BytesLiteral(rhs) => { + match op { + ast::Operator::Add => Type::BytesLiteral(BytesLiteralType::new( + self.db, + [lhs.value(self.db).as_ref(), rhs.value(self.db).as_ref()] + .concat() + .into_boxed_slice(), + )), + _ => Type::Unknown, // TODO + } + } + _ => Type::Unknown, // TODO + } + } _ => Type::Unknown, // TODO } } @@ -2235,6 +2255,28 @@ mod tests { Ok(()) } + #[test] + fn bytes_type() -> anyhow::Result<()> { + let mut db = setup_db(); + + db.write_dedented( + "src/a.py", + " + w = b'red' b'knot' + x = b'hello' + y = b'world' + b'!' 
+ z = b'\\xff\\x00' + ", + )?; + + assert_public_ty(&db, "src/a.py", "w", "Literal[b\"redknot\"]"); + assert_public_ty(&db, "src/a.py", "x", "Literal[b\"hello\"]"); + assert_public_ty(&db, "src/a.py", "y", "Literal[b\"world!\"]"); + assert_public_ty(&db, "src/a.py", "z", "Literal[b\"\\xff\\x00\"]"); + + Ok(()) + } + #[test] fn resolve_union() -> anyhow::Result<()> { let mut db = setup_db(); diff --git a/crates/ruff_python_ast/src/nodes.rs b/crates/ruff_python_ast/src/nodes.rs index 644d6c4ba7fbc..079e9003b8e92 100644 --- a/crates/ruff_python_ast/src/nodes.rs +++ b/crates/ruff_python_ast/src/nodes.rs @@ -2152,7 +2152,7 @@ impl BytesLiteralValue { } /// Returns an iterator over the bytes of the concatenated bytes. - fn bytes(&self) -> impl Iterator<Item = u8> + '_ { + pub fn bytes(&self) -> impl Iterator<Item = u8> + '_ { self.iter().flat_map(|part| part.as_slice().iter().copied()) } }