Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[red-knot] Implicit instance attributes #15811

Merged
merged 11 commits into from
Feb 3, 2025
35 changes: 21 additions & 14 deletions crates/red_knot_python_semantic/src/semantic_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use ruff_index::{IndexSlice, IndexVec};
use crate::module_name::ModuleName;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::ast_ids::AstIds;
use crate::semantic_index::attribute_assignment::AttributeAssignment;
use crate::semantic_index::attribute_assignment::AttributeAssignments;
use crate::semantic_index::builder::SemanticIndexBuilder;
use crate::semantic_index::definition::{Definition, DefinitionNodeKey};
use crate::semantic_index::expression::Expression;
Expand Down Expand Up @@ -95,6 +95,25 @@ pub(crate) fn use_def_map<'db>(db: &'db dyn Db, scope: ScopeId<'db>) -> Arc<UseD
index.use_def_map(scope.file_scope_id(db))
}

/// Returns all attribute assignments for a specific class body scope.
///
/// Using [`attribute_assignments`] over [`semantic_index`] has the advantage that
/// Salsa can avoid invalidating dependent queries if this scope's instance attributes
/// are unchanged.
#[salsa::tracked]
pub(crate) fn attribute_assignments<'db>(
db: &'db dyn Db,
class_body_scope: ScopeId<'db>,
) -> Option<Arc<AttributeAssignments<'db>>> {
let file = class_body_scope.file(db);
let index = semantic_index(db, file);

index
.attribute_assignments
.get(&class_body_scope.file_scope_id(db))
.cloned()
}

/// Returns the module global scope of `file`.
#[salsa::tracked]
pub(crate) fn global_scope(db: &dyn Db, file: File) -> ScopeId<'_> {
Expand Down Expand Up @@ -144,8 +163,7 @@ pub(crate) struct SemanticIndex<'db> {

/// Maps from class body scopes to attribute assignments that were found
/// in methods of that class.
attribute_assignments:
FxHashMap<FileScopeId, FxHashMap<&'db str, Vec<AttributeAssignment<'db>>>>,
attribute_assignments: FxHashMap<FileScopeId, Arc<AttributeAssignments<'db>>>,
}

impl<'db> SemanticIndex<'db> {
Expand Down Expand Up @@ -271,17 +289,6 @@ impl<'db> SemanticIndex<'db> {
pub(super) fn has_future_annotations(&self) -> bool {
self.has_future_annotations
}

pub(super) fn attribute_assignments(
&self,
db: &dyn Db,
class_body_scope_id: ScopeId,
attribute: &str,
) -> Option<&[AttributeAssignment<'db>]> {
self.attribute_assignments
.get(&class_body_scope_id.file_scope_id(db))
.and_then(|assignments| assignments.get(attribute).map(Vec::as_slice))
}
}

pub struct AncestorsIter<'a> {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use rustc_hash::FxHashMap;

use crate::semantic_index::expression::Expression;

/// Describes an (annotated) attribute assignment that we discovered in a method
Expand All @@ -11,3 +13,5 @@ pub(crate) enum AttributeAssignment<'db> {
/// An attribute assignment without a type annotation, e.g. `self.x = <value>`.
Unannotated { value: Expression<'db> },
}

pub(crate) type AttributeAssignments<'db> = FxHashMap<&'db str, Vec<AttributeAssignment<'db>>>;
11 changes: 7 additions & 4 deletions crates/red_knot_python_semantic/src/semantic_index/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::ast_node_ref::AstNodeRef;
use crate::module_name::ModuleName;
use crate::semantic_index::ast_ids::node_key::ExpressionNodeKey;
use crate::semantic_index::ast_ids::AstIdsBuilder;
use crate::semantic_index::attribute_assignment::AttributeAssignment;
use crate::semantic_index::attribute_assignment::{AttributeAssignment, AttributeAssignments};
use crate::semantic_index::constraint::PatternConstraintKind;
use crate::semantic_index::definition::{
AssignmentDefinitionNodeRef, ComprehensionDefinitionNodeRef, Definition, DefinitionNodeKey,
Expand Down Expand Up @@ -92,8 +92,7 @@ pub(super) struct SemanticIndexBuilder<'db> {
definitions_by_node: FxHashMap<DefinitionNodeKey, Definition<'db>>,
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
imported_modules: FxHashSet<ModuleName>,
attribute_assignments:
FxHashMap<FileScopeId, FxHashMap<&'db str, Vec<AttributeAssignment<'db>>>>,
attribute_assignments: FxHashMap<FileScopeId, AttributeAssignments<'db>>,
}

impl<'db> SemanticIndexBuilder<'db> {
Expand Down Expand Up @@ -744,7 +743,11 @@ impl<'db> SemanticIndexBuilder<'db> {
use_def_maps,
imported_modules: Arc::new(self.imported_modules),
has_future_annotations: self.has_future_annotations,
attribute_assignments: self.attribute_assignments,
attribute_assignments: self
.attribute_assignments
.into_iter()
.map(|(k, v)| (k, Arc::new(v)))
.collect(),
}
}
}
Expand Down
32 changes: 19 additions & 13 deletions crates/red_knot_python_semantic/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ use crate::module_resolver::{file_to_module, resolve_module, KnownModule};
use crate::semantic_index::ast_ids::HasScopedExpressionId;
use crate::semantic_index::attribute_assignment::AttributeAssignment;
use crate::semantic_index::definition::Definition;
use crate::semantic_index::expression::Expression;
use crate::semantic_index::symbol::{self as symbol, ScopeId, ScopedSymbolId};
use crate::semantic_index::{
global_scope, imported_modules, semantic_index, symbol_table, use_def_map,
BindingWithConstraints, BindingWithConstraintsIterator, DeclarationWithConstraint,
attribute_assignments, global_scope, imported_modules, semantic_index, symbol_table,
use_def_map, BindingWithConstraints, BindingWithConstraintsIterator, DeclarationWithConstraint,
DeclarationsIterator,
};
use crate::stdlib::{builtins_symbol, known_module_symbol, typing_extensions_symbol};
Expand Down Expand Up @@ -4146,7 +4147,15 @@ impl<'db> Class<'db> {
name: &str,
inferred_type_from_class_body: Option<Type<'db>>,
) -> Symbol<'db> {
let index = semantic_index(db, class_body_scope.file(db));
// We use a separate salsa query here to prevent unrelated changes in the AST of an external
// file from triggering re-evaluations of downstream queries.
// See the `dependency_implicit_instance_attribute` test for more information.
#[salsa::tracked]
fn infer_expression_type<'db>(db: &'db dyn Db, expression: Expression<'db>) -> Type<'db> {
let inference = infer_expression_types(db, expression);
let expr_scope = expression.scope(db);
inference.expression_type(expression.node_ref(db).scoped_expression_id(db, expr_scope))
}
Comment on lines +4150 to +4158
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this would be useful to have as a general query and if it should be defined next to infer_expression_types instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had a similar thought. It could certainly be used in more places, but I wasn't sure if we want the additional query in those places? I'll note it down as a task and open a small PR after this has been merged.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is intuitive to me why attribute_assignments should be a query, but it's less intuitive to me why this should be a query. I don't see what additional dependencies are introduced here above those that the underlying infer_expression_types query would have anyway. So I guess this is not about dependencies, but rather about a smaller returned value so backdating can be more effective?

It feels intuitively to me that we would get more caching value with fewer cached memos if we placed the salsa query caching at the level of Class::own_instance_member or Type::member, rather than at such a fine-grained level that does such little work over the underlying infer_expression_types. But we should of course validate any such changes with observed performance differences.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is the expression.node_ref. It accesses the AST node from the module where the class is declared, meaning the enclosing query has to re-run whenever the declaring module changes -- which we don't want.

Doing it here is mainly about avoiding calling a query (and caching the value which is expensive) in cases where we don't need to. Although I admit, I don't have any numbers on whether caching here is a significantly lower number than caching at the own_instance_member level.


// If we do not see any declarations of an attribute, neither in the class body nor in
// any method, we build a union of `Unknown` with the inferred types of all bindings of
Expand All @@ -4158,7 +4167,11 @@ impl<'db> Class<'db> {
union_of_inferred_types = union_of_inferred_types.add(ty);
}

let Some(attribute_assignments) = index.attribute_assignments(db, class_body_scope, name)
let attribute_assignments = attribute_assignments(db, class_body_scope);

let Some(attribute_assignments) = attribute_assignments
.as_deref()
.and_then(|assignments| assignments.get(name))
else {
if inferred_type_from_class_body.is_some() {
return union_of_inferred_types.build().into();
Expand All @@ -4175,11 +4188,7 @@ impl<'db> Class<'db> {
// self.name: <annotation>
// self.name: <annotation> = …

let inference = infer_expression_types(db, *annotation);
let expr_scope = annotation.scope(db);
let annotation_ty = inference.expression_type(
annotation.node_ref(db).scoped_expression_id(db, expr_scope),
);
let annotation_ty = infer_expression_type(db, *annotation);

// TODO: check if there are conflicting declarations
return annotation_ty.into();
Expand All @@ -4189,10 +4198,7 @@ impl<'db> Class<'db> {
//
// self.name = <value>

let inference = infer_expression_types(db, *value);
let expr_scope = value.scope(db);
let inferred_ty = inference
.expression_type(value.node_ref(db).scoped_expression_id(db, expr_scope));
let inferred_ty = infer_expression_type(db, *value);

union_of_inferred_types = union_of_inferred_types.add(inferred_ty);
}
Expand Down
82 changes: 81 additions & 1 deletion crates/red_knot_python_semantic/src/types/infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6026,7 +6026,7 @@ mod tests {
use crate::types::check_types;
use ruff_db::files::{system_path_to_file, File};
use ruff_db::system::DbWithTestSystem;
use ruff_db::testing::assert_function_query_was_not_run;
use ruff_db::testing::{assert_function_query_was_not_run, assert_function_query_was_run};

use super::*;

Expand Down Expand Up @@ -6353,4 +6353,84 @@ mod tests {
);
Ok(())
}

#[test]
fn dependency_implicit_instance_attribute() -> anyhow::Result<()> {
fn x_rhs_expression(db: &TestDb) -> Expression<'_> {
let file_main = system_path_to_file(db, "/src/main.py").unwrap();
let ast = parsed_module(db, file_main);
// Get the second statement in `main.py` (x = …) and extract the expression
// node on the right-hand side:
let x_rhs_node = &ast.syntax().body[1].as_assign_stmt().unwrap().value;

let index = semantic_index(db, file_main);
index.expression(x_rhs_node.as_ref())
}

let mut db = setup_db();

db.write_dedented(
"/src/mod.py",
r#"
class C:
def f(self):
self.attr: int | None = None
"#,
)?;
db.write_dedented(
"/src/main.py",
r#"
from mod import C
x = C().attr
"#,
)?;

let file_main = system_path_to_file(&db, "/src/main.py").unwrap();
let attr_ty = global_symbol(&db, file_main, "x").expect_type();
assert_eq!(attr_ty.display(&db).to_string(), "Unknown | int | None");

// Change the type of `attr` to `str | None`; this should trigger the type of `x` to be re-inferred
db.write_dedented(
"/src/mod.py",
r#"
class C:
def f(self):
self.attr: str | None = None
"#,
)?;

let events = {
db.clear_salsa_events();
let attr_ty = global_symbol(&db, file_main, "x").expect_type();
assert_eq!(attr_ty.display(&db).to_string(), "Unknown | str | None");
db.take_salsa_events()
};
assert_function_query_was_run(&db, infer_expression_types, x_rhs_expression(&db), &events);

// Add a comment; this should not trigger the type of `x` to be re-inferred
db.write_dedented(
"/src/mod.py",
r#"
class C:
def f(self):
# a comment!
self.attr: str | None = None
"#,
)?;

let events = {
db.clear_salsa_events();
let attr_ty = global_symbol(&db, file_main, "x").expect_type();
assert_eq!(attr_ty.display(&db).to_string(), "Unknown | str | None");
db.take_salsa_events()
};
assert_function_query_was_not_run(
&db,
infer_expression_types,
x_rhs_expression(&db),
&events,
);

Ok(())
}
}
Loading