Skip to content
This repository was archived by the owner on Nov 23, 2024. It is now read-only.

Commit

Permalink
refactor: data structure (#223)
Browse files Browse the repository at this point in the history
### Summary of Changes

In this fix, the data structure is reworked to fit the model discussed
in the requirements.

Also, the way the analysis resolves references has changed.
The `FunctionScope` class now holds the target, value and call nodes of its
function, so we can iterate over these for each function.
This should be more efficient and easier to understand, because the analysis
now builds on the data structure changes described below.

Added the class `Reference` to represent a node that references a
`Symbol`. The `ReferenceNode` class was extended with two subclasses:
`TargetReference`, representing a Symbol referencing another Symbol,
and `ValueReference`, representing a Reference referencing a Symbol.
Both classes store the referenced symbols in a list.
Reworked the `Reasons` class so that it now holds the Symbols of the
variables that are written to or read from.
This blocks the merge of #211!


<!-- Please provide a summary of changes in this pull request, ensuring
all changes are explained. -->

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
lukarade and megalinter-bot authored Feb 29, 2024
1 parent 4e51c55 commit 726ba5b
Show file tree
Hide file tree
Showing 19 changed files with 8,876 additions and 5,673 deletions.
4 changes: 1 addition & 3 deletions src/library_analyzer/cli/_run_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def _run_api_command(
out_dir_path : Path
The path to the output directory.
docstring_style : DocstringStyle
The style of docstrings that used in the library.
The style of docstrings that is used in the library.
"""
api = get_api(package, src_dir_path, docstring_style)
out_file_api = out_dir_path.joinpath(f"{package}__api.json")
Expand All @@ -32,5 +32,3 @@ def _run_api_command(
api_dependencies = get_dependencies(api)
out_file_api_dependencies = out_dir_path.joinpath(f"{package}__api_dependencies.json")
api_dependencies.to_json_file(out_file_api_dependencies)

# TODO: call resolve_references here
389 changes: 294 additions & 95 deletions src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py

Large diffs are not rendered by default.

988 changes: 589 additions & 399 deletions src/library_analyzer/processing/api/purity_analysis/_get_module_data.py

Large diffs are not rendered by default.

488 changes: 239 additions & 249 deletions src/library_analyzer/processing/api/purity_analysis/_infer_purity.py

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,32 +1,14 @@
"""Data model for purity analysis."""

from library_analyzer.processing.api.purity_analysis.model._purity import (
CallOfParameter,
Expression,
FileRead,
FileWrite,
Impure,
ImpurityReason,
NativeCall,
NonLocalVariableRead,
NonLocalVariableWrite,
OpenMode,
ParameterAccess,
Pure,
PurityResult,
StringLiteral,
UnknownCall,
)
from library_analyzer.processing.api.purity_analysis.model._reference import (
from library_analyzer.processing.api.purity_analysis.model._call_graph import (
CallGraphForest,
CallGraphNode,
ReferenceNode,
)
from library_analyzer.processing.api.purity_analysis.model._scope import (
from library_analyzer.processing.api.purity_analysis.model._module_data import (
Builtin,
BuiltinOpen,
ClassScope,
ClassVariable,
FunctionReference,
FunctionScope,
GlobalVariable,
Import,
Expand All @@ -38,17 +20,42 @@
ModuleData,
NodeID,
Parameter,
Reasons,
Reference,
Scope,
Symbol,
)
from library_analyzer.processing.api.purity_analysis.model._purity import (
APIPurity,
CallOfParameter,
Expression,
FileRead,
FileWrite,
Impure,
ImpurityReason,
NativeCall,
NonLocalVariableRead,
NonLocalVariableWrite,
OpenMode,
ParameterAccess,
Pure,
PurityResult,
StringLiteral,
UnknownCall,
)
from library_analyzer.processing.api.purity_analysis.model._reference import (
ModuleAnalysisResult,
Reasons,
ReferenceNode,
TargetReference,
ValueReference,
)

__all__ = [
"ModuleAnalysisResult",
"ModuleData",
"Scope",
"ClassScope",
"FunctionScope",
"FunctionReference",
"MemberAccess",
"MemberAccessTarget",
"MemberAccessValue",
Expand Down Expand Up @@ -80,4 +87,9 @@
"NativeCall",
"UnknownCall",
"CallOfParameter",
"Reference",
"TargetReference",
"ValueReference",
"APIPurity",
"BuiltinOpen",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from library_analyzer.processing.api.purity_analysis.model._module_data import (
ClassScope,
FunctionScope,
NodeID,
)
from library_analyzer.processing.api.purity_analysis.model._reference import Reasons


@dataclass
class CallGraphNode:
    """Class for call graph nodes.

    A call graph node represents a function in the call graph.

    Attributes
    ----------
    scope : FunctionScope | ClassScope
        The function that the node represents.
        This is a ClassScope if the class has a __init__ method.
        In this case, the node is used for propagating the reasons of the
        __init__ method to the functions calling the class.
    reasons : Reasons
        The raw Reasons for the node.
    children : set[CallGraphNode]
        The set of children of the node (i.e., the set of nodes that this node calls).
    combined_node_ids : list[NodeID]
        A list of the IDs of all nodes that are combined into this node.
        This is only set if the node is a combined node.
        It is later used for transferring the reasons of the combined node to the original nodes.
    is_builtin : bool
        True if the function is a builtin function, False otherwise.
    """

    scope: FunctionScope | ClassScope  # TODO: change to symbol
    # TODO: remove calls from reasons after they were added to the call graph (except for unknown calls)
    reasons: Reasons
    children: set[CallGraphNode] = field(default_factory=set)
    combined_node_ids: list[NodeID] = field(default_factory=list)
    is_builtin: bool = False

    def __hash__(self) -> int:
        # str(self) falls back to __repr__, so the hash is derived from the ID of the scope's symbol.
        return hash(str(self))

    def __repr__(self) -> str:
        return f"{self.scope.symbol.id}"

    def add_child(self, child: CallGraphNode) -> None:
        """Add a child to the node.

        Parameters
        ----------
        child : CallGraphNode
            The child to add.
        """
        self.children.add(child)

    def is_leaf(self) -> bool:
        """Check if the node is a leaf node.

        Returns
        -------
        bool
            True if the node has no children, False otherwise.
        """
        return not self.children

    def combined_node_id_to_string(self) -> list[str]:
        """Return the combined node IDs as strings.

        Returns
        -------
        list[str]
            The string representation of every ID in combined_node_ids, in the same order.
        """
        return [str(node_id) for node_id in self.combined_node_ids]


@dataclass
class CallGraphForest:
    """Class for call graph forests.

    A call graph forest represents a collection of call graph trees.

    Attributes
    ----------
    graphs : dict[NodeID, CallGraphNode]
        The dictionary of call graph trees.
        The key is the NodeID of the tree, the value is the root CallGraphNode of the tree.
    """

    graphs: dict[NodeID, CallGraphNode] = field(default_factory=dict)

    def add_graph(self, graph_id: NodeID, graph: CallGraphNode) -> None:
        """Add a call graph tree to the forest.

        An existing entry with the same NodeID is overwritten.

        Parameters
        ----------
        graph_id : NodeID
            The NodeID of the tree node.
        graph : CallGraphNode
            The root of the tree.
        """
        self.graphs[graph_id] = graph

    def get_graph(self, graph_id: NodeID) -> CallGraphNode:
        """Get a call graph tree from the forest.

        Parameters
        ----------
        graph_id : NodeID
            The NodeID of the tree node to get.

        Returns
        -------
        CallGraphNode
            The CallGraphNode that is the root of the tree.

        Raises
        ------
        KeyError
            If the graph_id is not in the forest.
        """
        try:
            return self.graphs[graph_id]
        except KeyError:
            # Re-raise with a descriptive message; "from None" hides the bare lookup error.
            raise KeyError(f"Graph with id {graph_id} not found inside the call graph.") from None

    def has_graph(self, graph_id: NodeID) -> bool:
        """Check if the forest contains a call graph tree with the given NodeID.

        Parameters
        ----------
        graph_id : NodeID
            The NodeID of the tree to check for.

        Returns
        -------
        bool
            True if the forest contains a tree with the given NodeID, False otherwise.
        """
        return graph_id in self.graphs

    def delete_graph(self, graph_id: NodeID) -> None:
        """Delete a call graph tree from the forest.

        Parameters
        ----------
        graph_id : NodeID
            The NodeID of the tree to delete.

        Raises
        ------
        KeyError
            If the graph_id is not in the forest.
        """
        del self.graphs[graph_id]
Loading

0 comments on commit 726ba5b

Please sign in to comment.