Skip to content

Commit 0bf6ac6

Browse files
authored
fs.Snapshot is declared in Rust (#11328)
### Problem Our fundamental filesystem types are declared in Python, but declaring them in Rust would cut down on FFI costs, and allow for better error messages by removing some usage of the `externs::getattr*` functions. Additionally, the previous change to move `fs.Digest` to Rust in #10905 missed the optimization opportunity of directly extracting the inner Rust `Digest` instance. ### Solution In two commits: port `fs.Snapshot` to Rust, and optimize the conversion of a `PyDigest` to a `Digest`. ### Result 6% faster runs of `./pants dependencies --transitive ::`. [ci skip-build-wheels]
1 parent 85673c8 commit 0bf6ac6

File tree

7 files changed

+122
-94
lines changed

7 files changed

+122
-94
lines changed

src/python/pants/engine/fs.py

+7-12
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Union
77

88
from pants.engine.collection import Collection
9-
from pants.engine.internals.native_engine import PyDigest
9+
from pants.engine.internals.native_engine import PyDigest, PySnapshot
1010
from pants.engine.rules import QueryRule, side_effecting
1111
from pants.option.global_options import GlobMatchErrorBehavior as GlobMatchErrorBehavior
1212
from pants.util.meta import frozen_after_init
@@ -23,17 +23,12 @@
2323
Digest = PyDigest
2424

2525

26-
@dataclass(frozen=True)
27-
class Snapshot:
28-
"""A Snapshot is a collection of sorted file paths and dir paths fingerprinted by their
29-
names/content.
30-
31-
You can lift a `Digest` to a `Snapshot` with `await Get(Snapshot, Digest, my_digest)`.
32-
"""
26+
"""A Snapshot is a collection of sorted file paths and dir paths fingerprinted by their
27+
names/content.
3328
34-
digest: Digest
35-
files: Tuple[str, ...]
36-
dirs: Tuple[str, ...]
29+
You can lift a `Digest` to a `Snapshot` with `await Get(Snapshot, Digest, my_digest)`.
30+
"""
31+
Snapshot = PySnapshot
3732

3833

3934
@dataclass(frozen=True)
@@ -278,7 +273,7 @@ def write_digest(self, digest: Digest, *, path_prefix: Optional[str] = None) ->
278273
_EMPTY_FINGERPRINT = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
279274
EMPTY_DIGEST = Digest(fingerprint=_EMPTY_FINGERPRINT, serialized_bytes_length=0)
280275
EMPTY_FILE_DIGEST = FileDigest(fingerprint=_EMPTY_FINGERPRINT, serialized_bytes_length=0)
281-
EMPTY_SNAPSHOT = Snapshot(EMPTY_DIGEST, files=(), dirs=())
276+
EMPTY_SNAPSHOT = Snapshot()
282277

283278

284279
@dataclass(frozen=True)

src/python/pants/engine/internals/native_engine.pyi

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Callable, Dict, List
1+
from typing import Any, Callable, Dict, List, Tuple
22

33
# TODO: black and flake8 disagree about the content of this file:
44
# see https://github.com/psf/black/issues/1548
@@ -13,6 +13,15 @@ class PyDigest:
1313
@property
1414
def serialized_bytes_length(self) -> int: ...
1515

16+
class PySnapshot:
17+
def __init__(self) -> None: ...
18+
@property
19+
def digest(self) -> PyDigest: ...
20+
@property
21+
def dirs(self) -> Tuple[str, ...]: ...
22+
@property
23+
def files(self) -> Tuple[str, ...]: ...
24+
1625
class PyExecutionRequest:
1726
def __init__(self, **kwargs: Any) -> None: ...
1827

src/rust/engine/src/externs/engine_aware.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ pub struct Artifacts {}
8888
impl EngineAwareInformation for Artifacts {
8989
type MaybeOutput = Vec<(String, Digest)>;
9090

91-
fn retrieve(types: &Types, value: &Value) -> Option<Self::MaybeOutput> {
91+
fn retrieve(_types: &Types, value: &Value) -> Option<Self::MaybeOutput> {
9292
let artifacts_val = match externs::call_method(&value, "artifacts", &[]) {
9393
Ok(value) => value,
9494
Err(py_err) => {
@@ -119,7 +119,7 @@ impl EngineAwareInformation for Artifacts {
119119
log::error!("Error in EngineAware.artifacts() - no `digest` attr: {}", e);
120120
})
121121
.ok()?;
122-
let digest = match lift_directory_digest(types, &Value::new(digest_value)) {
122+
let digest = match lift_directory_digest(&Value::new(digest_value)) {
123123
Ok(digest) => digest,
124124
Err(e) => {
125125
log::error!("Error in EngineAware.artifacts() implementation: {}", e);

src/rust/engine/src/externs/fs.rs

+68-2
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,32 @@
3838
use std::borrow::Cow;
3939

4040
use cpython::{
41-
exc, py_class, CompareOp, PyErr, PyObject, PyResult, Python, PythonObject, ToPyObject,
41+
exc, py_class, CompareOp, PyErr, PyObject, PyResult, PyString, PyTuple, Python, PythonObject,
42+
ToPyObject,
4243
};
44+
use fs::PathStat;
4345
use hashing::{Digest, Fingerprint};
46+
use store::Snapshot;
4447

48+
///
49+
/// Data members and `create_instance` methods are module-private by default, so we expose them
50+
/// with public top-level functions.
51+
///
4552
/// TODO: See https://github.com/dgrunwald/rust-cpython/issues/242
46-
pub fn new_py_digest(digest: Digest) -> PyResult<PyDigest> {
53+
///
54+
55+
pub fn to_py_digest(digest: Digest) -> PyResult<PyDigest> {
4756
let gil = Python::acquire_gil();
4857
PyDigest::create_instance(gil.python(), digest)
4958
}
5059

60+
pub fn from_py_digest(digest: &PyObject) -> PyResult<Digest> {
61+
let gil = Python::acquire_gil();
62+
let py = gil.python();
63+
let py_digest = digest.extract::<PyDigest>(py)?;
64+
Ok(*py_digest.digest(py))
65+
}
66+
5167
py_class!(pub class PyDigest |py| {
5268
data digest: Digest;
5369
def __new__(_cls, fingerprint: Cow<str>, serialized_bytes_length: usize) -> PyResult<Self> {
@@ -84,3 +100,53 @@ py_class!(pub class PyDigest |py| {
84100
Ok(self.digest(py).0.prefix_hash())
85101
}
86102
});
103+
104+
pub fn to_py_snapshot(snapshot: Snapshot) -> PyResult<PySnapshot> {
105+
let gil = Python::acquire_gil();
106+
PySnapshot::create_instance(gil.python(), snapshot)
107+
}
108+
109+
py_class!(pub class PySnapshot |py| {
110+
data snapshot: Snapshot;
111+
def __new__(_cls) -> PyResult<Self> {
112+
Self::create_instance(py, Snapshot::empty())
113+
}
114+
115+
@property def digest(&self) -> PyResult<PyDigest> {
116+
to_py_digest(self.snapshot(py).digest)
117+
}
118+
119+
@property def files(&self) -> PyResult<PyTuple> {
120+
let files = self.snapshot(py).path_stats.iter().filter_map(|ps| match ps {
121+
PathStat::File { path, .. } => path.to_str(),
122+
_ => None,
123+
}).map(|ps| PyString::new(py, ps).into_object()).collect::<Vec<_>>();
124+
Ok(PyTuple::new(py, &files))
125+
}
126+
127+
@property def dirs(&self) -> PyResult<PyTuple> {
128+
let dirs = self.snapshot(py).path_stats.iter().filter_map(|ps| match ps {
129+
PathStat::Dir { path, .. } => path.to_str(),
130+
_ => None,
131+
}).map(|ps| PyString::new(py, ps).into_object()).collect::<Vec<_>>();
132+
Ok(PyTuple::new(py, &dirs))
133+
}
134+
135+
def __richcmp__(&self, other: PySnapshot, op: CompareOp) -> PyResult<PyObject> {
136+
match op {
137+
CompareOp::Eq => {
138+
let res = self.snapshot(py).digest == other.snapshot(py).digest;
139+
Ok(res.to_py_object(py).into_object())
140+
},
141+
CompareOp::Ne => {
142+
let res = self.snapshot(py).digest != other.snapshot(py).digest;
143+
Ok(res.to_py_object(py).into_object())
144+
}
145+
_ => Ok(py.NotImplemented()),
146+
}
147+
}
148+
149+
def __hash__(&self) -> PyResult<u64> {
150+
Ok(self.snapshot(py).digest.0.prefix_hash())
151+
}
152+
});

src/rust/engine/src/externs/interface.rs

+7-9
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,7 @@ py_module_initializer!(native_engine, |py, m| {
412412
m.add_class::<externs::PyGeneratorResponseGetMulti>(py)?;
413413

414414
m.add_class::<externs::fs::PyDigest>(py)?;
415+
m.add_class::<externs::fs::PySnapshot>(py)?;
415416

416417
Ok(())
417418
});
@@ -985,7 +986,7 @@ async fn workunit_to_py_value(
985986
})?;
986987
artifact_entries.push((
987988
externs::store_utf8(artifact_name.as_str()),
988-
crate::nodes::Snapshot::store_snapshot(core, &snapshot).map_err(|err_str| {
989+
crate::nodes::Snapshot::store_snapshot(snapshot).map_err(|err_str| {
989990
let gil = Python::acquire_gil();
990991
let py = gil.python();
991992
PyErr::new::<exc::Exception, _>(py, (err_str,))
@@ -1561,10 +1562,7 @@ fn capture_snapshots(
15611562
if maybe_digest == externs::none() {
15621563
None
15631564
} else {
1564-
Some(nodes::lift_directory_digest(
1565-
&core.types,
1566-
&Value::new(maybe_digest),
1567-
)?)
1565+
Some(nodes::lift_directory_digest(&Value::new(maybe_digest))?)
15681566
}
15691567
};
15701568
path_globs.map(|path_globs| (path_globs, root, digest_hint))
@@ -1585,7 +1583,7 @@ fn capture_snapshots(
15851583
digest_hint,
15861584
)
15871585
.await?;
1588-
nodes::Snapshot::store_snapshot(&core, &snapshot)
1586+
nodes::Snapshot::store_snapshot(snapshot)
15891587
}
15901588
})
15911589
.collect::<Vec<_>>();
@@ -1614,7 +1612,7 @@ fn ensure_remote_has_recursive(
16141612
.iter(py)
16151613
.map(|item| {
16161614
let value = item.into();
1617-
crate::nodes::lift_directory_digest(&core.types, &value)
1615+
crate::nodes::lift_directory_digest(&value)
16181616
.or_else(|_| crate::nodes::lift_file_digest(&core.types, &value))
16191617
})
16201618
.collect::<Result<Vec<Digest>, _>>()
@@ -1695,7 +1693,7 @@ fn run_local_interactive_process(
16951693

16961694
let run_in_workspace: bool = externs::getattr(&value, "run_in_workspace").unwrap();
16971695
let input_digest_value: Value = externs::getattr(&value, "input_digest").unwrap();
1698-
let input_digest: Digest = nodes::lift_directory_digest(types, &input_digest_value)?;
1696+
let input_digest: Digest = nodes::lift_directory_digest(&input_digest_value)?;
16991697
let hermetic_env: bool = externs::getattr(&value, "hermetic_env").unwrap();
17001698
let env = externs::getattr_from_frozendict(&value, "env");
17011699

@@ -1737,7 +1735,7 @@ fn write_digest(
17371735
// TODO: A parent_id should be an explicit argument.
17381736
session.workunit_store().init_thread_state(None);
17391737

1740-
let lifted_digest = nodes::lift_directory_digest(&scheduler.core.types, &digest.into())
1738+
let lifted_digest = nodes::lift_directory_digest(&digest.into())
17411739
.map_err(|e| PyErr::new::<exc::ValueError, _>(py, (e,)))?;
17421740

17431741
// Python will have already validated that path_prefix is a relative path.

src/rust/engine/src/intrinsics.rs

+16-23
Original file line numberDiff line numberDiff line change
@@ -141,13 +141,12 @@ fn multi_platform_process_request_to_process_result(
141141
// TODO: The platform will be used in a followup.
142142
let _platform_val = &args[1];
143143

144-
let process_request = MultiPlatformExecuteProcess::lift(&context.core.types, process_val)
145-
.map_err(|str| {
146-
throw(&format!(
147-
"Error lifting MultiPlatformExecuteProcess: {}",
148-
str
149-
))
150-
})?;
144+
let process_request = MultiPlatformExecuteProcess::lift(process_val).map_err(|str| {
145+
throw(&format!(
146+
"Error lifting MultiPlatformExecuteProcess: {}",
147+
str
148+
))
149+
})?;
151150
let result = context.get(process_request).await?.0;
152151

153152
let maybe_stdout = context
@@ -205,7 +204,7 @@ fn directory_digest_to_digest_contents(
205204
args: Vec<Value>,
206205
) -> BoxFuture<'static, NodeResult<Value>> {
207206
async move {
208-
let digest = lift_directory_digest(&context.core.types, &args[0]).map_err(|s| throw(&s))?;
207+
let digest = lift_directory_digest(&args[0]).map_err(|s| throw(&s))?;
209208
let snapshot = context
210209
.core
211210
.store()
@@ -227,9 +226,8 @@ fn remove_prefix_request_to_digest(
227226
let store = core.store();
228227

229228
async move {
230-
let input_digest =
231-
lift_directory_digest(&core.types, &externs::getattr(&args[0], "digest").unwrap())
232-
.map_err(|e| throw(&e))?;
229+
let input_digest = lift_directory_digest(&externs::getattr(&args[0], "digest").unwrap())
230+
.map_err(|e| throw(&e))?;
233231
let prefix = externs::getattr_as_string(&args[0], "prefix");
234232
let prefix = RelativePath::new(PathBuf::from(prefix))
235233
.map_err(|e| throw(&format!("The `prefix` must be relative: {:?}", e)))?;
@@ -249,9 +247,8 @@ fn add_prefix_request_to_digest(
249247
let core = context.core;
250248
let store = core.store();
251249
async move {
252-
let input_digest =
253-
lift_directory_digest(&core.types, &externs::getattr(&args[0], "digest").unwrap())
254-
.map_err(|e| throw(&e))?;
250+
let input_digest = lift_directory_digest(&externs::getattr(&args[0], "digest").unwrap())
251+
.map_err(|e| throw(&e))?;
255252
let prefix = externs::getattr_as_string(&args[0], "prefix");
256253
let prefix = RelativePath::new(PathBuf::from(prefix))
257254
.map_err(|e| throw(&format!("The `prefix` must be relative: {:?}", e)))?;
@@ -265,12 +262,11 @@ fn add_prefix_request_to_digest(
265262
}
266263

267264
fn digest_to_snapshot(context: Context, args: Vec<Value>) -> BoxFuture<'static, NodeResult<Value>> {
268-
let core = context.core.clone();
269265
let store = context.core.store();
270266
async move {
271-
let digest = lift_directory_digest(&context.core.types, &args[0])?;
267+
let digest = lift_directory_digest(&args[0])?;
272268
let snapshot = store::Snapshot::from_digest(store, digest).await?;
273-
Snapshot::store_snapshot(&core, &snapshot)
269+
Snapshot::store_snapshot(snapshot)
274270
}
275271
.map_err(|e: String| throw(&e))
276272
.boxed()
@@ -286,7 +282,7 @@ fn merge_digests_request_to_digest(
286282
externs::getattr::<Vec<Value>>(&args[0], "digests")
287283
.unwrap()
288284
.into_iter()
289-
.map(|val: Value| lift_directory_digest(&core.types, &val))
285+
.map(|val: Value| lift_directory_digest(&val))
290286
.collect();
291287
async move {
292288
let digest = store
@@ -397,11 +393,8 @@ fn digest_subset_to_digest(
397393

398394
async move {
399395
let path_globs = Snapshot::lift_prepared_path_globs(&globs).map_err(|e| throw(&e))?;
400-
let original_digest = lift_directory_digest(
401-
&context.core.types,
402-
&externs::getattr(&args[0], "digest").unwrap(),
403-
)
404-
.map_err(|e| throw(&e))?;
396+
let original_digest = lift_directory_digest(&externs::getattr(&args[0], "digest").unwrap())
397+
.map_err(|e| throw(&e))?;
405398
let subset_params = SubsetParams { globs: path_globs };
406399
let digest = store
407400
.subset(original_digest, subset_params)

0 commit comments

Comments
 (0)