Skip to content

Commit f3ad6e9

Browse files
authored
feat: add search methods to Repository (#212)
Adds `Repository` methods to find a given path in multiple trees: `find_nodes_from_path` searches for an explicitly given path; `find_matching_nodes` searches using an arbitrary matching criterion.
1 parent 6495674 commit f3ad6e9

18 files changed

+496
-49
lines changed

crates/core/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ xattr = "1"
132132
[dev-dependencies]
133133
expect-test = "1.4.1"
134134
flate2 = "1.0.28"
135+
globset = "0.4.14"
135136
insta = { version = "1.36.1", features = ["redactions", "ron"] }
136137
mockall = "0.12.1"
137138
pretty_assertions = "1.4.0"

crates/core/src/backend/node.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ use crate::error::NodeErrorKind;
3030

3131
use crate::id::Id;
3232

33-
#[derive(Default, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Constructor)]
33+
#[derive(
34+
Default, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Constructor, PartialOrd, Ord,
35+
)]
3436
/// A node within the tree hierarchy
3537
pub struct Node {
3638
/// Name of the node: filename or dirname.
@@ -63,7 +65,7 @@ pub struct Node {
6365
}
6466

6567
#[serde_as]
66-
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
68+
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
6769
#[serde(tag = "type", rename_all = "lowercase")]
6870
/// Types a [`Node`] can have with type-specific additional information
6971
pub enum NodeType {
@@ -190,7 +192,7 @@ impl Default for NodeType {
190192
Option => #[serde(default, skip_serializing_if = "Option::is_none")],
191193
u64 => #[serde(default, skip_serializing_if = "is_default")],
192194
)]
193-
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq)]
195+
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
194196
pub struct Metadata {
195197
/// Unix file mode
196198
pub mode: Option<u32>,
@@ -247,7 +249,7 @@ where
247249
}
248250

249251
/// Extended attribute of a [`Node`]
250-
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
252+
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)]
251253
pub struct ExtendedAttribute {
252254
/// Name of the extended attribute
253255
pub name: String,

crates/core/src/blob/tree.rs

+191-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::{
22
cmp::Ordering,
3-
collections::{BTreeSet, BinaryHeap},
3+
collections::{BTreeMap, BTreeSet, BinaryHeap},
44
ffi::{OsStr, OsString},
55
mem,
66
path::{Component, Path, PathBuf, Prefix},
@@ -158,6 +158,196 @@ impl Tree {
158158

159159
Ok(node)
160160
}
161+
162+
pub(crate) fn find_nodes_from_path(
163+
be: &impl DecryptReadBackend,
164+
index: &impl ReadGlobalIndex,
165+
ids: impl IntoIterator<Item = Id>,
166+
path: &Path,
167+
) -> RusticResult<FindNode> {
168+
// helper function which is recursively called
169+
fn find_node_from_component(
170+
be: &impl DecryptReadBackend,
171+
index: &impl ReadGlobalIndex,
172+
tree_id: Id,
173+
path_comp: &[OsString],
174+
results_cache: &mut [BTreeMap<Id, Option<usize>>],
175+
nodes: &mut BTreeMap<Node, usize>,
176+
idx: usize,
177+
) -> RusticResult<Option<usize>> {
178+
if let Some(result) = results_cache[idx].get(&tree_id) {
179+
return Ok(*result);
180+
}
181+
182+
let tree = Tree::from_backend(be, index, tree_id)?;
183+
let result = if let Some(node) = tree
184+
.nodes
185+
.into_iter()
186+
.find(|node| node.name() == path_comp[idx])
187+
{
188+
if idx == path_comp.len() - 1 {
189+
let new_idx = nodes.len();
190+
let node_idx = nodes.entry(node).or_insert(new_idx);
191+
Some(*node_idx)
192+
} else {
193+
let id = node
194+
.subtree
195+
.ok_or_else(|| TreeErrorKind::NotADirectory(path_comp[idx].clone()))?;
196+
197+
find_node_from_component(
198+
be,
199+
index,
200+
id,
201+
path_comp,
202+
results_cache,
203+
nodes,
204+
idx + 1,
205+
)?
206+
}
207+
} else {
208+
None
209+
};
210+
_ = results_cache[idx].insert(tree_id, result);
211+
Ok(result)
212+
}
213+
214+
let path_comp: Vec<_> = path
215+
.components()
216+
.filter_map(|p| comp_to_osstr(p).transpose())
217+
.collect::<RusticResult<_>>()?;
218+
219+
// caching all results
220+
let mut results_cache = vec![BTreeMap::new(); path_comp.len()];
221+
let mut nodes = BTreeMap::new();
222+
223+
let matches: Vec<_> = ids
224+
.into_iter()
225+
.map(|id| {
226+
find_node_from_component(
227+
be,
228+
index,
229+
id,
230+
&path_comp,
231+
&mut results_cache,
232+
&mut nodes,
233+
0,
234+
)
235+
})
236+
.collect::<RusticResult<_>>()?;
237+
238+
// sort nodes by index and return a Vec
239+
let mut nodes: Vec<_> = nodes.into_iter().collect();
240+
nodes.sort_unstable_by_key(|n| n.1);
241+
let nodes = nodes.into_iter().map(|n| n.0).collect();
242+
243+
Ok(FindNode { nodes, matches })
244+
}
245+
246+
pub(crate) fn find_matching_nodes(
247+
be: &impl DecryptReadBackend,
248+
index: &impl ReadGlobalIndex,
249+
ids: impl IntoIterator<Item = Id>,
250+
matches: &impl Fn(&Path, &Node) -> bool,
251+
) -> RusticResult<FindMatches> {
252+
// internal state used to save match information in find_matching_nodes
253+
#[derive(Default)]
254+
struct MatchInternalState {
255+
// we cache all results
256+
cache: BTreeMap<(Id, PathBuf), Vec<(usize, usize)>>,
257+
nodes: BTreeMap<Node, usize>,
258+
paths: BTreeMap<PathBuf, usize>,
259+
}
260+
261+
impl MatchInternalState {
262+
fn insert_result(&mut self, path: PathBuf, node: Node) -> (usize, usize) {
263+
let new_idx = self.nodes.len();
264+
let node_idx = self.nodes.entry(node).or_insert(new_idx);
265+
let new_idx = self.paths.len();
266+
let node_path_idx = self.paths.entry(path).or_insert(new_idx);
267+
(*node_path_idx, *node_idx)
268+
}
269+
}
270+
271+
// helper function which is recursively called
272+
fn find_matching_nodes_recursive(
273+
be: &impl DecryptReadBackend,
274+
index: &impl ReadGlobalIndex,
275+
tree_id: Id,
276+
path: &Path,
277+
state: &mut MatchInternalState,
278+
matches: &impl Fn(&Path, &Node) -> bool,
279+
) -> RusticResult<Vec<(usize, usize)>> {
280+
let mut result = Vec::new();
281+
if let Some(result) = state.cache.get(&(tree_id, path.to_path_buf())) {
282+
return Ok(result.clone());
283+
}
284+
285+
let tree = Tree::from_backend(be, index, tree_id)?;
286+
for node in tree.nodes {
287+
let node_path = path.join(node.name());
288+
if node.is_dir() {
289+
let id = node
290+
.subtree
291+
.ok_or_else(|| TreeErrorKind::NotADirectory(node.name()))?;
292+
result.append(&mut find_matching_nodes_recursive(
293+
be, index, id, &node_path, state, matches,
294+
)?);
295+
}
296+
if matches(&node_path, &node) {
297+
result.push(state.insert_result(node_path, node));
298+
}
299+
}
300+
_ = state
301+
.cache
302+
.insert((tree_id, path.to_path_buf()), result.clone());
303+
Ok(result)
304+
}
305+
306+
let mut state = MatchInternalState::default();
307+
308+
let initial_path = PathBuf::new();
309+
let matches: Vec<_> = ids
310+
.into_iter()
311+
.map(|id| {
312+
find_matching_nodes_recursive(be, index, id, &initial_path, &mut state, matches)
313+
})
314+
.collect::<RusticResult<_>>()?;
315+
316+
// sort paths by index and return a Vec
317+
let mut paths: Vec<_> = state.paths.into_iter().collect();
318+
paths.sort_unstable_by_key(|n| n.1);
319+
let paths = paths.into_iter().map(|n| n.0).collect();
320+
321+
// sort nodes by index and return a Vec
322+
let mut nodes: Vec<_> = state.nodes.into_iter().collect();
323+
nodes.sort_unstable_by_key(|n| n.1);
324+
let nodes = nodes.into_iter().map(|n| n.0).collect();
325+
Ok(FindMatches {
326+
paths,
327+
nodes,
328+
matches,
329+
})
330+
}
331+
}
332+
333+
/// Results from `find_nodes_from_path`
334+
#[derive(Debug, Serialize)]
335+
pub struct FindNode {
336+
/// found nodes for the given path
337+
pub nodes: Vec<Node>,
338+
/// result for each given tree id, in input order: the index into `nodes` of the node found, or `None` if the path was not found in that tree
339+
pub matches: Vec<Option<usize>>,
340+
}
341+
342+
/// Results from `find_matching_nodes`
343+
#[derive(Debug, Serialize)]
344+
pub struct FindMatches {
345+
/// found matching paths
346+
pub paths: Vec<PathBuf>,
347+
/// found matching nodes
348+
pub nodes: Vec<Node>,
349+
/// matches for each given tree id, in input order; each `(usize, usize)` is (index into `paths`, index into `nodes`)
350+
pub matches: Vec<Vec<(usize, usize)>>,
161351
}
162352

163353
/// Converts a [`Component`] to an [`OsString`].

crates/core/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ pub use crate::{
129129
FileType, ReadBackend, ReadSource, ReadSourceEntry, ReadSourceOpen, RepositoryBackends,
130130
WriteBackend, ALL_FILE_TYPES,
131131
},
132-
blob::tree::TreeStreamerOptions as LsOptions,
132+
blob::tree::{FindMatches, FindNode, TreeStreamerOptions as LsOptions},
133133
commands::{
134134
backup::{BackupOptions, ParentOptions},
135135
check::CheckOptions,

crates/core/src/repository.rs

+35-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use crate::{
2525
FileType, ReadBackend, WriteBackend,
2626
},
2727
blob::{
28-
tree::{NodeStreamer, TreeStreamerOptions as LsOptions},
28+
tree::{FindMatches, FindNode, NodeStreamer, TreeStreamerOptions as LsOptions},
2929
BlobType,
3030
},
3131
commands::{
@@ -1499,6 +1499,40 @@ impl<P, S: IndexedTree> Repository<P, S> {
14991499
pub fn node_from_path(&self, root_tree: Id, path: &Path) -> RusticResult<Node> {
15001500
Tree::node_from_path(self.dbe(), self.index(), root_tree, Path::new(path))
15011501
}
1502+
1503+
/// Get all [`Node`]s from given root trees and a path
1504+
///
1505+
/// # Arguments
1506+
///
1507+
/// * `ids` - The tree ids to search in
1508+
/// * `path` - The path
1509+
///
1510+
/// # Errors
1511+
/// if loading trees from the backend fails or a non-final path component is not a directory
1512+
pub fn find_nodes_from_path(
1513+
&self,
1514+
ids: impl IntoIterator<Item = Id>,
1515+
path: &Path,
1516+
) -> RusticResult<FindNode> {
1517+
Tree::find_nodes_from_path(self.dbe(), self.index(), ids, path)
1518+
}
1519+
1520+
/// Get all [`Node`]s/[`Path`]s from given root trees and a matching criterion
1521+
///
1522+
/// # Arguments
1523+
///
1524+
/// * `ids` - The tree ids to search in
1525+
/// * `matches` - The matching criterion
1526+
///
1527+
/// # Errors
1528+
/// if loading trees from the backend fails
1529+
pub fn find_matching_nodes(
1530+
&self,
1531+
ids: impl IntoIterator<Item = Id>,
1532+
matches: &impl Fn(&Path, &Node) -> bool,
1533+
) -> RusticResult<FindMatches> {
1534+
Tree::find_matching_nodes(self.dbe(), self.index(), ids, matches)
1535+
}
15021536
}
15031537

15041538
impl<P: ProgressBars, S: IndexedTree> Repository<P, S> {

0 commit comments

Comments
 (0)