Skip to content

Commit 0605c9c

Browse files
authored
feat: Add Repository::to_indexed_checked and ::to_index_ids_checked() (#168)
This allows using a repository even when index files are missing, without running an index repair. Instead, the missing information is read from the pack files and kept in memory, but not written back. This makes it possible to run truly read-only commands on a repository with a defective index.
1 parent 3907a5c commit 0605c9c

File tree

4 files changed

+221
-99
lines changed

4 files changed

+221
-99
lines changed

crates/core/src/commands/check.rs

+34-40
Original file line numberDiff line numberDiff line change
@@ -269,50 +269,44 @@ fn check_packs(
269269
IndexType::DataIds
270270
});
271271

272-
let mut process_pack = |p: IndexPack, check_time: bool| {
273-
let blob_type = p.blob_type();
274-
let pack_size = p.pack_size();
275-
_ = packs.insert(p.id, pack_size);
276-
if hot_be.is_some() && blob_type == BlobType::Tree {
277-
_ = tree_packs.insert(p.id, pack_size);
278-
}
279-
280-
// Check if time is set _
281-
if check_time && p.time.is_none() {
282-
error!("pack {}: No time is set! Run prune to correct this!", p.id);
283-
}
284-
285-
// check offsests in index
286-
let mut expected_offset: u32 = 0;
287-
let mut blobs = p.blobs;
288-
blobs.sort_unstable();
289-
for blob in blobs {
290-
if blob.tpe != blob_type {
291-
error!(
292-
"pack {}: blob {} blob type does not match: type: {:?}, expected: {:?}",
293-
p.id, blob.id, blob.tpe, blob_type
294-
);
295-
}
296-
297-
if blob.offset != expected_offset {
298-
error!(
299-
"pack {}: blob {} offset in index: {}, expected: {}",
300-
p.id, blob.id, blob.offset, expected_offset
301-
);
302-
}
303-
expected_offset += blob.length;
304-
}
305-
};
306-
307272
let p = pb.progress_counter("reading index...");
308273
for index in be.stream_all::<IndexFile>(&p)? {
309274
let index = index?.1;
310275
index_collector.extend(index.packs.clone());
311-
for p in index.packs {
312-
process_pack(p, false);
313-
}
314-
for p in index.packs_to_delete {
315-
process_pack(p, true);
276+
for (p, to_delete) in index.all_packs() {
277+
let check_time = to_delete; // Check if time is set for packs marked to delete
278+
let blob_type = p.blob_type();
279+
let pack_size = p.pack_size();
280+
_ = packs.insert(p.id, pack_size);
281+
if hot_be.is_some() && blob_type == BlobType::Tree {
282+
_ = tree_packs.insert(p.id, pack_size);
283+
}
284+
285+
// Check if time is set _
286+
if check_time && p.time.is_none() {
287+
error!("pack {}: No time is set! Run prune to correct this!", p.id);
288+
}
289+
290+
// check offsets in index
291+
let mut expected_offset: u32 = 0;
292+
let mut blobs = p.blobs;
293+
blobs.sort_unstable();
294+
for blob in blobs {
295+
if blob.tpe != blob_type {
296+
error!(
297+
"pack {}: blob {} blob type does not match: type: {:?}, expected: {:?}",
298+
p.id, blob.id, blob.tpe, blob_type
299+
);
300+
}
301+
302+
if blob.offset != expected_offset {
303+
error!(
304+
"pack {}: blob {} offset in index: {}, expected: {}",
305+
p.id, blob.id, blob.offset, expected_offset
306+
);
307+
}
308+
expected_offset += blob.length;
309+
}
316310
}
317311
}
318312

crates/core/src/commands/repair/index.rs

+111-53
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ use crate::{
1010
FileType, ReadBackend, WriteBackend,
1111
},
1212
error::{CommandErrorKind, RusticErrorKind, RusticResult},
13-
index::indexer::Indexer,
13+
index::{binarysorted::IndexCollector, indexer::Indexer, GlobalIndex},
1414
progress::{Progress, ProgressBars},
1515
repofile::{IndexFile, IndexPack, PackHeader, PackHeaderRef},
1616
repository::{Open, Repository},
17+
Id,
1718
};
1819

1920
#[cfg_attr(feature = "clap", derive(clap::Parser))]
@@ -49,60 +50,14 @@ impl RepairIndexOptions {
4950
CommandErrorKind::NotAllowedWithAppendOnly("index repair".to_string()).into(),
5051
);
5152
}
52-
let be = repo.dbe();
53-
let p = repo.pb.progress_spinner("listing packs...");
54-
let mut packs: HashMap<_, _> = be
55-
.list_with_size(FileType::Pack)
56-
.map_err(RusticErrorKind::Backend)?
57-
.into_iter()
58-
.collect();
59-
p.finish();
60-
61-
let mut pack_read_header = Vec::new();
62-
63-
let mut process_pack = |p: IndexPack,
64-
to_delete: bool,
65-
new_index: &mut IndexFile,
66-
changed: &mut bool| {
67-
let index_size = p.pack_size();
68-
let id = p.id;
69-
match packs.remove(&id) {
70-
None => {
71-
// this pack either does not exist or was already indexed in another index file => remove from index!
72-
*changed = true;
73-
debug!("removing non-existing pack {id} from index");
74-
}
75-
Some(size) => {
76-
if index_size != size {
77-
info!("pack {id}: size computed by index: {index_size}, actual size: {size}, will re-read header");
78-
}
7953

80-
if index_size != size || self.read_all {
81-
// pack exists, but sizes do not match or we want to read all pack files
82-
pack_read_header.push((
83-
id,
84-
Some(PackHeaderRef::from_index_pack(&p).size()),
85-
size,
86-
));
87-
*changed = true;
88-
} else {
89-
new_index.add(p, to_delete);
90-
}
91-
}
92-
}
93-
};
54+
let be = repo.dbe();
55+
let mut checker = PackChecker::new(repo)?;
9456

9557
let p = repo.pb.progress_counter("reading index...");
9658
for index in be.stream_all::<IndexFile>(&p)? {
9759
let (index_id, index) = index?;
98-
let mut new_index = IndexFile::default();
99-
let mut changed = false;
100-
for p in index.packs {
101-
process_pack(p, false, &mut new_index, &mut changed);
102-
}
103-
for p in index.packs_to_delete {
104-
process_pack(p, true, &mut new_index, &mut changed);
105-
}
60+
let (new_index, changed) = checker.check_pack(index, self.read_all);
10661
match (changed, dry_run) {
10762
(true, true) => info!("would have modified index file {index_id}"),
10863
(true, false) => {
@@ -117,9 +72,7 @@ impl RepairIndexOptions {
11772
}
11873
p.finish();
11974

120-
// process packs which are listed but not contained in the index
121-
pack_read_header.extend(packs.into_iter().map(|(id, size)| (id, None, size)));
122-
75+
let pack_read_header = checker.into_pack_to_read();
12376
repo.warm_up_wait(pack_read_header.iter().map(|(id, _, _)| *id))?;
12477

12578
let indexer = Indexer::new(be.clone()).into_shared();
@@ -156,3 +109,108 @@ impl RepairIndexOptions {
156109
Ok(())
157110
}
158111
}
112+
113+
struct PackChecker {
114+
packs: HashMap<Id, u32>,
115+
packs_to_read: Vec<(Id, Option<u32>, u32)>,
116+
}
117+
118+
impl PackChecker {
119+
fn new<P: ProgressBars, S: Open>(repo: &Repository<P, S>) -> RusticResult<Self> {
120+
let be = repo.dbe();
121+
let p = repo.pb.progress_spinner("listing packs...");
122+
let packs: HashMap<_, _> = be
123+
.list_with_size(FileType::Pack)
124+
.map_err(RusticErrorKind::Backend)?
125+
.into_iter()
126+
.collect();
127+
p.finish();
128+
129+
Ok(Self {
130+
packs,
131+
packs_to_read: Vec::new(),
132+
})
133+
}
134+
135+
fn check_pack(&mut self, indexfile: IndexFile, read_all: bool) -> (IndexFile, bool) {
136+
let mut new_index = IndexFile::default();
137+
let mut changed = false;
138+
for (p, to_delete) in indexfile.all_packs() {
139+
let index_size = p.pack_size();
140+
let id = p.id;
141+
match self.packs.remove(&id) {
142+
None => {
143+
// this pack either does not exist or was already indexed in another index file => remove from index!
144+
debug!("removing non-existing pack {id} from index");
145+
changed = true;
146+
}
147+
Some(size) => {
148+
if index_size != size {
149+
info!("pack {id}: size computed by index: {index_size}, actual size: {size}, will re-read header");
150+
}
151+
152+
if index_size != size || read_all {
153+
// pack exists, but sizes do not match or we want to read all pack files
154+
self.packs_to_read.push((
155+
id,
156+
Some(PackHeaderRef::from_index_pack(&p).size()),
157+
size,
158+
));
159+
} else {
160+
new_index.add(p, to_delete);
161+
}
162+
}
163+
}
164+
}
165+
(new_index, changed)
166+
}
167+
168+
fn into_pack_to_read(mut self) -> Vec<(Id, Option<u32>, u32)> {
169+
// add packs which are listed but not contained in the index
170+
self.packs_to_read
171+
.extend(self.packs.into_iter().map(|(id, size)| (id, None, size)));
172+
self.packs_to_read
173+
}
174+
}
175+
176+
pub(crate) fn index_checked_from_collector<P: ProgressBars, S: Open>(
177+
repo: &Repository<P, S>,
178+
mut collector: IndexCollector,
179+
) -> RusticResult<GlobalIndex> {
180+
let mut checker = PackChecker::new(repo)?;
181+
let be = repo.dbe();
182+
183+
let p = repo.pb.progress_counter("reading index...");
184+
for index in be.stream_all::<IndexFile>(&p)? {
185+
collector.extend(checker.check_pack(index?.1, false).0.packs);
186+
}
187+
p.finish();
188+
189+
let pack_read_header = checker.into_pack_to_read();
190+
repo.warm_up_wait(pack_read_header.iter().map(|(id, _, _)| *id))?;
191+
192+
let p = repo.pb.progress_counter("reading pack headers");
193+
p.set_length(
194+
pack_read_header
195+
.len()
196+
.try_into()
197+
.map_err(CommandErrorKind::ConversionToU64Failed)?,
198+
);
199+
let index_packs: Vec<_> = pack_read_header
200+
.into_iter()
201+
.map(|(id, size_hint, packsize)| {
202+
debug!("reading pack {id}...");
203+
let pack = IndexPack {
204+
id,
205+
blobs: PackHeader::from_file(be, id, size_hint, packsize)?.into_blobs(),
206+
..Default::default()
207+
};
208+
p.inc(1);
209+
Ok(pack)
210+
})
211+
.collect::<RusticResult<_>>()?;
212+
p.finish();
213+
214+
collector.extend(index_packs);
215+
Ok(GlobalIndex::new_from_index(collector.into_index()))
216+
}

crates/core/src/repofile/indexfile.rs

+7
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ impl IndexFile {
4545
self.packs.push(p);
4646
}
4747
}
48+
49+
/// Consumes the index file and iterates over all packs it contains.
///
/// Each item is `(pack, to_delete)`: the regular packs come first with
/// `to_delete == false`, followed by the packs marked for deletion with
/// `to_delete == true`.
pub(crate) fn all_packs(self) -> impl Iterator<Item = (IndexPack, bool)> {
    let regular = self.packs.into_iter().map(|pack| (pack, false));
    let marked_for_deletion = self.packs_to_delete.into_iter().map(|pack| (pack, true));
    regular.chain(marked_for_deletion)
}
4855
}
4956

5057
#[derive(Serialize, Deserialize, Default, Debug, Clone)]

0 commit comments

Comments
 (0)