Skip to content

Commit 177722b

Browse files
derrickstolee authored and gitster committed
commit: integrate commit graph with commit parsing
Teach Git to inspect a commit graph file to supply the contents of a
struct commit when calling parse_commit_gently(). This implementation
satisfies all post-conditions on the struct commit, including loading
parents, the root tree, and the commit date.

If core.commitGraph is false, then do not check graph files.

In test script t5318-commit-graph.sh, add output-matching conditions on
read-only graph operations.

By loading commits from the graph instead of parsing commit buffers, we
save a lot of time on long commit walks. Here are some performance
results for a copy of the Linux repository where 'master' has 678,653
reachable commits and is behind 'origin/master' by 59,929 commits.

| Command                          | Before | After  | Rel % |
|----------------------------------|--------|--------|-------|
| log --oneline --topo-order -1000 |  8.31s |  0.94s | -88%  |
| branch -vv                       |  1.02s |  0.14s | -86%  |
| rev-list --all                   |  5.89s |  1.07s | -81%  |
| rev-list --all --objects         | 66.15s | 58.45s | -11%  |

Signed-off-by: Derrick Stolee <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4f2542b commit 177722b

6 files changed

+205
-2
lines changed

alloc.c

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ void *alloc_commit_node(void)
9393
struct commit *c = alloc_node(&commit_state, sizeof(struct commit));
9494
c->object.type = OBJ_COMMIT;
9595
c->index = alloc_commit_index();
96+
c->graph_pos = COMMIT_NOT_FROM_GRAPH;
9697
return c;
9798
}
9899

commit-graph.c

+140-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
#define GRAPH_MIN_SIZE (5 * GRAPH_CHUNKLOOKUP_WIDTH + GRAPH_FANOUT_SIZE + \
3939
GRAPH_OID_LEN + 8)
4040

41-
4241
char *get_commit_graph_filename(const char *obj_dir)
4342
{
4443
return xstrfmt("%s/info/commit-graph", obj_dir);
@@ -179,6 +178,145 @@ struct commit_graph *load_commit_graph_one(const char *graph_file)
179178
exit(1);
180179
}
181180

181+
/* global storage */
182+
static struct commit_graph *commit_graph = NULL;
183+
184+
static void prepare_commit_graph_one(const char *obj_dir)
185+
{
186+
char *graph_name;
187+
188+
if (commit_graph)
189+
return;
190+
191+
graph_name = get_commit_graph_filename(obj_dir);
192+
commit_graph = load_commit_graph_one(graph_name);
193+
194+
FREE_AND_NULL(graph_name);
195+
}
196+
197+
static int prepare_commit_graph_run_once = 0;
198+
static void prepare_commit_graph(void)
199+
{
200+
struct alternate_object_database *alt;
201+
char *obj_dir;
202+
203+
if (prepare_commit_graph_run_once)
204+
return;
205+
prepare_commit_graph_run_once = 1;
206+
207+
obj_dir = get_object_directory();
208+
prepare_commit_graph_one(obj_dir);
209+
prepare_alt_odb();
210+
for (alt = alt_odb_list; !commit_graph && alt; alt = alt->next)
211+
prepare_commit_graph_one(alt->path);
212+
}
213+
214+
static void close_commit_graph(void)
215+
{
216+
if (!commit_graph)
217+
return;
218+
219+
if (commit_graph->graph_fd >= 0) {
220+
munmap((void *)commit_graph->data, commit_graph->data_len);
221+
commit_graph->data = NULL;
222+
close(commit_graph->graph_fd);
223+
}
224+
225+
FREE_AND_NULL(commit_graph);
226+
}
227+
228+
static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
229+
{
230+
return bsearch_hash(oid->hash, g->chunk_oid_fanout,
231+
g->chunk_oid_lookup, g->hash_len, pos);
232+
}
233+
234+
static struct commit_list **insert_parent_or_die(struct commit_graph *g,
235+
uint64_t pos,
236+
struct commit_list **pptr)
237+
{
238+
struct commit *c;
239+
struct object_id oid;
240+
hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos);
241+
c = lookup_commit(&oid);
242+
if (!c)
243+
die("could not find commit %s", oid_to_hex(&oid));
244+
c->graph_pos = pos;
245+
return &commit_list_insert(c, pptr)->next;
246+
}
247+
248+
static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
249+
{
250+
struct object_id oid;
251+
uint32_t edge_value;
252+
uint32_t *parent_data_ptr;
253+
uint64_t date_low, date_high;
254+
struct commit_list **pptr;
255+
const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos;
256+
257+
item->object.parsed = 1;
258+
item->graph_pos = pos;
259+
260+
hashcpy(oid.hash, commit_data);
261+
item->tree = lookup_tree(&oid);
262+
263+
date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
264+
date_low = get_be32(commit_data + g->hash_len + 12);
265+
item->date = (timestamp_t)((date_high << 32) | date_low);
266+
267+
pptr = &item->parents;
268+
269+
edge_value = get_be32(commit_data + g->hash_len);
270+
if (edge_value == GRAPH_PARENT_NONE)
271+
return 1;
272+
pptr = insert_parent_or_die(g, edge_value, pptr);
273+
274+
edge_value = get_be32(commit_data + g->hash_len + 4);
275+
if (edge_value == GRAPH_PARENT_NONE)
276+
return 1;
277+
if (!(edge_value & GRAPH_OCTOPUS_EDGES_NEEDED)) {
278+
pptr = insert_parent_or_die(g, edge_value, pptr);
279+
return 1;
280+
}
281+
282+
parent_data_ptr = (uint32_t*)(g->chunk_large_edges +
283+
4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
284+
do {
285+
edge_value = get_be32(parent_data_ptr);
286+
pptr = insert_parent_or_die(g,
287+
edge_value & GRAPH_EDGE_LAST_MASK,
288+
pptr);
289+
parent_data_ptr++;
290+
} while (!(edge_value & GRAPH_LAST_EDGE));
291+
292+
return 1;
293+
}
294+
295+
int parse_commit_in_graph(struct commit *item)
296+
{
297+
if (!core_commit_graph)
298+
return 0;
299+
if (item->object.parsed)
300+
return 1;
301+
302+
prepare_commit_graph();
303+
if (commit_graph) {
304+
uint32_t pos;
305+
int found;
306+
if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
307+
pos = item->graph_pos;
308+
found = 1;
309+
} else {
310+
found = bsearch_graph(commit_graph, &(item->object.oid), &pos);
311+
}
312+
313+
if (found)
314+
return fill_commit_in_graph(item, commit_graph, pos);
315+
}
316+
317+
return 0;
318+
}
319+
182320
static void write_graph_chunk_fanout(struct hashfile *f,
183321
struct commit **commits,
184322
int nr_commits)
@@ -530,6 +668,7 @@ void write_commit_graph(const char *obj_dir)
530668
write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
531669
write_graph_chunk_large_edges(f, commits.list, commits.nr);
532670

671+
close_commit_graph();
533672
finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
534673
commit_lock_file(&lk);
535674

commit-graph.h

+12
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@
55

66
char *get_commit_graph_filename(const char *obj_dir);
77

8+
/*
9+
* Given a commit struct, try to fill the commit struct info, including:
10+
* 1. tree object
11+
* 2. date
12+
* 3. parents.
13+
*
14+
 * Returns 1 if the commit was already parsed or was found in the packed
 * graph, and 0 otherwise (including when core.commitGraph is disabled).
15+
*
16+
* See parse_commit_buffer() for the fallback after this call.
17+
*/
18+
int parse_commit_in_graph(struct commit *item);
19+
820
struct commit_graph {
921
int graph_fd;
1022

commit.c

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "cache.h"
22
#include "tag.h"
33
#include "commit.h"
4+
#include "commit-graph.h"
45
#include "pkt-line.h"
56
#include "utf8.h"
67
#include "diff.h"
@@ -383,6 +384,8 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing)
383384
return -1;
384385
if (item->object.parsed)
385386
return 0;
387+
if (parse_commit_in_graph(item))
388+
return 0;
386389
buffer = read_sha1_file(item->object.oid.hash, &type, &size);
387390
if (!buffer)
388391
return quiet_on_missing ? -1 :

commit.h

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "string-list.h"
1010
#include "pretty.h"
1111

12+
#define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF
13+
1214
struct commit_list {
1315
struct commit *item;
1416
struct commit_list *next;
@@ -21,6 +23,7 @@ struct commit {
2123
timestamp_t date;
2224
struct commit_list *parents;
2325
struct tree *tree;
26+
uint32_t graph_pos;
2427
};
2528

2629
extern int save_commit_buffer;

t/t5318-commit-graph.sh

+46-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ test_expect_success 'setup full repo' '
77
mkdir full &&
88
cd "$TRASH_DIRECTORY/full" &&
99
git init &&
10+
git config core.commitGraph true &&
1011
objdir=".git/objects"
1112
'
1213

@@ -26,6 +27,29 @@ test_expect_success 'create commits and repack' '
2627
git repack
2728
'
2829

30+
# Run the git command line given in $1 twice, once with core.commitGraph
# enabled and once with it disabled, and succeed only if both runs print
# identical output.
#
# $1 is deliberately left unquoted so that it splits into the subcommand
# and its arguments. The runs are chained with && so that a failing git
# invocation fails the caller instead of being masked by the comparison.
graph_git_two_modes() {
	git -c core.commitGraph=true $1 >output &&
	git -c core.commitGraph=false $1 >expect &&
	test_cmp expect output
}
35+
36+
# Register one test that runs a fixed set of read-only git commands through
# graph_git_two_modes.
#
#   $1  label appended to the test title
#   $2  repository directory, relative to $TRASH_DIRECTORY
#   $3  branch to walk
#   $4  branch to compare against
#
# The arguments are stored in plain (non-local) variables because the test
# body is single-quoted: its $DIR, $BRANCH and $COMPARE references are not
# expanded here but only when the body is later run (presumably evaluated
# by test_expect_success).
graph_git_behavior() {
	MSG=$1 DIR=$2 BRANCH=$3 COMPARE=$4
	test_expect_success "check normal git operations: $MSG" '
		cd "$TRASH_DIRECTORY/$DIR" &&
		graph_git_two_modes "log --oneline $BRANCH" &&
		graph_git_two_modes "log --topo-order $BRANCH" &&
		graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
		graph_git_two_modes "branch -vv" &&
		graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
	'
}
50+
51+
graph_git_behavior 'no graph' full commits/3 commits/1
52+
2953
graph_read_expect() {
3054
OPTIONAL=""
3155
NUM_CHUNKS=3
@@ -50,6 +74,8 @@ test_expect_success 'write graph' '
5074
graph_read_expect "3"
5175
'
5276

77+
graph_git_behavior 'graph exists' full commits/3 commits/1
78+
5379
test_expect_success 'Add more commits' '
5480
cd "$TRASH_DIRECTORY/full" &&
5581
git reset --hard commits/1 &&
@@ -86,14 +112,17 @@ test_expect_success 'Add more commits' '
86112
# |___/____/
87113
# 1
88114

89-
90115
test_expect_success 'write graph with merges' '
91116
cd "$TRASH_DIRECTORY/full" &&
92117
git commit-graph write &&
93118
test_path_is_file $objdir/info/commit-graph &&
94119
graph_read_expect "10" "large_edges"
95120
'
96121

122+
graph_git_behavior 'merge 1 vs 2' full merge/1 merge/2
123+
graph_git_behavior 'merge 1 vs 3' full merge/1 merge/3
124+
graph_git_behavior 'merge 2 vs 3' full merge/2 merge/3
125+
97126
test_expect_success 'Add one more commit' '
98127
cd "$TRASH_DIRECTORY/full" &&
99128
test_commit 8 &&
@@ -115,32 +144,48 @@ test_expect_success 'Add one more commit' '
115144
# |___/____/
116145
# 1
117146

147+
graph_git_behavior 'mixed mode, commit 8 vs merge 1' full commits/8 merge/1
148+
graph_git_behavior 'mixed mode, commit 8 vs merge 2' full commits/8 merge/2
149+
118150
test_expect_success 'write graph with new commit' '
119151
cd "$TRASH_DIRECTORY/full" &&
120152
git commit-graph write &&
121153
test_path_is_file $objdir/info/commit-graph &&
122154
graph_read_expect "11" "large_edges"
123155
'
124156

157+
graph_git_behavior 'full graph, commit 8 vs merge 1' full commits/8 merge/1
158+
graph_git_behavior 'full graph, commit 8 vs merge 2' full commits/8 merge/2
159+
125160
test_expect_success 'write graph with nothing new' '
126161
cd "$TRASH_DIRECTORY/full" &&
127162
git commit-graph write &&
128163
test_path_is_file $objdir/info/commit-graph &&
129164
graph_read_expect "11" "large_edges"
130165
'
131166

167+
graph_git_behavior 'cleared graph, commit 8 vs merge 1' full commits/8 merge/1
168+
graph_git_behavior 'cleared graph, commit 8 vs merge 2' full commits/8 merge/2
169+
132170
test_expect_success 'setup bare repo' '
133171
cd "$TRASH_DIRECTORY" &&
134172
git clone --bare --no-local full bare &&
135173
cd bare &&
174+
git config core.commitGraph true &&
136175
baredir="./objects"
137176
'
138177

178+
graph_git_behavior 'bare repo, commit 8 vs merge 1' bare commits/8 merge/1
179+
graph_git_behavior 'bare repo, commit 8 vs merge 2' bare commits/8 merge/2
180+
139181
test_expect_success 'write graph in bare repo' '
140182
cd "$TRASH_DIRECTORY/bare" &&
141183
git commit-graph write &&
142184
test_path_is_file $baredir/info/commit-graph &&
143185
graph_read_expect "11" "large_edges"
144186
'
145187

188+
graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1
189+
graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2
190+
146191
test_done

0 commit comments

Comments
 (0)