diff --git a/builtin/backfill.c b/builtin/backfill.c index d794dd842f65ce..7ef9dc305e1bc5 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -144,6 +144,10 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit if (argc > 1) die(_("unrecognized argument: %s"), argv[1]); + if (!path_walk_filter_compatible(&ctx.revs.filter)) + die(_("cannot backfill with these filter options")); + if (ctx.revs.filter.blob_limit_value) + die(_("cannot backfill with blob size limits")); repo_config(repo, git_default_config, NULL); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index dd2480a73d2edf..bc9fb5b45737a3 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -5177,7 +5177,7 @@ int cmd_pack_objects(int argc, if (path_walk) { const char *option = NULL; - if (filter_options.choice) + if (!path_walk_filter_compatible(&filter_options)) option = "--filter"; else if (use_delta_islands) option = "--delta-islands"; @@ -5190,10 +5190,7 @@ int cmd_pack_objects(int argc, } if (path_walk) { strvec_push(&rp, "--boundary"); - /* - * We must disable the bitmaps because we are removing - * the --objects / --objects-edge[-aggressive] options. - */ + strvec_push(&rp, "--objects"); use_bitmap_index = 0; } else if (thin) { use_internal_rev_list = 1; diff --git a/path-walk.c b/path-walk.c index 6e426af4330893..8c01a39f754146 100644 --- a/path-walk.c +++ b/path-walk.c @@ -9,6 +9,9 @@ #include "hashmap.h" #include "hex.h" #include "list-objects.h" +#include "list-objects-filter-options.h" +#include "object-name.h" +#include "odb.h" #include "object.h" #include "oid-array.h" #include "path.h" @@ -178,10 +181,8 @@ static int add_tree_entries(struct path_walk_context *ctx, return -1; } - /* Skip this object if already seen. */ if (o->flags & SEEN) continue; - o->flags |= SEEN; strbuf_setlen(&path, base_len); strbuf_add(&path, entry.path, entry.pathlen); @@ -237,6 +238,7 @@ static int add_tree_entries(struct path_walk_context *ctx, continue; } + o->flags |= SEEN; add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); @@ -314,9 +316,29 @@ static int walk_path(struct path_walk_context *ctx, /* Evaluate function pointer on this data, if requested. */ if ((list->type == OBJ_TREE && ctx->info->trees) || (list->type == OBJ_BLOB && ctx->info->blobs) || - (list->type == OBJ_TAG && ctx->info->tags)) - ret = ctx->info->path_fn(path, &list->oids, list->type, - ctx->info->path_fn_data); + (list->type == OBJ_TAG && ctx->info->tags)) { + struct oid_array *oids = &list->oids; + struct oid_array filtered = OID_ARRAY_INIT; + + if (list->type == OBJ_BLOB && ctx->info->blob_limit) { + for (size_t i = 0; i < list->oids.nr; i++) { + unsigned long size; + + if (odb_read_object_info(ctx->repo->objects, + &list->oids.oid[i], + &size) != OBJ_BLOB || + size < ctx->info->blob_limit) + oid_array_append(&filtered, + &list->oids.oid[i]); + } + oids = &filtered; + } + + if (oids->nr) + ret = ctx->info->path_fn(path, oids, list->type, + ctx->info->path_fn_data); + oid_array_clear(&filtered); + } /* Expand data for children. */ if (list->type == OBJ_TREE) { @@ -485,6 +507,85 @@ static int setup_pending_objects(struct path_walk_info *info, return 0; } +static int prepare_filters(struct path_walk_info *info, + struct list_objects_filter_options *options) +{ + switch (options->choice) { + case LOFC_DISABLED: + return 1; + + case LOFC_BLOB_NONE: + if (info) { + info->blobs = 0; + list_objects_filter_release(options); + } + return 1; + + case LOFC_BLOB_LIMIT: + if (info) { + if (!options->blob_limit_value) { + info->blobs = 0; + } else { + info->blob_limit = options->blob_limit_value; + } + list_objects_filter_release(options); + } + return 1; + + case LOFC_SPARSE_OID: + if (info) { + struct object_id sparse_oid; + struct repository *repo = info->revs->repo; + + if (info->pl) { + warning(_("sparse filter cannot be combined with existing sparse patterns")); + return 0; + } + + if (repo_get_oid_with_flags(repo, + options->sparse_oid_name, + &sparse_oid, + GET_OID_BLOB)) { + error(_("unable to access sparse blob in '%s'"), + options->sparse_oid_name); + return 0; + } + + CALLOC_ARRAY(info->pl, 1); + info->pl->use_cone_patterns = 1; + + if (add_patterns_from_blob_to_list(&sparse_oid, "", 0, + info->pl) < 0) { + clear_pattern_list(info->pl); + FREE_AND_NULL(info->pl); + error(_("unable to parse sparse filter data in '%s'"), + oid_to_hex(&sparse_oid)); + return 0; + } + + if (!info->pl->use_cone_patterns) { + clear_pattern_list(info->pl); + FREE_AND_NULL(info->pl); + warning(_("sparse filter is not cone-mode compatible")); + return 0; + } + + list_objects_filter_release(options); + } + return 1; + + default: + error(_("object filter '%s' not supported by the path-walk API"), + list_objects_filter_spec(options)); + return 0; + } +} + +int path_walk_filter_compatible(struct list_objects_filter_options *options) +{ + return prepare_filters(NULL, options); +} + /** * Given the configuration of 'info', walk the commits based on 'info->revs' and * call 'info->path_fn' on each discovered path. @@ -512,6 +613,9 @@ int walk_objects_by_path(struct path_walk_info *info) trace2_region_enter("path-walk", "commit-walk", info->revs->repo); + if (!prepare_filters(info, &info->revs->filter)) + return -1; + CALLOC_ARRAY(commit_list, 1); commit_list->type = OBJ_COMMIT; diff --git a/path-walk.h b/path-walk.h index 5ef5a8440e6b5e..bcb81b70a14272 100644 --- a/path-walk.h +++ b/path-walk.h @@ -42,6 +42,14 @@ struct path_walk_info { int blobs; int tags; + /** + * If non-zero, specifies a maximum blob size. Blobs with a + * size equal to or greater than this limit will be omitted + * from the walk. Blobs smaller than the limit (or blobs + * whose size cannot be determined) are still visited. + */ + unsigned long blob_limit; + /** * When 'prune_all_uninteresting' is set and a path has all objects * marked as UNINTERESTING, then the path-walk will not visit those @@ -85,3 +93,10 @@ void path_walk_info_clear(struct path_walk_info *info); * Returns nonzero on an error. */ int walk_objects_by_path(struct path_walk_info *info); + +struct list_objects_filter_options; +/** + * Given a set of options for filtering objects, return 1 if the options + * are compatible with the path-walk API and 0 otherwise. + */ +int path_walk_filter_compatible(struct list_objects_filter_options *options); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index fe63002c2be27d..88f86ae0dc1157 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -4,6 +4,7 @@ #include "dir.h" #include "environment.h" #include "hex.h" +#include "list-objects-filter-options.h" #include "object-name.h" #include "object.h" #include "pretty.h" @@ -71,6 +72,8 @@ int cmd__path_walk(int argc, const char **argv) struct rev_info revs = REV_INFO_INIT; struct path_walk_info info = PATH_WALK_INFO_INIT; struct path_walk_test_data data = { 0 }; + struct list_objects_filter_options filter_options = + LIST_OBJECTS_FILTER_INIT; struct option options[] = { OPT_BOOL(0, "blobs", &info.blobs, N_("toggle inclusion of blob objects")), @@ -86,11 +89,12 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle aggressive edge walk")), OPT_BOOL(0, "stdin-pl", &stdin_pl, N_("read a pattern list over stdin")), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END(), }; setup_git_directory(); - revs.repo = the_repository; + repo_init_revisions(the_repository, &revs, NULL); argc = parse_options(argc, argv, NULL, options, path_walk_usage, @@ -101,6 +105,10 @@ int cmd__path_walk(int argc, const char **argv) else usage(path_walk_usage[0]); + /* Apply the filter after setup_revisions to avoid the --objects check. */ + if (filter_options.choice) + list_objects_filter_copy(&revs.filter, &filter_options); + info.revs = &revs; info.path_fn = emit_block; info.path_fn_data = &data; @@ -129,6 +137,7 @@ int cmd__path_walk(int argc, const char **argv) free(info.pl); } + list_objects_filter_release(&filter_options); release_revisions(&revs); return res; } diff --git a/t/perf/p5315-pack-objects-filter.sh b/t/perf/p5315-pack-objects-filter.sh new file mode 100755 index 00000000000000..00226c15393006 --- /dev/null +++ b/t/perf/p5315-pack-objects-filter.sh @@ -0,0 +1,129 @@ +#!/bin/sh + +test_description='Tests pack-objects performance with filters and --path-walk' +. ./perf-lib.sh + +test_perf_large_repo + +test_expect_success 'setup filter inputs' ' + # Sample a few depth-2 directories from the test repo to build + # a cone-mode sparse-checkout definition. The sampling picks + # directories at evenly-spaced positions so the choice is stable + # and scales to repos of any shape. + + git ls-tree -d --name-only HEAD >top-dirs && + top_nr=$(wc -l depth2-dirs && + while read tdir + do + git ls-tree -d --name-only "HEAD:$tdir" 2>/dev/null | + sed "s|^|$tdir/|" >>depth2-dirs || return 1 + done sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first $mid" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + elif test "$top_nr" -ge 1 + then + # Fallback: use a single top-level directory. + first=$(sed -n "1p" top-dirs) && + { + echo "/*" && + echo "!/*/" && + echo "/$first/" + } >sparse-patterns && + + git hash-object -w sparse-patterns >sparse-oid && + echo "Sparse cone: $first" && + cat sparse-patterns && + test_set_prereq SPARSE_OID + fi +' + +test_perf 'repack (no filter)' ' + git pack-objects --stdout --revs --all pk +' + +test_size 'repack size (no filter)' ' + test_file_size pk +' + +test_perf 'repack (no filter, --path-walk)' ' + git pack-objects --stdout --revs --all --path-walk pk +' + +test_size 'repack size (no filter, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (blob:none)' ' + git pack-objects --stdout --revs --all --filter=blob:none pk +' + +test_size 'repack size (blob:none)' ' + test_file_size pk +' + +test_perf 'repack (blob:none, --path-walk)' ' + git pack-objects --stdout --revs --all --path-walk \ + --filter=blob:none pk +' + +test_size 'repack size (blob:none, --path-walk)' ' + test_file_size pk +' + +test_perf 'repack (sparse:oid)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --revs --all \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid)' \ + --prereq SPARSE_OID ' + test_file_size pk +' + +test_perf 'repack (sparse:oid, --path-walk)' \ + --prereq SPARSE_OID ' + git pack-objects --stdout --revs --all --path-walk \ + --filter=sparse:oid=$(cat sparse-oid) pk +' + +test_size 'repack size (sparse:oid, --path-walk)' \ + --prereq SPARSE_OID ' + test_file_size pk +' + +test_done diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 501d715b9a16b7..bce61fc3321056 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -478,4 +478,135 @@ test_expect_success 'verify pack-objects w/ --missing=allow-any' ' EOF ' +# Test that --path-walk produces the same object set as standard traversal +# when using sparse:oid filters with cone-mode patterns. +# +# The standard sparse filter always includes ALL tree objects, while the +# path-walk with cone-mode patterns prunes entire subtrees that are outside +# the sparse cone. Both should produce the same set of blobs and commits. + +test_expect_success 'setup pw_sparse for path-walk comparison' ' + git init pw_sparse && + mkdir -p pw_sparse/inc/sub pw_sparse/exc/sub && + + for n in 1 2 + do + echo "inc $n" >pw_sparse/inc/file$n && + echo "inc sub $n" >pw_sparse/inc/sub/file$n && + echo "exc $n" >pw_sparse/exc/file$n && + echo "exc sub $n" >pw_sparse/exc/sub/file$n && + echo "root $n" >pw_sparse/root$n || return 1 + done && + + git -C pw_sparse add . && + git -C pw_sparse commit -m "first" && + + echo "inc 1 modified" >pw_sparse/inc/file1 && + echo "exc 1 modified" >pw_sparse/exc/file1 && + echo "root 1 modified" >pw_sparse/root1 && + git -C pw_sparse add . && + git -C pw_sparse commit -m "second" && + + # Cone-mode sparse pattern: include root + inc/ + printf "/*\n!/*/\n/inc/\n" | + git -C pw_sparse hash-object -w --stdin >sparse_oid +' + +test_expect_success 'sparse:oid with --path-walk produces same blobs' ' + oid=$(cat sparse_oid) && + + git -C pw_sparse pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >standard.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../standard.pack && + git -C pw_sparse verify-pack -v ../standard.pack >standard_verify && + + git -C pw_sparse pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >pathwalk.pack <<-EOF && + HEAD + EOF + git -C pw_sparse index-pack ../pathwalk.pack && + git -C pw_sparse verify-pack -v ../pathwalk.pack >pathwalk_verify && + + # Blobs must match exactly + grep -E "^[0-9a-f]{40} blob" standard_verify | + awk "{print \$1}" | sort >standard_blobs && + grep -E "^[0-9a-f]{40} blob" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_blobs && + test_cmp standard_blobs pathwalk_blobs && + + # Commits must match exactly + grep -E "^[0-9a-f]{40} commit" standard_verify | + awk "{print \$1}" | sort >standard_commits && + grep -E "^[0-9a-f]{40} commit" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_commits && + test_cmp standard_commits pathwalk_commits +' + +test_expect_success 'sparse:oid with --path-walk prunes excluded trees' ' + # The standard filter includes ALL trees, while path-walk + # prunes trees outside the sparse cone (exc/ and exc/sub/). + grep -E "^[0-9a-f]{40} tree" standard_verify | + awk "{print \$1}" | sort >standard_trees && + grep -E "^[0-9a-f]{40} tree" pathwalk_verify | + awk "{print \$1}" | sort >pathwalk_trees && + + # path-walk should have fewer trees + test $(wc -l extra_trees && + test_must_be_empty extra_trees +' + +# Test the edge case where the same tree/blob OID appears at both an +# in-cone and out-of-cone path. When sibling directories have identical +# contents, they share a tree OID. The path-walk defers marking objects +# SEEN until after checking sparse patterns, so an object at an out-of-cone +# path can still be discovered at an in-cone path. + +test_expect_success 'setup pw_shared for shared OID across cone boundary' ' + git init pw_shared && + mkdir pw_shared/aaa pw_shared/zzz && + echo "shared content" >pw_shared/aaa/file && + echo "shared content" >pw_shared/zzz/file && + echo "root file" >pw_shared/rootfile && + git -C pw_shared add . && + git -C pw_shared commit -m "aaa and zzz share tree OID" && + + # Verify they share a tree OID + aaa_tree=$(git -C pw_shared rev-parse HEAD:aaa) && + zzz_tree=$(git -C pw_shared rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + printf "/*\n!/*/\n/zzz/\n" | + git -C pw_shared hash-object -w --stdin >shared_sparse_oid +' + +test_expect_success 'shared tree OID: --path-walk blobs match standard' ' + oid=$(cat shared_sparse_oid) && + + git -C pw_shared pack-objects --revs --stdout \ + --filter=sparse:oid=$oid >shared_std.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_std.pack && + git -C pw_shared verify-pack -v ../shared_std.pack >shared_std_verify && + + git -C pw_shared pack-objects --revs --stdout \ + --path-walk --filter=sparse:oid=$oid >shared_pw.pack <<-EOF && + HEAD + EOF + git -C pw_shared index-pack ../shared_pw.pack && + git -C pw_shared verify-pack -v ../shared_pw.pack >shared_pw_verify && + + grep -E "^[0-9a-f]{40} blob" shared_std_verify | + awk "{print \$1}" | sort >shared_std_blobs && + grep -E "^[0-9a-f]{40} blob" shared_pw_verify | + awk "{print \$1}" | sort >shared_pw_blobs && + test_cmp shared_std_blobs shared_pw_blobs +' + test_done diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index f3b5e39493677b..3c8a75192a4791 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -15,6 +15,14 @@ test_expect_success 'backfill rejects unexpected arguments' ' test_grep "unrecognized argument: --unexpected-arg" err ' +test_expect_success 'backfill rejects incompatible filter options' ' + test_must_fail git backfill --objects --filter=tree:1 2>err && + test_grep "cannot backfill with these filter options" err && + + test_must_fail git backfill --objects --filter=blob:limit=10m 2>err && + test_grep "cannot backfill with blob size limits" err +' + # We create objects in the 'src' repo. test_expect_success 'setup repo for object creation' ' echo "{print \$1}" >print_1.awk && diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 56bd1e3c5bec97..e247b08dfdf19b 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -415,4 +415,262 @@ test_expect_success 'trees are reported exactly once' ' test_line_count = 1 out-filtered ' +test_expect_success 'all, blob:none filter' ' + test-tool path-walk --filter=blob:none -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:tree::$(git rev-parse topic^{tree}) + 2:tree::$(git rev-parse base^{tree}) + 2:tree::$(git rev-parse base~1^{tree}) + 2:tree::$(git rev-parse base~2^{tree}) + 2:tree::$(git rev-parse refs/tags/tree-tag^{}) + 2:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 3:tree:a/:$(git rev-parse base:a) + 4:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 5:tree:left/:$(git rev-parse base:left) + 5:tree:left/:$(git rev-parse base~2:left) + 6:tree:right/:$(git rev-parse topic:right) + 6:tree:right/:$(git rev-parse base~1:right) + 6:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, blob:none filter' ' + test-tool path-walk --filter=blob:none -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:tree:left/:$(git rev-parse base~2:left) + 3:tree:right/:$(git rev-parse topic:right) + 3:tree:right/:$(git rev-parse base~1:right) + 3:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:3 + tags:0 + trees:7 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, blob:limit=0 filter' ' + test-tool path-walk --filter=blob:limit=0 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:tree::$(git rev-parse topic^{tree}) + 2:tree::$(git rev-parse base^{tree}) + 2:tree::$(git rev-parse base~1^{tree}) + 2:tree::$(git rev-parse base~2^{tree}) + 2:tree::$(git rev-parse refs/tags/tree-tag^{}) + 2:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 3:tree:a/:$(git rev-parse base:a) + 4:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 5:tree:left/:$(git rev-parse base:left) + 5:tree:left/:$(git rev-parse base~2:left) + 6:tree:right/:$(git rev-parse topic:right) + 6:tree:right/:$(git rev-parse base~1:right) + 6:tree:right/:$(git rev-parse base~2:right) + blobs:0 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'all, blob:limit=3 filter' ' + test-tool path-walk --filter=blob:limit=3 -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:tree::$(git rev-parse topic^{tree}) + 2:tree::$(git rev-parse base^{tree}) + 2:tree::$(git rev-parse base~1^{tree}) + 2:tree::$(git rev-parse base~2^{tree}) + 2:tree::$(git rev-parse refs/tags/tree-tag^{}) + 2:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 3:blob:a:$(git rev-parse base~2:a) + 4:tree:a/:$(git rev-parse base:a) + 5:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:blob:left/b:$(git rev-parse base~2:left/b) + 8:tree:right/:$(git rev-parse topic:right) + 8:tree:right/:$(git rev-parse base~1:right) + 8:tree:right/:$(git rev-parse base~2:right) + 9:blob:right/c:$(git rev-parse base~2:right/c) + 10:blob:right/d:$(git rev-parse base~1:right/d) + blobs:4 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'setup sparse filter blob' ' + # Cone-mode patterns: include root, exclude all dirs, include left/ + cat >patterns <<-\EOF && + /* + !/*/ + /left/ + EOF + sparse_oid=$(git hash-object -w -t blob patterns) +' + +test_expect_success 'all, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- --all >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:blob:a:$(git rev-parse base~2:a) + 5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:blob:left/b:$(git rev-parse base~2:left/b) + 7:blob:left/b:$(git rev-parse base:left/b) + blobs:6 + commits:4 + tags:7 + trees:8 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only, sparse:oid filter' ' + test-tool path-walk --filter=sparse:oid=$sparse_oid -- topic >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:left/:$(git rev-parse base~2:left) + 4:blob:left/b:$(git rev-parse base~2:left/b) + blobs:2 + commits:3 + tags:0 + trees:4 + EOF + + test_cmp_sorted expect out +' + +# Demonstrate the SEEN flag ordering issue: when the same tree/blob OID +# appears at two sibling paths where one is in-cone and the other is +# out-of-cone, the path-walk may mark the object SEEN via the out-of-cone +# path (which sorts first alphabetically) and then skip it when encountered +# at the in-cone path. + +test_expect_success 'setup shared tree OID across cone boundary' ' + git checkout --orphan shared-tree && + git rm -rf . && + mkdir aaa zzz && + echo "shared content" >aaa/file && + echo "shared content" >zzz/file && + echo "root file" >rootfile && + git add aaa zzz rootfile && + git commit -m "aaa and zzz have same tree OID" && + + # Verify they really share a tree OID + aaa_tree=$(git rev-parse HEAD:aaa) && + zzz_tree=$(git rev-parse HEAD:zzz) && + test "$aaa_tree" = "$zzz_tree" && + + # Cone pattern: include root + zzz/ (not aaa/) + cat >shared-patterns <<-\EOF && + /* + !/*/ + /zzz/ + EOF + shared_sparse_oid=$(git hash-object -w -t blob shared-patterns) +' + +test_expect_success 'sparse:oid with shared tree OID across cone boundary' ' + test-tool path-walk \ + --filter=sparse:oid=$shared_sparse_oid \ + -- shared-tree >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse shared-tree) + 1:tree::$(git rev-parse shared-tree^{tree}) + 2:blob:rootfile:$(git rev-parse shared-tree:rootfile) + 3:tree:zzz/:$(git rev-parse shared-tree:zzz) + 4:blob:zzz/file:$(git rev-parse shared-tree:zzz/file) + blobs:2 + commits:1 + tags:0 + trees:2 + EOF + + test_cmp_sorted expect out +' + test_done