Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions builtin/backfill.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit

if (argc > 1)
die(_("unrecognized argument: %s"), argv[1]);
if (!path_walk_filter_compatible(&ctx.revs.filter))
die(_("cannot backfill with these filter options"));
if (ctx.revs.filter.blob_limit_value)
die(_("cannot backfill with blob size limits"));

repo_config(repo, git_default_config, NULL);

Expand Down
7 changes: 2 additions & 5 deletions builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -5177,7 +5177,7 @@ int cmd_pack_objects(int argc,

if (path_walk) {
const char *option = NULL;
if (filter_options.choice)
if (!path_walk_filter_compatible(&filter_options))
option = "--filter";
else if (use_delta_islands)
option = "--delta-islands";
Expand All @@ -5190,10 +5190,7 @@ int cmd_pack_objects(int argc,
}
if (path_walk) {
strvec_push(&rp, "--boundary");
/*
* We must disable the bitmaps because we are removing
* the --objects / --objects-edge[-aggressive] options.
*/
strvec_push(&rp, "--objects");
use_bitmap_index = 0;
} else if (thin) {
use_internal_rev_list = 1;
Expand Down
114 changes: 109 additions & 5 deletions path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include "hashmap.h"
#include "hex.h"
#include "list-objects.h"
#include "list-objects-filter-options.h"
#include "object-name.h"
#include "odb.h"
#include "object.h"
#include "oid-array.h"
#include "path.h"
Expand Down Expand Up @@ -178,10 +181,8 @@ static int add_tree_entries(struct path_walk_context *ctx,
return -1;
}

/* Skip this object if already seen. */
if (o->flags & SEEN)
continue;
o->flags |= SEEN;

strbuf_setlen(&path, base_len);
strbuf_add(&path, entry.path, entry.pathlen);
Expand Down Expand Up @@ -237,6 +238,7 @@ static int add_tree_entries(struct path_walk_context *ctx,
continue;
}

o->flags |= SEEN;
add_path_to_list(ctx, path.buf, type, &entry.oid,
!(o->flags & UNINTERESTING));

Expand Down Expand Up @@ -314,9 +316,29 @@ static int walk_path(struct path_walk_context *ctx,
/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs) ||
(list->type == OBJ_TAG && ctx->info->tags))
ret = ctx->info->path_fn(path, &list->oids, list->type,
ctx->info->path_fn_data);
(list->type == OBJ_TAG && ctx->info->tags)) {
struct oid_array *oids = &list->oids;
struct oid_array filtered = OID_ARRAY_INIT;

if (list->type == OBJ_BLOB && ctx->info->blob_limit) {
for (size_t i = 0; i < list->oids.nr; i++) {
unsigned long size;

if (odb_read_object_info(ctx->repo->objects,
&list->oids.oid[i],
&size) != OBJ_BLOB ||
size < ctx->info->blob_limit)
oid_array_append(&filtered,
&list->oids.oid[i]);
}
oids = &filtered;
}

if (oids->nr)
ret = ctx->info->path_fn(path, oids, list->type,
ctx->info->path_fn_data);
oid_array_clear(&filtered);
}

/* Expand data for children. */
if (list->type == OBJ_TREE) {
Expand Down Expand Up @@ -485,6 +507,85 @@ static int setup_pending_objects(struct path_walk_info *info,
return 0;
}

static int prepare_filters(struct path_walk_info *info,
struct list_objects_filter_options *options)
{
switch (options->choice) {
case LOFC_DISABLED:
return 1;

case LOFC_BLOB_NONE:
if (info) {
info->blobs = 0;
list_objects_filter_release(options);
}
return 1;

case LOFC_BLOB_LIMIT:
if (info) {
if (!options->blob_limit_value) {
info->blobs = 0;
} else {
info->blob_limit = options->blob_limit_value;
}
list_objects_filter_release(options);
}
return 1;

case LOFC_SPARSE_OID:
if (info) {
struct object_id sparse_oid;
struct repository *repo = info->revs->repo;

if (info->pl) {
warning(_("sparse filter cannot be combined with existing sparse patterns"));
return 0;
}

if (repo_get_oid_with_flags(repo,
options->sparse_oid_name,
&sparse_oid,
GET_OID_BLOB)) {
error(_("unable to access sparse blob in '%s'"),
options->sparse_oid_name);
return 0;
}

CALLOC_ARRAY(info->pl, 1);
info->pl->use_cone_patterns = 1;

if (add_patterns_from_blob_to_list(&sparse_oid, "", 0,
info->pl) < 0) {
clear_pattern_list(info->pl);
FREE_AND_NULL(info->pl);
error(_("unable to parse sparse filter data in '%s'"),
oid_to_hex(&sparse_oid));
return 0;
}

if (!info->pl->use_cone_patterns) {
clear_pattern_list(info->pl);
FREE_AND_NULL(info->pl);
warning(_("sparse filter is not cone-mode compatible"));
return 0;
}

list_objects_filter_release(options);
}
return 1;

default:
error(_("object filter '%s' not supported by the path-walk API"),
list_objects_filter_spec(options));
return 0;
}
}

/*
 * Report whether the filter in 'options' is one the path-walk API can
 * handle. Returns 1 if so and 0 otherwise.
 */
int path_walk_filter_compatible(struct list_objects_filter_options *options)
{
	/* A NULL info makes prepare_filters() classify without applying. */
	struct path_walk_info *no_info = NULL;

	return prepare_filters(no_info, options);
}

/**
* Given the configuration of 'info', walk the commits based on 'info->revs' and
* call 'info->path_fn' on each discovered path.
Expand Down Expand Up @@ -512,6 +613,9 @@ int walk_objects_by_path(struct path_walk_info *info)

trace2_region_enter("path-walk", "commit-walk", info->revs->repo);

if (!prepare_filters(info, &info->revs->filter))
return -1;

CALLOC_ARRAY(commit_list, 1);
commit_list->type = OBJ_COMMIT;

Expand Down
15 changes: 15 additions & 0 deletions path-walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@ struct path_walk_info {
int blobs;
int tags;

/**
* If non-zero, specifies a maximum blob size. Blobs with a
* size equal to or greater than this limit will be omitted
* from the walk. Blobs smaller than the limit (or blobs
* whose size cannot be determined) are still visited.
*/
unsigned long blob_limit;

/**
* When 'prune_all_uninteresting' is set and a path has all objects
* marked as UNINTERESTING, then the path-walk will not visit those
Expand Down Expand Up @@ -85,3 +93,10 @@ void path_walk_info_clear(struct path_walk_info *info);
* Returns nonzero on an error.
*/
int walk_objects_by_path(struct path_walk_info *info);

/* Forward declaration; only a pointer to the options is needed here. */
struct list_objects_filter_options;
/**
 * Given a set of options for filtering objects, return 1 if the options
 * are compatible with the path-walk API and 0 otherwise.
 *
 * This check looks only at the kind of filter requested. For a
 * sparse:oid filter it does not read or validate the named blob; that
 * happens when walk_objects_by_path() applies the filter, which can
 * still fail at that point. An unsupported filter is also reported
 * with an error message before 0 is returned.
 */
int path_walk_filter_compatible(struct list_objects_filter_options *options);
11 changes: 10 additions & 1 deletion t/helper/test-path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "dir.h"
#include "environment.h"
#include "hex.h"
#include "list-objects-filter-options.h"
#include "object-name.h"
#include "object.h"
#include "pretty.h"
Expand Down Expand Up @@ -71,6 +72,8 @@ int cmd__path_walk(int argc, const char **argv)
struct rev_info revs = REV_INFO_INIT;
struct path_walk_info info = PATH_WALK_INFO_INIT;
struct path_walk_test_data data = { 0 };
struct list_objects_filter_options filter_options =
LIST_OBJECTS_FILTER_INIT;
struct option options[] = {
OPT_BOOL(0, "blobs", &info.blobs,
N_("toggle inclusion of blob objects")),
Expand All @@ -86,11 +89,12 @@ int cmd__path_walk(int argc, const char **argv)
N_("toggle aggressive edge walk")),
OPT_BOOL(0, "stdin-pl", &stdin_pl,
N_("read a pattern list over stdin")),
OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
OPT_END(),
};

setup_git_directory();
revs.repo = the_repository;
repo_init_revisions(the_repository, &revs, NULL);

argc = parse_options(argc, argv, NULL,
options, path_walk_usage,
Expand All @@ -101,6 +105,10 @@ int cmd__path_walk(int argc, const char **argv)
else
usage(path_walk_usage[0]);

/* Apply the filter after setup_revisions to avoid the --objects check. */
if (filter_options.choice)
list_objects_filter_copy(&revs.filter, &filter_options);

info.revs = &revs;
info.path_fn = emit_block;
info.path_fn_data = &data;
Expand Down Expand Up @@ -129,6 +137,7 @@ int cmd__path_walk(int argc, const char **argv)
free(info.pl);
}

list_objects_filter_release(&filter_options);
release_revisions(&revs);
return res;
}
129 changes: 129 additions & 0 deletions t/perf/p5315-pack-objects-filter.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/bin/sh

test_description='Tests pack-objects performance with filters and --path-walk'
. ./perf-lib.sh

test_perf_large_repo

# Build the inputs for the sparse:oid perf tests further down.
#
# Files written in the working directory:
#   top-dirs         names of the tree entries listed at the root of HEAD
#   depth2-dirs      "<top>/<sub>" paths for directories one level down
#   sparse-patterns  sparse-checkout patterns built from the sampled dirs
#   sparse-oid       object id of sparse-patterns, stored with hash-object -w
#
# The SPARSE_OID prereq is set only when a pattern file could be built.
# With at least two depth-2 directories, the first and the middle entry
# of depth2-dirs are used; otherwise the first top-level directory alone
# is used. If HEAD has no directories at all, the prereq is never set
# and the tests gated on it below have no input to run with.
#
# Errors from the per-directory ls-tree are sent to /dev/null and its
# exit status is hidden by the pipe, so only a failing sed aborts the
# loop.
test_expect_success 'setup filter inputs' '
# Sample a few depth-2 directories from the test repo to build
# a cone-mode sparse-checkout definition. The sampling picks
# directories at evenly-spaced positions so the choice is stable
# and scales to repos of any shape.

git ls-tree -d --name-only HEAD >top-dirs &&
top_nr=$(wc -l <top-dirs) &&

>depth2-dirs &&
while read tdir
do
git ls-tree -d --name-only "HEAD:$tdir" 2>/dev/null |
sed "s|^|$tdir/|" >>depth2-dirs || return 1
done <top-dirs &&

d2_nr=$(wc -l <depth2-dirs) &&

if test "$d2_nr" -ge 2
then
# Pick two directories from evenly-spaced positions.
first=$(sed -n "1p" depth2-dirs) &&
mid=$(sed -n "$((d2_nr / 2 + 1))p" depth2-dirs) &&

p1=$(dirname "$first") &&
p2=$(dirname "$mid") &&

# Build cone-mode sparse-checkout patterns.
{
echo "/*" &&
echo "!/*/" &&
echo "/$p1/" &&
echo "!/$p1/*/" &&
if test "$p1" != "$p2"
then
echo "/$p2/" &&
echo "!/$p2/*/"
fi &&
echo "/$first/" &&
if test "$first" != "$mid"
then
echo "/$mid/"
fi
} >sparse-patterns &&

git hash-object -w sparse-patterns >sparse-oid &&
echo "Sparse cone: $first $mid" &&
cat sparse-patterns &&
test_set_prereq SPARSE_OID
elif test "$top_nr" -ge 1
then
# Fallback: use a single top-level directory.
first=$(sed -n "1p" top-dirs) &&
{
echo "/*" &&
echo "!/*/" &&
echo "/$first/"
} >sparse-patterns &&

git hash-object -w sparse-patterns >sparse-oid &&
echo "Sparse cone: $first" &&
cat sparse-patterns &&
test_set_prereq SPARSE_OID
fi
'

# Each configuration below is exercised as a pair: a test_perf that
# packs every ref to stdout (redirected into the file pk, with stdin
# empty), followed by a test_size that reports the size of that pk.
# Every filter setting is run once with the default walk and once
# with --path-walk so the two can be compared.
# NOTE(review): test_perf and test_size are presumably provided by
# perf-lib.sh, which is sourced at the top of this script.

# Baseline: no object filter.
test_perf 'repack (no filter)' '
git pack-objects --stdout --revs --all </dev/null >pk
'

test_size 'repack size (no filter)' '
test_file_size pk
'

test_perf 'repack (no filter, --path-walk)' '
git pack-objects --stdout --revs --all --path-walk </dev/null >pk
'

test_size 'repack size (no filter, --path-walk)' '
test_file_size pk
'

# --filter=blob:none.
test_perf 'repack (blob:none)' '
git pack-objects --stdout --revs --all --filter=blob:none </dev/null >pk
'

test_size 'repack size (blob:none)' '
test_file_size pk
'

test_perf 'repack (blob:none, --path-walk)' '
git pack-objects --stdout --revs --all --path-walk \
--filter=blob:none </dev/null >pk
'

test_size 'repack size (blob:none, --path-walk)' '
test_file_size pk
'

# --filter=sparse:oid=<oid>, using the object id saved in sparse-oid.
# These are gated on the SPARSE_OID prereq, which is set only when the
# setup test managed to build a pattern file.
test_perf 'repack (sparse:oid)' \
--prereq SPARSE_OID '
git pack-objects --stdout --revs --all \
--filter=sparse:oid=$(cat sparse-oid) </dev/null >pk
'

test_size 'repack size (sparse:oid)' \
--prereq SPARSE_OID '
test_file_size pk
'

test_perf 'repack (sparse:oid, --path-walk)' \
--prereq SPARSE_OID '
git pack-objects --stdout --revs --all --path-walk \
--filter=sparse:oid=$(cat sparse-oid) </dev/null >pk
'

test_size 'repack size (sparse:oid, --path-walk)' \
--prereq SPARSE_OID '
test_file_size pk
'

test_done
Loading
Loading