Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

# actions/checkout sets safe.directory under a temporary HOME that is
# reverted when the step ends, so subsequent steps run as root see the
# workspace as "not a git repository". Re-add it to the persistent config.
- name: Mark workspace as safe for git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"

- name: Install Zig
uses: mlugg/setup-zig@v2
with:
Expand Down
15 changes: 9 additions & 6 deletions src/api/load.zig
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ pub fn loadStreamWithOptions(allocator: std.mem.Allocator, input: []const u8, op
var load_failure: loader.LoadFailure = .unknown;
const documents = loader.loadStreamFromEventsWithFailure(
arena.allocator(),
allocator,
event_stream.events,
options.schema,
options.duplicate_key_behavior,
Expand Down Expand Up @@ -103,22 +104,24 @@ pub fn loadStreamWithOptions(allocator: std.mem.Allocator, input: []const u8, op
fn loadStreamFastPath(allocator: std.mem.Allocator, input: []const u8, options: LoadOptions) Error!?LoadedStream {
if (std.mem.indexOfScalar(u8, input, '*') != null) return null;

var arena = std.heap.ArenaAllocator.init(allocator);
errdefer arena.deinit();
const arena_allocator = arena.allocator();

const event_stream = try parse.parseEventsWithOptions(arena_allocator, input, .{
var event_stream = try parse.parseEventsWithOptions(allocator, input, .{
.max_input_bytes = options.max_input_bytes,
.max_event_count = options.max_event_count,
.max_token_count = options.max_token_count,
.max_nesting_depth = options.max_nesting_depth,
.max_scalar_bytes = options.max_scalar_bytes,
.diagnostic = options.diagnostic,
});
defer event_stream.deinit();

var arena = std.heap.ArenaAllocator.init(allocator);
errdefer arena.deinit();
const arena_allocator = arena.allocator();

var load_failure: loader.LoadFailure = .unknown;
const documents = loader.loadStreamFromEventsWithFailure(
arena_allocator,
allocator,
event_stream.events,
options.schema,
options.duplicate_key_behavior,
Expand All @@ -127,7 +130,7 @@ fn loadStreamFastPath(allocator: std.mem.Allocator, input: []const u8, options:
options.max_alias_expansion,
options.max_document_count,
&load_failure,
false,
true,
) catch |err| {
if (options.diagnostic) |diagnostic| {
if (diagnostic.message.len == 0) {
Expand Down
8 changes: 6 additions & 2 deletions src/loader/construct.zig
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub fn constructStream(
unknown_tag_behavior: UnknownTagBehavior,
) Error![]const *const Node {
return constructStreamWithFailure(
allocator,
allocator,
documents,
selected_schema,
Expand All @@ -51,6 +52,7 @@ pub fn constructStream(
/// copied into `allocator` so loaded values outlive parser event storage.
pub fn constructStreamWithFailure(
allocator: std.mem.Allocator,
temporary_allocator: std.mem.Allocator,
documents: []const *const graph.Node,
selected_schema: Schema,
duplicate_key_behavior: DuplicateKeyBehavior,
Expand All @@ -60,6 +62,7 @@ pub fn constructStreamWithFailure(
) Error![]const *const Node {
var constructor: Constructor = .{
.allocator = allocator,
.temporary_allocator = temporary_allocator,
.schema = selected_schema,
.duplicate_key_behavior = duplicate_key_behavior,
.unknown_tag_behavior = unknown_tag_behavior,
Expand All @@ -71,6 +74,7 @@ pub fn constructStreamWithFailure(

const Constructor = struct {
allocator: std.mem.Allocator,
temporary_allocator: std.mem.Allocator,
schema: Schema,
duplicate_key_behavior: DuplicateKeyBehavior,
unknown_tag_behavior: UnknownTagBehavior,
Expand Down Expand Up @@ -197,12 +201,12 @@ const Constructor = struct {
.tag = try copyOptionalSlice(self.allocator, mapping.tag),
} };
if (tag.isStandardSetTag(mapping.tag)) {
duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| {
duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| {
self.recordFailure(.invalid_standard_tag);
return err;
};
} else if (self.duplicate_key_behavior == .reject) {
duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| {
duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| {
self.recordFailure(.duplicate_key);
return err;
};
Expand Down
133 changes: 103 additions & 30 deletions src/loader/direct.zig
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pub fn loadStreamFromEvents(
const summary = limit.summarizeEvents(events);
try limit.checkSummary(summary, .{ .max_document_count = max_document_count }, load_failure);
return loadStreamFromEventsWithSummary(
allocator,
allocator,
events,
selected_schema,
Expand All @@ -57,6 +58,7 @@ pub fn loadStreamFromEvents(

pub fn loadStreamFromEventsWithSummary(
allocator: std.mem.Allocator,
temporary_allocator: std.mem.Allocator,
events: []const Event,
selected_schema: Schema,
duplicate_key_behavior: DuplicateKeyBehavior,
Expand All @@ -66,6 +68,7 @@ pub fn loadStreamFromEventsWithSummary(
) Error![]const *const Node {
return loadStreamFromEventsWithStringPolicy(
allocator,
temporary_allocator,
events,
selected_schema,
duplicate_key_behavior,
Expand All @@ -87,6 +90,7 @@ pub fn loadStreamFromEventsBorrowingStringsWithSummary(
load_failure: ?*LoadFailure,
) Error![]const *const Node {
return loadStreamFromEventsWithStringPolicy(
allocator,
allocator,
events,
selected_schema,
Expand All @@ -100,6 +104,7 @@ pub fn loadStreamFromEventsBorrowingStringsWithSummary(

pub fn loadStreamFromEventsWithStringPolicy(
allocator: std.mem.Allocator,
temporary_allocator: std.mem.Allocator,
events: []const Event,
selected_schema: Schema,
duplicate_key_behavior: DuplicateKeyBehavior,
Expand All @@ -109,9 +114,17 @@ pub fn loadStreamFromEventsWithStringPolicy(
copy_strings: bool,
) Error![]const *const Node {
if (summary.has_aliases) return ParseError.Unsupported;
const collection_child_counts = buildDirectCollectionChildCounts(temporary_allocator, events) catch |err| {
if (err == ParseError.InvalidSyntax) recordFailure(load_failure, .invalid_graph);
return err;
};
defer if (collection_child_counts.len > 0) temporary_allocator.free(collection_child_counts);

var loader: DirectLoader = .{
.allocator = allocator,
.temporary_allocator = temporary_allocator,
.events = events,
.collection_child_counts = collection_child_counts,
.schema = selected_schema,
.duplicate_key_behavior = duplicate_key_behavior,
.unknown_tag_behavior = unknown_tag_behavior,
Expand All @@ -125,7 +138,10 @@ pub fn loadStreamFromEventsWithStringPolicy(

const DirectLoader = struct {
allocator: std.mem.Allocator,
temporary_allocator: std.mem.Allocator,
events: []const Event,
collection_child_counts: []const usize,
collection_child_count_index: usize = 0,
schema: Schema,
duplicate_key_behavior: DuplicateKeyBehavior,
unknown_tag_behavior: UnknownTagBehavior,
Expand Down Expand Up @@ -177,8 +193,8 @@ const DirectLoader = struct {

return switch (current) {
.scalar => |scalar| self.constructScalar(scalar),
.sequence_start => |collection| self.constructSequence(collection),
.mapping_start => |collection| self.constructMapping(collection),
.sequence_start => |collection| self.constructSequence(try self.nextCollectionChildCount(), collection),
.mapping_start => |collection| self.constructMapping(try self.nextCollectionChildCount(), collection),
.alias => ParseError.Unsupported,
else => self.invalidGraph(),
};
Expand Down Expand Up @@ -208,13 +224,13 @@ const DirectLoader = struct {
return node;
}

fn constructSequence(self: *DirectLoader, collection: parser_event.CollectionStart) Error!*const Node {
fn constructSequence(self: *DirectLoader, child_count: usize, collection: parser_event.CollectionStart) Error!*const Node {
const node = try self.nodes.create();
try self.validateTag(collection.tag, .sequence);

var items: std.ArrayList(*const Node) = .empty;
errdefer items.deinit(self.allocator);
try items.ensureTotalCapacity(self.allocator, countDirectCollectionNodes(self.events, self.index, false));
try items.ensureTotalCapacity(self.allocator, child_count);

while (self.index < self.events.len and self.events[self.index] != .sequence_end) {
try items.append(self.allocator, try self.constructNode());
Expand All @@ -240,13 +256,13 @@ const DirectLoader = struct {
return node;
}

fn constructMapping(self: *DirectLoader, collection: parser_event.CollectionStart) Error!*const Node {
fn constructMapping(self: *DirectLoader, child_count: usize, collection: parser_event.CollectionStart) Error!*const Node {
const node = try self.nodes.create();
try self.validateTag(collection.tag, .mapping);

var pairs: std.ArrayList(MappingPair) = .empty;
errdefer pairs.deinit(self.allocator);
try pairs.ensureTotalCapacity(self.allocator, countDirectCollectionNodes(self.events, self.index, true) / 2);
try pairs.ensureTotalCapacity(self.allocator, child_count / 2);

while (self.index < self.events.len and self.events[self.index] != .mapping_end) {
const key = try self.constructNode();
Expand All @@ -268,12 +284,12 @@ const DirectLoader = struct {
} };
if (tag.isStandardSetTag(collection.tag)) {
try self.validateStandardSetContent(owned_pairs);
duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| {
duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| {
self.recordFailure(.invalid_standard_tag);
return err;
};
} else if (self.duplicate_key_behavior == .reject) {
duplicate_key.validateUniqueMappingKeys(self.allocator, owned_pairs) catch |err| {
duplicate_key.validateUniqueMappingKeys(self.temporary_allocator, owned_pairs) catch |err| {
self.recordFailure(.duplicate_key);
return err;
};
Expand Down Expand Up @@ -360,46 +376,103 @@ const DirectLoader = struct {
/// Like `retainSlice`, but passes a `null` value through unchanged.
fn retainOptionalSlice(self: *DirectLoader, maybe_value: ?[]const u8) std.mem.Allocator.Error!?[]const u8 {
    const value = maybe_value orelse return null;
    return try self.retainSlice(value);
}

/// Pops the next pre-computed child count from `collection_child_counts`.
/// When the stream contains more collection starts than were counted up
/// front, delegates to `invalidGraph()` and propagates its error.
fn nextCollectionChildCount(self: *DirectLoader) Error!usize {
    const cursor = self.collection_child_count_index;
    if (cursor >= self.collection_child_counts.len) return self.invalidGraph();
    self.collection_child_count_index = cursor + 1;
    return self.collection_child_counts[cursor];
}
};

/// Returns true when `node` represents a YAML null: either an explicit
/// `null_value` node, or a scalar that the core schema resolves to null
/// (taking plain style and any tag into account).
fn isSetNullValue(node: *const Node) bool {
return switch (node.*) {
.null_value => true,
.scalar => |scalar| schema.isCoreNullScalar(scalar.value, scalar.style == .plain, scalar.tag),
else => false,
};
}
/// Pre-computes, for every collection (sequence or mapping) in `events`, the
/// number of DIRECT children it contains, in collection-start order. A nested
/// collection counts as exactly one child of its parent, regardless of its
/// own contents.
///
/// Returns `ParseError.InvalidSyntax` when start/end events are unbalanced.
/// Caller frees the returned slice with `allocator` when it is non-empty;
/// the zero-collection case returns a static empty slice that must not be
/// freed (callers guard with `len > 0` before freeing).
fn buildDirectCollectionChildCounts(allocator: std.mem.Allocator, events: []const Event) Error![]usize {
    const collection_count = countCollectionStarts(events);
    if (collection_count == 0) return &[_]usize{};

    const counts = try allocator.alloc(usize, collection_count);
    errdefer allocator.free(counts);
    @memset(counts, 0);

    // Stack of indices into `counts` for the currently-open collections.
    // At most `collection_count` collections can be open at once, so a
    // fixed-size allocation suffices.
    const stack = try allocator.alloc(usize, collection_count);
    defer allocator.free(stack);

    var stack_len: usize = 0;
    var next_collection: usize = 0;
    for (events) |event_value| {
        switch (event_value) {
            .sequence_start, .mapping_start => {
                // The new collection is itself one child of its parent.
                if (stack_len > 0) counts[stack[stack_len - 1]] += 1;
                stack[stack_len] = next_collection;
                stack_len += 1;
                next_collection += 1;
            },
            .scalar, .alias => {
                if (stack_len > 0) counts[stack[stack_len - 1]] += 1;
            },
            .sequence_end, .mapping_end => {
                // An end event without a matching open collection.
                if (stack_len == 0) return ParseError.InvalidSyntax;
                stack_len -= 1;
            },
            else => {},
        }
    }

    // Every opened collection must have been closed, and every counted
    // start must have been visited.
    if (stack_len != 0) return ParseError.InvalidSyntax;
    if (next_collection != collection_count) return ParseError.InvalidSyntax;
    return counts;
}

/// Returns the total number of collection-start events (`sequence_start`
/// or `mapping_start`) in `events`.
fn countCollectionStarts(events: []const Event) usize {
    var total: usize = 0;
    for (events) |event_value| {
        total += switch (event_value) {
            .sequence_start, .mapping_start => @as(usize, 1),
            else => 0,
        };
    }
    return total;
}

/// Records `load_failure_value` into the optional out-parameter, but only
/// when no more specific failure has been recorded yet (the `.unknown`
/// sentinel), so the first failure encountered wins.
fn recordFailure(load_failure: ?*LoadFailure, load_failure_value: LoadFailure) void {
    if (load_failure) |target| {
        if (target.* == .unknown) target.* = load_failure_value;
    }
}

/// Returns true when `node` represents a YAML null: either an explicit
/// `null_value` node, or a scalar that the core schema resolves to null
/// (taking plain style and any tag into account).
fn isSetNullValue(node: *const Node) bool {
    return switch (node.*) {
        .null_value => true,
        .scalar => |scalar| schema.isCoreNullScalar(scalar.value, scalar.style == .plain, scalar.tag),
        else => false,
    };
}

// Reference every declaration so nested tests in this file are compiled
// and included in the test run.
test {
std.testing.refAllDecls(@This());
}

test "direct loader precomputes collection child counts" {
    // Event stream for the document: { items: [ one, { key: value } ] }
    const events = [_]Event{
        .stream_start,
        .{ .document_start = .{} },
        .{ .mapping_start = .{ .style = .flow } },
        .{ .scalar = .{ .value = "items" } },
        .{ .sequence_start = .{ .style = .flow } },
        .{ .scalar = .{ .value = "one" } },
        .{ .mapping_start = .{ .style = .flow } },
        .{ .scalar = .{ .value = "key" } },
        .{ .scalar = .{ .value = "value" } },
        .mapping_end,
        .sequence_end,
        .mapping_end,
        .{ .document_end = .{} },
        .stream_end,
    };

    const counts = try buildDirectCollectionChildCounts(std.testing.allocator, &events);
    defer std.testing.allocator.free(counts);

    // Outer mapping: key + sequence value = 2. Sequence: scalar + nested
    // mapping = 2. Nested mapping: key + value = 2.
    const expected_child_counts = [_]usize{ 2, 2, 2 };
    for (expected_child_counts, counts[0..expected_child_counts.len]) |want, got| {
        try std.testing.expectEqual(want, got);
    }
}
Loading
Loading