Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8bbc257
feat: add characters only rule into simplification array
DecimalTurn May 2, 2026
4cdc01d
feat: remove LS and PS notions from abnf
DecimalTurn May 2, 2026
6356c82
refactor: manke abnf self-contained
DecimalTurn May 2, 2026
d718091
chore: update railroad diagram
DecimalTurn May 2, 2026
3ba8b39
feat: replace false, true, null with simple characters representation
DecimalTurn May 2, 2026
b054a9e
feat: normalize quotation-marrk in railroad diagram
DecimalTurn May 2, 2026
f49f339
feat: change nodes appearance to match ECMA-404
DecimalTurn May 2, 2026
be43cef
refactor: make railroad more similar to ECMA's
DecimalTurn May 3, 2026
b56113d
chore: update railroad diagram
DecimalTurn May 3, 2026
4656a01
chore: update submodule railroad-diagram-generator-js to latest commit
DecimalTurn May 3, 2026
4b05329
feat: simplify single-line-comment-end rules
DecimalTurn May 3, 2026
e712a50
feat: abnf simplified for numbers
DecimalTurn May 3, 2026
718f0af
feat: add missing lowercase letters to hexdigits
DecimalTurn May 3, 2026
d67cab6
feat: add re-orderings in railroad diagrams
DecimalTurn May 3, 2026
78c7eff
chore: update railroad diagram
DecimalTurn May 3, 2026
49bd2d8
refactor: rename to JSONC.abnf
DecimalTurn May 3, 2026
96ff26c
feat: add post-processing for name
DecimalTurn May 3, 2026
239768a
feat: add formal grammar section
DecimalTurn May 3, 2026
e50873d
fix: correct comment in whitespace definition
DecimalTurn May 3, 2026
2bd5795
fix: add missing spacing for comment in diagram
DecimalTurn May 10, 2026
5adbc81
fix: ensure the processed ABNF remains valid by avoiding case insensi…
DecimalTurn May 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 184 additions & 6 deletions generate-railroad.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,78 @@ const { spawnSync } = require("node:child_process");
const path = require("node:path");

// Customization section
// Grammar file used as input when no path is given (renamed from
// grammar/jsonc.abnf; only one declaration may exist, a second `const`
// with the same name is a SyntaxError).
const DEFAULT_INPUT_ABNF = "grammar/JSONC.abnf";
// NOTE(review): presumably the path where the pre-processed grammar is
// written before the diagram generator runs — confirm against the CLI code.
const DEFAULT_PROCESSED_ABNF = "grammar/jsonc-processed.abnf";
// HTML file used as output when no path is given.
const DEFAULT_OUTPUT_HTML = "grammar/railroad-diagram.html";
// Text that postProcessGeneratedHtml() forces into the first <h1> of the
// generated page.
const FORCED_HTML_HEADER = "JSONC GRAMMAR";

// Rules to inline from their %x... definitions as literal ABNF strings.
// Each name is passed to inlineHexRuleAsLiteral() by processAbnfSource().
// Add more rule names here to apply the same transformation.
// Every entry must appear exactly once and be followed by a comma.
const INLINE_HEX_RULES = [
  "multi-line-comment-start",
  "multi-line-comment-end",
  "asterisk",
  "escape",
  "single-line-comment-start",
  "quotation-mark",
  "decimal-point",
  "minus",
  "plus",
  "zero",
];

// Rule references to expand in place inside specific target rules.
// For each entry, processAbnfSource() rewrites `targetRule` so that every
// reference to one of `referencedRules` is replaced by that rule's %x hex
// sequence, and then deletes the referenced rules' own definitions.
// Append further { targetRule, referencedRules } entries to reuse the pattern.
const INLINE_LITERAL_REFS = [
  { targetRule: "value", referencedRules: ["false", "true", "null"] },
];

// Ordering overrides for the processed ABNF: each [ruleName, afterRule] pair
// moves the definition of `ruleName` so that it directly follows `afterRule`.
// Pairs are applied top to bottom by repositionRulesAfter(), so a later pair
// may anchor on a rule that an earlier pair has already moved.
// Add more pairs here to control rule ordering in the generated output.
const REPOSITION_RULES_AFTER = [
  ["begin-array", "array"],
  ["end-array", "begin-array"],
  ["begin-object", "object"],
  ["end-object", "begin-object"],
  ["name-separator", "member"],
  ["value-separator", "value"],
  ["digit", "unescaped"],
  ["digit1-9", "digit"],
  ["hexdigit", "digit1-9"],
  ["four-hexdigits", "hexdigit"],
].map(([ruleName, afterRule]) => ({ ruleName, afterRule }));

function escapeRegExp(value) {
Expand All @@ -36,6 +97,24 @@ function decodeAbnfHexSequence(value) {
return String.fromCodePoint(...bytes);
}

/**
 * Look up a rule and return its `%x…` hex sequence exactly as written.
 *
 * Only the first `%xNN(.NN)*` token after the `=` is captured; anything that
 * follows it on the line (comments, further alternatives) is ignored.
 *
 * @param {string} source - ABNF text to search.
 * @param {string} ruleName - Name of the rule to look up.
 * @returns {string} The hex sequence, e.g. `%x2F.2F`.
 * @throws {Error} When no line defines `ruleName` with a leading hex sequence.
 */
function getHexRuleSequence(source, ruleName) {
  const namePattern = escapeRegExp(ruleName);
  const definitionPattern = `^\\s*${namePattern}\\s*=\\s*(%x[0-9A-Fa-f]+(?:\\.[0-9A-Fa-f]+)*)\\b.*$`;
  const found = source.match(new RegExp(definitionPattern, "m"));

  if (found) {
    const [, hexSequence] = found;
    return hexSequence;
  }

  throw new Error(`Rule ${ruleName} was not found.`);
}

/**
 * Return the decoded characters of a rule defined as a `%x…` hex sequence.
 *
 * @param {string} source - ABNF text to search.
 * @param {string} ruleName - Name of the rule to look up.
 * @returns {string} Result of decodeAbnfHexSequence() for the rule's sequence.
 * @throws {Error} Propagated from getHexRuleSequence() when the lookup fails.
 */
function getHexRuleLiteral(source, ruleName) {
  const hexSequence = getHexRuleSequence(source, ruleName);
  return decodeAbnfHexSequence(hexSequence);
}

function inlineHexRuleAsLiteral(source, ruleName) {
const escapedRuleName = escapeRegExp(ruleName);
const ruleRegex = new RegExp(
Expand All @@ -50,10 +129,10 @@ function inlineHexRuleAsLiteral(source, ruleName) {
const hexSequence = ruleMatch[1];
const literalChars = decodeAbnfHexSequence(hexSequence);

// For backslash or other problematic characters, keep them as hex format
// ABNF doesn't support backslash escaping in quoted strings
// Keep hex format for characters that cannot be represented safely
// as a single ABNF quoted string literal.
let replacement;
if (literalChars === "\\") {
if (literalChars === "\\" || literalChars === '"') {
replacement = hexSequence;
} else {
// For other characters, escape only double quotes (not backslashes)
Expand Down Expand Up @@ -90,16 +169,104 @@ function inlineHexRuleAsLiteral(source, ruleName) {
.join("\n");
}

/**
 * Replace references to selected rules inside one target rule with the
 * referenced rules' `%x…` hex sequences.
 *
 * Only the line that holds `targetRule =` is rewritten; the text after the
 * `=` on that line (including any trailing comment) is scanned for whole-name
 * occurrences of each referenced rule. The rest of the source is untouched.
 *
 * @param {string} source - Full ABNF text.
 * @param {string} targetRule - Name of the rule whose right-hand side is rewritten.
 * @param {string[]} referencedRules - Rule names to replace by their hex sequences.
 * @returns {string} The ABNF text with the target rule rewritten.
 * @throws {Error} When `targetRule` is not found, or when getHexRuleSequence()
 *   throws for one of the referenced rules.
 */
function inlineLiteralRefsInTargetRule(source, targetRule, referencedRules) {
  const escapedTargetRule = escapeRegExp(targetRule);
  const targetRuleRegex = new RegExp(`^(\\s*${escapedTargetRule}\\s*=\\s*)(.*)$`, "m");
  const match = source.match(targetRuleRegex);
  if (!match) {
    throw new Error(`Rule ${targetRule} was not found.`);
  }

  const [, targetRulePrefix, targetRuleRhs] = match;

  let updatedRhs = targetRuleRhs;
  for (const referencedRule of referencedRules) {
    const hexSequence = getHexRuleSequence(source, referencedRule);
    // Lookarounds keep the match on whole rule names, so `true` inside
    // `not-true` or `true1` is left alone.
    const referencedRuleRegex = new RegExp(
      `(?<![A-Za-z0-9-])${escapeRegExp(referencedRule)}(?![A-Za-z0-9-])`,
      "g",
    );
    // Replacer functions insert their result verbatim; a replacement *string*
    // would expand `$&`, `$1`, `$$`, ... found in the inserted text.
    updatedRhs = updatedRhs.replace(referencedRuleRegex, () => hexSequence);
  }

  return source.replace(targetRuleRegex, () => `${targetRulePrefix}${updatedRhs}`);
}

/**
 * Drop the definitions of the given rules from an ABNF source.
 *
 * A definition is the line that starts with `name =` plus any directly
 * following indented, non-blank lines (ABNF continuation lines). Removing the
 * continuation lines as well prevents them from silently attaching to the
 * rule that precedes the removed one.
 *
 * The result is joined with "\n", so CRLF line endings are normalised to LF.
 *
 * @param {string} source - ABNF text.
 * @param {Iterable<string>} ruleNames - Names of the rules to remove.
 * @returns {string} ABNF text without the listed rule definitions.
 */
function removeRuleDefinitions(source, ruleNames) {
  const removalSet = new Set(ruleNames);
  const ruleStartRegex = /^\s*([A-Za-z][A-Za-z0-9-]*)\s*=/;
  // True while the lines being visited belong to a rule that is being removed.
  let isRemoving = false;

  return source
    .split(/\r?\n/)
    .filter((line) => {
      const match = line.match(ruleStartRegex);
      if (match) {
        isRemoving = removalSet.has(match[1]);
        return !isRemoving;
      }

      // Indented, non-blank line right after a removed rule: continuation.
      if (isRemoving && /^\s+\S/.test(line)) {
        return false;
      }

      // Blank lines and column-0 comments end the removed block and are kept.
      isRemoving = false;
      return true;
    })
    .join("\n");
}

/**
 * Locate a rule definition inside an array of ABNF lines.
 *
 * The block starts at the first line matching `ruleName =` and extends over
 * every directly following line that begins with whitespace.
 *
 * @param {string[]} lines - ABNF source split into lines.
 * @param {string} ruleName - Name of the rule to locate.
 * @returns {{startIndex: number, endIndex: number, blockLines: string[]}}
 *   Start index (inclusive), end index (exclusive) and a copy of the lines.
 * @throws {Error} When no line defines `ruleName`.
 */
function findRuleBlock(lines, ruleName) {
  const definitionPattern = new RegExp(`^\\s*${escapeRegExp(ruleName)}\\s*=`);
  const startIndex = lines.findIndex((line) => definitionPattern.test(line));
  if (startIndex === -1) {
    throw new Error(`Rule ${ruleName} was not found.`);
  }

  // Offset (relative to the line after the start) of the first line that is
  // not a continuation, i.e. does not begin with whitespace.
  const firstBodyIndex = startIndex + 1;
  const offset = lines
    .slice(firstBodyIndex)
    .findIndex((line) => !/^\s/.test(line));
  const endIndex = offset === -1 ? lines.length : firstBodyIndex + offset;

  return {
    startIndex,
    endIndex,
    blockLines: lines.slice(startIndex, endIndex),
  };
}

/**
 * Move rule definitions so that each one directly follows another rule.
 *
 * Reorderings are applied one after another on the same line list, so every
 * step sees the result of the previous moves. The anchor rule is looked up
 * only after the moved rule has been cut out.
 *
 * @param {string} source - ABNF text.
 * @param {Iterable<{ruleName: string, afterRule: string}>} reorderings -
 *   Moves to perform, in order.
 * @returns {string} Reordered ABNF text, joined with "\n".
 * @throws {Error} Propagated from findRuleBlock() when a rule is missing.
 */
function repositionRulesAfter(source, reorderings) {
  const lines = source.split(/\r?\n/);

  for (const reordering of reorderings) {
    // Cut the rule out first ...
    const moved = findRuleBlock(lines, reordering.ruleName);
    const movedLength = moved.endIndex - moved.startIndex;
    lines.splice(moved.startIndex, movedLength);

    // ... then paste it right behind the end of the anchor rule.
    const { endIndex: insertAt } = findRuleBlock(lines, reordering.afterRule);
    lines.splice(insertAt, 0, ...moved.blockLines);
  }

  return lines.join("\n");
}

/**
 * Run every configured transformation over the ABNF source.
 *
 * Steps, in order:
 *   1. inline each rule listed in INLINE_HEX_RULES;
 *   2. for each INLINE_LITERAL_REFS entry, inline the referenced rules into
 *      the target rule and then remove the referenced rules' definitions;
 *   3. reorder rules according to REPOSITION_RULES_AFTER.
 *
 * @param {string} source - Original ABNF text.
 * @returns {string} Processed ABNF text.
 */
function processAbnfSource(source) {
  const withInlinedHexRules = INLINE_HEX_RULES.reduce(
    (abnf, ruleName) => inlineHexRuleAsLiteral(abnf, ruleName),
    source,
  );

  const withInlinedRefs = INLINE_LITERAL_REFS.reduce(
    (abnf, { targetRule, referencedRules }) => {
      const inlined = inlineLiteralRefsInTargetRule(abnf, targetRule, referencedRules);
      return removeRuleDefinitions(inlined, referencedRules);
    },
    withInlinedHexRules,
  );

  return repositionRulesAfter(withInlinedRefs, REPOSITION_RULES_AFTER);
}

/**
 * Force the heading of the generated HTML page.
 *
 * Reads the file as UTF-8, replaces the first `<h1>…</h1>` element with one
 * containing FORCED_HTML_HEADER, and writes the file back only when that
 * replacement changed something.
 *
 * @param {string} htmlPath - Path of the generated HTML file.
 * @returns {void}
 */
function postProcessGeneratedHtml(htmlPath) {
  const originalHtml = fs.readFileSync(htmlPath, "utf8");
  const forcedHeading = `<h1>${FORCED_HTML_HEADER}</h1>`;
  const rewrittenHtml = originalHtml.replace(/<h1>[^<]*<\/h1>/, forcedHeading);

  // Nothing to do: no heading present, or it already has the forced text.
  if (rewrittenHtml === originalHtml) {
    return;
  }

  fs.writeFileSync(htmlPath, rewrittenHtml, "utf8");
}

const args = process.argv.slice(2);
const titleIndex = args.indexOf("--title");

Expand Down Expand Up @@ -173,4 +340,15 @@ if (result.error) {
process.exit(1);
}

process.exit(result.status === null ? 1 : result.status);
if (result.status !== 0) {
process.exit(result.status === null ? 1 : result.status);
}

try {
postProcessGeneratedHtml(outputPath);
} catch (error) {
console.error(`Failed to post-process generated HTML: ${error.message}`);
process.exit(1);
}

process.exit(0);
55 changes: 26 additions & 29 deletions grammar/jsonc.abnf → grammar/JSONC.abnf
Original file line number Diff line number Diff line change
@@ -1,38 +1,29 @@
; JSONC grammar with comments support (RFC 8259 extended with JavaScript-style comments)
;
; Notes:
; - Rule names and structure follow RFC 8259 ABNF snippets.
; - DIGIT and HEXDIG are core rules from RFC 5234.
; - comments are an extension not in RFC 8259.
; - Rule names and structure follow RFC 8259 ABNF.
; - Comments are an extension not in RFC 8259.
; - Trailing commas are NOT supported in this grammar.

; A JSONC-text is a serialized value surrounded by optional whitespace and comments.
; Comments can appear anywhere insignificant whitespace is allowed in JSON.
JSONC-text = wsc value wsc

; Whitespace with Comments: zero or more whitespace characters or comments
wsc = *(ws-char / comment)
wsc = *(ws-char / comment) ; Whitespace and/or comments

; Single whitespace character (space, tab, line feed, carriage return)
ws-char = %x20 / %x09 / %x0A / %x0D ; space / tab / LF / CR

; Comments: single-line or multi-line
comment = single-line-comment / multi-line-comment

; Source character: any Unicode code point, as per ECMAScript.
source-character = %x00-10FFFF

; Comment terminators and sequences (based on ECMAScript line terminators)
comment-terminator = %x0A / %x0D / %x2028 / %x2029 ; LF / CR / LS / PS
comment-terminator-sequence = %x0D.0A / %x0A / %x0D / %x2028 / %x2029

; Single-line comment: starts with //, continues until line ending
; Terminator is not part of the comment body.
; Note that the single-line-comment-end is optional, allowing comments to end at the end of the file without a line terminator.
single-line-comment-start = %x2F.2F ; // double solidus
single-line-comment-end = comment-terminator-sequence
single-line-comment-end = %x0D.0A / %x0A / %x0D
single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
single-line-comment-char = %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators
single-line-comment-char = %x00-09 / %x0B-0C / %x0E-10FFFF ; Any source character except CR and LF (line terminator)

; Multi-line comment: /* ... */
; Cannot be nested. The first */ closes the comment.
Expand All @@ -58,7 +49,7 @@ name-separator = wsc %x3A wsc ; : colon
value-separator = wsc %x2C wsc ; , comma

; Any JSON value
value = false / null / true / object / array / number / string
value = object / array / number / string / true / false / null

; Literal names (boolean values and null)
false = %x66.61.6C.73.65 ; false
Expand All @@ -73,31 +64,37 @@ member = string name-separator value
array = begin-array [ value *( value-separator value ) ] end-array

; Numbers
number = [ minus ] int [ frac ] [ exp ]
number = [ minus ] ( zero / ( digit1-9 *digit ) ) [ decimal-point 1*digit ] [ ( %x65 / %x45 ) [ minus / plus ] 1*digit ]
decimal-point = %x2E ; .
digit = %x30-39 ; 0-9
digit1-9 = %x31-39 ; 1-9
e = %x65 / %x45 ; e E
exp = e [ minus / plus ] 1*DIGIT
frac = decimal-point 1*DIGIT
int = zero / ( digit1-9 *DIGIT )

minus = %x2D ; -
plus = %x2B ; +
zero = %x30 ; 0
hexdigit = digit /
%x41 / %x61 / ; A a
%x42 / %x62 / ; B b
%x43 / %x63 / ; C c
%x44 / %x64 / ; D d
%x45 / %x65 / ; E e
%x46 / %x66 ; F f
four-hexdigits = 4hexdigit

; Strings
string = quotation-mark *char quotation-mark

char = unescaped /
escape (
%x22 / ; " quotation mark U+0022
%x5C / ; \ reverse solidus U+005C
%x2F / ; / solidus U+002F
%x62 / ; b backspace U+0008
%x66 / ; f form feed U+000C
%x6E / ; n line feed U+000A
%x72 / ; r carriage return U+000D
%x74 / ; t tab U+0009
%x75 4HEXDIG ; uXXXX U+XXXX
%x22 / ; " quotation mark U+0022
%x5C / ; \ reverse solidus U+005C
%x2F / ; / solidus U+002F
%x62 / ; b backspace U+0008
%x66 / ; f form feed U+000C
%x6E / ; n line feed U+000A
%x72 / ; r carriage return U+000D
%x74 / ; t tab U+0009
%x75 four-hexdigits ; uXXXX U+XXXX
)

escape = %x5C ; \
Expand Down
8 changes: 4 additions & 4 deletions grammar/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This directory contains the ABNF grammar for JSONC, along with plans for generat

## Railroad Diagram Generation Plan

Generate railroad diagrams from `grammar/jsonc.abnf` using a simple one-file Node.js script.
Generate railroad diagrams from `grammar/JSONC.abnf` using a simple one-file Node.js script.

Instead of building a custom ABNF parser and converter to Tab Atkins constructor calls, use:

Expand All @@ -18,7 +18,7 @@ The wrapper script should:

1. Accept input ABNF path and optional output HTML path.
2. Default to:
- input: `grammar/jsonc.abnf`
- input: `grammar/JSONC.abnf`
- output: `grammar/railroad-diagram.html`
3. Optionally accept `--title` to set the HTML title.
4. Execute the upstream CLI from our installed dependency.
Expand Down Expand Up @@ -53,13 +53,13 @@ npm run railroad
Generate from a specific input and output:

```bash
npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html
npm run railroad -- grammar/JSONC.abnf grammar/railroad-diagram.html
```

Generate with a custom title:

```bash
npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html --title "JSONC Grammar"
npm run railroad -- grammar/JSONC.abnf grammar/railroad-diagram.html --title "JSONC Grammar"
```

### Notes on EOF for single-line comments
Expand Down
Loading