"use strict";
/** @fileOverview Implements `class page.BNF`.
* @author Axel T. Schreiner <ats@cs.rit.edu>
* @version 1.5.1
*/
/** Creates a new grammar representation; creates the `$accept` non-terminal, `$eof` *end of file* literal, and `error` token.
* @class Represents a grammar as ordered pairs; can compute first and follow sets and generate a tokenizer.
*
* @property {page.BNF.T[]} lits list of unique literals, can be pushed.
* @property {Map} litsByValue maps value to unique literal;
* transient, maintained by {@link page.BNF#lit}.
* @property {page.BNF.T[]} tokens list of unique tokens, can be pushed.
* @property {Map} tokensByName maps name to unique token;
* transient, maintained by {@link page.BNF#token}.
* @property {page.BNF.NT[]} nts list of unique non-terminals, can be pushed.
* @property {Map} ntsByName maps name to unique non-terminal;
* transient, maintained by {@link page.BNF#nt}.
* @property {page.BNF.Rule[]} rules list of grammar rules, can be pushed.
* @property {number} errors counted by {@link page.error} method; transient.
*/
page.BNF = function () {
this.lits = [ ];
this.litsByValue = { };
this.tokens = [ ];
this.tokensByName = { };
this.nts = [ ];
this.ntsByName = { };
this.rules = [];
this.errors = 0;
// create rule zero and the singletons.
this.rules.push( this.rule(this.nt()) ); // rule zero: $accept, completed later
this.lit(); // $eof
this.token(); // $error
};
page.baseclass(page.BNF, 'page.BNF');
/** Factory method: Represent a grammar described in BNF.
* This is bootstrapped using {@link page.BNF.ctor} and {@link page.BNF.builder}
* to create an LL(1) compiler for BNF.
* @static
*
* @param {string} _grammar description in BNF;
* comments extend from `#` to the end of a line.
*
* @returns {page.BNF} a new grammar representation; check `.errors`.
*/
page.BNF.from = function (_grammar) {
page.assert('__WHERE__', typeof _grammar == 'string');
// create a compiler which recognizes BNF
var bnfCompiler = new page.LL1(page.BNF.ctor());
page.assert('__WHERE__', bnfCompiler.ambiguous == 0 && bnfCompiler.greedy == 0 && bnfCompiler.errors == 0);
// build the new grammar
var bnf = new page.BNF();
try {
bnfCompiler.parse(bnfCompiler.tokenizer(/\s+|#.*/)(_grammar).concat([null]), bnfCompiler.reflect(bnf, page.BNF.builder));
} catch (e) {
page.assert('__WHERE__', e instanceof Error);
bnfCompiler.error(e);
}
bnf.errors += bnfCompiler.errors;
return bnf;
};
/** Nodes in a BNF grammar.
* @typedef Node
* @type page.BNF.NT|page.BNF.Lit|page.BNF.Token
*/
/** The BNF grammar manually represented with {@link page.BNF}.
* @static
*
* @returns {page.BNF} the right-recursive BNF grammar for BNF, hand-crafted.
*/
page.BNF.ctor = function () {
var g = new page.BNF();
g.init(g.nt('grammar'),
[ g.rule(g.nt('grammar'), [ g.nt('tokens'), g.nt('rules') ]),
g.rule(g.nt('tokens'), [ ]),
g.rule(g.nt('tokens'), [ g.nt('token'), g.nt('tokens') ]),
g.rule(g.nt('token'), [ g.lit('%token'),
g.token('Name', '[a-zA-Z][-a-zA-Z0-9_]*'), // alphanum with _ and -
g.token('Literal', "'(?:[^'\n\\\\]|\\\\['n\\\\])+'"), // single quoted
g.lit(';') ]),
g.rule(g.nt('rules'), [ g.nt('rule'), g.nt('more_rules') ]),
g.rule(g.nt('more_rules'), [ ]),
g.rule(g.nt('more_rules'), [ g.nt('rule'), g.nt('more_rules') ]),
g.rule(g.nt('rule'),
[ g.nt('nt'), g.lit(':'), g.nt('rhs'), g.lit(';') ]),
g.rule(g.nt('nt'), [ g.token('Name') ]),
g.rule(g.nt('rhs'), [ ]),
g.rule(g.nt('rhs'), [ g.nt('token_or_nt' ), g.nt('rhs') ]),
g.rule(g.nt('rhs'), [ g.nt('lit'), g.nt('rhs') ]),
g.rule(g.nt('token_or_nt'), [ g.token('Name') ]),
g.rule(g.nt('lit'), [ g.token('Literal') ])
]);
return g;
};
{
/** The right-recursive BNF grammar for BNF.
* @static
*/
page.BNF.grammar =
" %token Name '[a-zA-Z][-a-zA-Z0-9_]*'; \n" +
" %token Literal '\\'(?:[^\\'\\n\\\\\\\\]|\\\\\\\\[\\'n\\\\\\\\])+\\''; \n" +
" grammar: tokens rules; \n" +
" tokens: ; \n" +
" tokens: token tokens; \n" +
" token: '%token' Name Literal ';'; \n" +
" rules: rule more_rules; \n" +
" more_rules: ; \n" +
" more_rules: rule more_rules; \n" +
" rule: nt ':' rhs ';'; \n" +
" nt: Name; \n" +
" rhs: ; \n" +
" rhs: token_or_nt rhs; \n" +
" rhs: lit rhs; \n" +
" token_or_nt: Name; \n" +
" lit: Literal; \n";
}
/** An object with action functions corresponding to
* {@link page.BNF.grammar} for {@link page.LL1#reflect}
* which require a new {@link page.BNF} as environment to represent
* a grammar in BNF notation using this environment.
* @static
*/
page.BNF.builder = {
grammar: // tokens rules
function (_values, _bnf) {
_bnf.init(null, _values);
return _bnf;
},
// tokens:
// tokens: token tokens
// defaulted, collects empty list
token: // '%token' Name Literal ';'
function (_values, _bnf, _tuple) {
if (_values[0] in _bnf.tokensByName)
_bnf.error('('+_tuple.lineno+')', _values[0], 'is already defined');
else
_bnf.token(_values[0], page.unescape(_values[1]));
return []; // only collect tokens in _bnf
},
// rules: rule more_rules
// more_rules:
// more_rules: rule more_rules
// defaulted, collects array of rules in order
rule: // nt ':' rhs ';'
function (_values, _bnf) {
return _bnf.rule(_values[0], _values.slice(1));
},
nt: // Name
function (_values, _bnf, _tuple) {
if (_values[0] in _bnf.tokensByName) {
_bnf.error('('+_tuple.lineno+')', _values[0], 'is already defined as a token');
_values[0] += '$'; // kludge
}
return _bnf.nt(_values[0]);
},
// rhs:
// rhs: token_or_nt rhs
// rhs: lit rhs
// defaulted, collects array of items in order
token_or_nt: // Name
function (_values, _bnf) {
if (_values[0] in _bnf.tokensByName)
return _bnf.token(_values[0]);
return _bnf.nt(_values[0]);
},
lit: // Literal
function (_values, _bnf) {
return _bnf.lit(page.unescape(_values[0]));
}
};
/** Displays description of grammar and number of errors if any.
* @returns {string}
*/
page.BNF.prototype.toString = function () {
var result = '';
result += this.rules.map(function (rule, n) {
return (n < 10 ? ' ' : '') + n + ' ' + rule;
}).join('\n');
result += '\n\nnon-terminals: ' + this.nts.join(' ');
result += '\nliterals: ' + this.lits.join(' ');
result += '\ntokens:\t' + this.tokens.map(function (token) {
return token + ' ' + page.escape(token.pattern);
}).join('\n\t');
if (this.errors > 0)
result += '\n\nerrors: ' + this.errors;
return result;
};
/** Displays a grammar and all non-terminals with name and contents of all sets.
* @returns {string}
*/
page.BNF.prototype.dump = function () {
var result = this.rules.map(function (rule) { return rule.dump(); }).join('\n');
result += '\n\n' + this.nts.map(function (nt) {
return nt.dump();
}).join('\n\n');
return result;
};
/** Must be called once; common code to complete grammar initialization.
*
* Adds rules passed as a parameter to {@link page.BNF}`.rules`.
*
* Sorts literals in reverse order, i.e., longer ones before shorter prefixes.
* Defines {@link page.BNF.T}`.ord` and {@link page.BNF.NT}`.ord` as globally
* unique index (literals before tokens before non-terminals).
* Sets {@link page.BNF.T}`.first` for each terminal to contain itself.
* @private
*
* @param {page.BNF.Rule[]} [_rules] (additional) grammar rules.
*/
page.BNF.prototype.initTree = function (_rules) {
// closure
var self = this;
// none or valid rules
!_rules || _rules.forEach(function (rule) {
self.assert('__WHERE__', rule instanceof page.BNF.Rule);
});
// add to rules if any
if (_rules)
_rules.forEach(function (rule) {
self.rules.push(rule);
});
self.assert('__WHERE__', self.rules.length > 1);
// sort in reverse, keep $eof in front
self.lits = [ self.lits[0] ].concat(self.lits.slice(1).sort(
function (_a, _b) {
if (_a == _b) return 0;
return _a.literal < _b.literal ? 1 : -1;
}));
// ord literals before tokens before non-terminals, create first for terminals
self.lits.forEach(function (lit, n) {
(lit.first = {})[ lit.ord = Number(n) ] = lit;
});
self.tokens.forEach(function (token, n) {
(token.first = {})[ token.ord = self.lits.length + Number(n) ] = token;
});
self.nts.forEach(function (nt, n) {
nt.ord = self.tokens.length + self.lits.length + Number(n);
});
};
/** Must be called once to complete BNF grammar initialization.
*
* Completes rule 0 with a new start symbol `$accept`
* to accept the grammar's original start symbol and then `$eof`.
*
* Sets {@link page.BNF.Rule}`.index` as index into the rules array.
* Sets `empty` for each rule and its non-terminal which has an empty right-hand side.
*
* Checks that all non-terminals are defined, can be reached,
* and cannot expand infinitely without generating terminals.
*
* Computes `first` and `follow` for all rules and non-terminals.
*
* @param {page.BNF.NT} [_start] start symbol of grammar,
* default is the non-terminal of rule 1.
* @param {page.BNF.Rule[]} [_rules] (additional) grammar rules.
*
* @see page.BNF#initTree
*/
page.BNF.prototype.init = function (_start, _rules) {
var self = this;
// none or valid start symbol
!_start || self.assert('__WHERE__', _start instanceof page.BNF.NT);
// common code
self.initTree(_rules);
// use once only: rule 0 not yet filled in
self.assert('__WHERE__', self.rules[0].symbols.length == 0);
// rule zero: $accept: start $eof
self.rules[0].symbols.push(_start ? _start : self.rules[1].nt);
self.rules[0].symbols.push(self.lit());
// set empty for all rules and their nt with empty rhs
self.rules.forEach(function (rule) {
if (rule.symbols.length == 0) rule.nt.empty = rule.empty = true;
});
// index the rules
self.rules.forEach(function (rule, n) { rule.index = Number(n); });
// check that all non-terminals are defined
self.nts.forEach(function (nt) {
if (!nt.rules.length) self.error(nt, 'is not defined by a rule');
});
// check recursively that all non-terminals can be reached, starting at rule 0
(function setReached (rule) {
if (!rule.reached) {
rule.reached = rule.nt.reached = true; // reached rule and rule's nt
rule.symbols.forEach(function (nt) { // for each nt in rhs
if (nt instanceof page.BNF.NT)
nt.rules.forEach(setReached); // reached each rule for that nt, too
});
}
})(self.rules[0]);
// check
self.nts.forEach(function (nt) {
if (!nt.reached)
self.error(nt, 'cannot be reached from rule zero');
});
// check that all non-terminals have a finite expansion
var changed;
do {
changed = false; // until no more changes
self.rules.forEach(function (rule) { // check rule
if (!rule.nt.finite && // undecided?
rule.symbols.some(function (symbol) { // any terminal or finite non-terminal
return symbol instanceof page.BNF.T || symbol.finite;
}))
changed = rule.finite = rule.nt.finite = true; // changed: finite!
});
} while (changed);
// check
self.nts.forEach(function (nt) {
if (!nt.finite && !nt.empty)
self.error(nt, 'is infinitely recursive');
});
// compute first for non-terminals and rules
do {
changed = false; // until no more changes
// for each rule with non-empty rhs
self.rules.forEach(function (rule, r) {
if (rule.symbols.length > 0) {
// for each symbol in rhs, as long as they accept empty
if (rule.symbols.every(function (symbol) {
// add symbol's first to rule's first
if (page.merge(rule.first, symbol.first)) changed = true;
// continue only if nt accepts empty
return symbol instanceof page.BNF.NT ? symbol.empty : false;
}))
// if all accept empty, so does rule
if (!rule.empty) changed = rule.empty = true;
// add rule's first to nt's first, ditto empty
if (page.merge(rule.nt.first, rule.first)) changed = true;
if (!rule.nt.empty && rule.empty) changed = rule.nt.empty = true;
}
});
} while (changed);
// compute follow for non-terminals
if (!Object.reduceRight || !Object.assign)
do {
changed = false; // until no more changes
// for each rule beginning with rule zero $accept: start_nt $eof_literal
self.rules.forEach(function (rule) {
// over rhs in reverse order
if (rule.symbols.length > 0) {
// nt:..last add nt.follow to last.follow
var last = rule.symbols[rule.symbols.length - 1],
follow = rule.nt.follow;
if (last instanceof page.BNF.NT)
if (page.merge(last.follow, follow)) changed = true;
// nt:..prev last..
for (var s = rule.symbols.length - 2; s >= 0; -- s) {
var prev = rule.symbols[s];
// add last.first to prev.follow
if (prev instanceof page.BNF.NT) {
if (page.merge(prev.follow, last.first)) changed = true;
// if last is nt and empty, add follow to prev.follow
if (last instanceof page.BNF.NT && last.empty)
if (page.merge(prev.follow, follow)) changed = true;
// move forward
follow = prev.follow;
}
// move forward
last = prev;
}
}
});
} while (changed);
else
do {
changed = false; // until no more changes
// for each rule beginning with rule zero $accept: start_nt $eof_literal
self.rules.forEach(function (rule) {
if (rule.symbols.length > 0)
// over rhs in reverse order
rule.symbols.reduceRight( function (follow, last) {
if (last instanceof page.BNF.NT) {
if (page.merge(last.follow, follow)) changed = true;
if (last.empty)
// if empty, pass follow further forward
return Object.assign({}, last.first, follow);
}
// otherwise, pass first forward
return last.first;
}, rule.nt.follow);
});
} while (changed);
};
/** Returns symbol by `ord`.
*
* @param {number} _ord {@link page.BNF.NT} or {@link page.BNF.T}'s `ord`.
*
* @return {Node}
*/
page.BNF.prototype.getByOrd = function (_ord) {
this.assert('__WHERE__', typeof _ord == 'number');
this.assert('__WHERE__', _ord >= 0 && _ord < this.lits.length + this.tokens.length + this.nts.length);
if (_ord < this.lits.length) return this.lits[_ord];
if ((_ord -= this.lits.length) < this.tokens.length) return this.tokens[_ord];
if ((_ord -= this.tokens.length) < this.nts.length) return this.nts[_ord];
this.assert('__WHERE__', false);
};
/** The result of {@link page.BNF#tokenizer}
* @typedef Tokenizer
* @type {function(string): page.Tuple[]}
* @param {string} input to be divided into literals and tokens.
* @property {string} pattern read-only, used to disect the input,
* combines all tokens, literals and the `skip` pattern.
* @returns {page.Tuple[]} a list of literals and tokens.
*/
/** Creates a function which tokenizes a string.
*
* For literals with common prefixes the longer literal will be matched first.
* Token patterns are matched after literals and in the order the tokens were defined.
*
* @param {string|RegExp} [_skip] a pattern to define ignorable character sequences;
* this pattern must not accept empty input and it must use `(:? )`
* rather than `( )` for grouping; the default is to skip white space.
*
* @returns {Tokenizer} a function which takes a string and returns a list of {@link page.Tuple} elements.
* The list contains one tuple with the `error` token for each
* character which is neither ignorable nor a literal or part of a token.
* The function is based on a regular expression which can be displayed as a member `.pattern`.
*/
page.BNF.prototype.tokenizer = function (_skip) {
// make sure the closure has access to this
var self = this;
// none or valid skip pattern
!_skip || _skip instanceof RegExp ||
self.assert('__WHERE__', typeof _skip == 'string' && RegExp(_skip, 'gm') != null);
// replace most characters by \. or \x.. or \u....
function unicode (s) {
var result = '';
for (var i = 0; i < s.length; ++ i) {
var c = s.charAt(i);
if (c.search(/[a-zA-Z0-9_]/) >= 0)
result += c;
else if (c.search(/[\x20-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e]/) >= 0)
result += '\\' + c;
else {
c = s.charCodeAt(i);
if (c < 16)
result += '\\x0' + c.toString(16);
else if (c < 256)
result += '\\x' + c.toString(16);
else if (c < 16*256)
result += '\\u0' + c.toString(16);
else
result += '\\u' + c.toString(16);
}
}
return result;
}
// pattern = ( token ) |.. ( literal |.. ) | ( skip )
var pattern = '|';
// tokens w/out $error
if (this.tokens.length > 1)
pattern += '(' +
this.tokens.slice(1).map(function (token) { return token.pattern.toString(); }).join(')|(') +
')|';
// literals w/out $eof
if (this.lits.length > 1)
pattern += '(' +
this.lits.slice(1).map(function (lit) { return unicode(lit.literal); }).join('|') +
')|';
// skip
pattern = (pattern + '(' + (_skip ? _skip.source : '\\s+') + ')').substr(1);
// returns number of \n in _s
function nl (_s) {
var result = 0, at;
for (var begin = 0;
(at = _s.substring(begin).indexOf('\n')) >= 0;
begin += at + 1)
++ result;
return result;
}
// the closure to be returned
function tokenizer (_input) {
self.assert('__WHERE__', typeof _input == 'string');
var result = new Array();
var lineno = 1;
var begin = pattern.lastIndex = 0;
while (true) {
scan: for (var m; (m = pattern.exec(_input)) != null &&
m.index == begin; begin = pattern.lastIndex) {
if (m[m.length - 1]) { // skip pattern
lineno += nl(m[m.length - 1]);
continue scan;
}
if (m[0] in self.litsByValue) {
result.push(new page.Tuple(lineno, self.litsByValue[m[0]], m[0]));
lineno += nl(m[0]);
continue scan;
}
for (var t = 1; t < self.tokens.length; ++ t) // without $error
if (m[t]) {
result.push(new page.Tuple(lineno, self.tokens[t], m[t]));
lineno += nl(m[t]);
continue scan;
}
self.assert('__WHERE__');
}
if (begin >= _input.length) // end of input
return result;
// error tuple
result.push(new page.Tuple(lineno, self.token(), _input.substr(begin, 1)));
if (_input.substring(begin, 1) == '\n') ++ lineno;
// ignore one character
pattern.lastIndex = ++ begin;
}
}
tokenizer.pattern = pattern; pattern = new RegExp(pattern, 'mg');
return tokenizer;
};
/** Factory method to create a unique literal, maintains {@link page.BNF}`.lit` and {@link page.BNF}`.litsByValue`.
*
* @param {string} [_literal] literal's representation;
* if omitted represents the `$eof` literal terminal.
*
* @returns {page.BNF.Lit} a unique literal.
*/
page.BNF.prototype.lit = function (_literal) {
if (arguments.length == 0)
_literal = '';
else {
this.assert('__WHERE__', arguments.length == 1);
this.assert('__WHERE__', typeof _literal == 'string' && _literal.length > 0);
}
// return existing literal
if (_literal in this.litsByValue)
return this.litsByValue[_literal];
// create new literal
var result = new page.BNF.Lit(_literal);
this.lits.push(result);
return this.litsByValue[_literal] = result;
};
/** Factory method to create a unique token, maintains {@link page.BNF}`.tokens` and {@link page.BNF}`.tokensByName`,
* checks against {@link page.BNF}`.ntsByName`.
*
* @param {string} [_name] token's name; cannot start with `$`.
* If omitted represents the `$error` token.
* @param {string} [_pattern] pattern to match values representing the token in input;
* must be specified if and only if the token is created.
* @returns {page.BNF.Token} a unique terminal category representation.
*/
page.BNF.prototype.token = function (_name, _pattern) {
if (arguments.length == 0) {
_name = '$error';
_pattern = null;
} else {
this.assert('__WHERE__', arguments.length >= 1);
this.assert('__WHERE__', typeof _name == 'string' && _name.length > 0 && _name[0] != '$');
this.assert('__WHERE__', arguments.length == 1 || (typeof _pattern == 'string' && RegExp(_pattern, 'gm') != null));
this.assert('__WHERE__', !(_name in this.ntsByName)); // non-terminal
this.assert('__WHERE__', (_name in this.tokensByName) != arguments.length > 1); // duplicate and forward reference
}
if (_name in this.tokensByName)
return this.tokensByName[_name];
var result = new page.BNF.Token(_name, _pattern);
this.tokens.push(result);
return this.tokensByName[_name] = result;
};
/** Factory method to create a unique non-terminal representation,
* maintains {@link page.BNF}`.nts` and {@link page.BNF}`.ntsByName`,
* checks against {@link page.BNF}`.tokensByName`.
*
* @param {string} [_name] non-terminal's name; cannot start with `$`.
* If `$` creates a unique name which cannot be searched for.
* If omitted represents the `$accept` non-terminal.
*
* @returns {page.BNF.NT} a unique non-terminal representation.
*/
page.BNF.prototype.nt = function (_name) {
if (arguments.length == 0)
_name = '$accept';
else if (_name == '$')
_name += this.nts.length;
else {
this.assert('__WHERE__', typeof _name == 'string' && _name.length > 0 && _name[0] != '$');
this.assert('__WHERE__', !(_name in this.tokensByName)); // token
}
if (_name in this.ntsByName)
return this.ntsByName[_name];
var result = new page.BNF.NT(_name, this.nts.length);
this.nts.push(result);
return this.ntsByName[_name] = result;
};
/** Factory method to create a rule representation for BNF.
* Adds to rule's non-terminal's {@link page.BNF.NT}`.rules`.
*
* @param {page.BNF.NT} _nt left-hand side, non-terminal.
* @param {Node[]} [_symbols] right-hand side, may be empty array;
* if omitted, creates empty rule 0 for `$accept`.
*
* @returns {page.BNF.Rule} a new rule representation.
*
* @see page.BNF.init
*/
page.BNF.prototype.rule = function (_nt, _symbols) {
var self = this;
if (arguments.length == 1) {
self.assert('__WHERE__', self.rules.length == 0 && _nt == self.nt());
_symbols = []; // filled in during init
} else {
self.assert('__WHERE__', _nt instanceof page.BNF.NT);
self.assert('__WHERE__', _symbols instanceof Array);
_symbols.forEach(function (symbol) {
self.assert('__WHERE__', symbol instanceof page.BNF.NT || symbol instanceof page.BNF.Lit || symbol instanceof page.BNF.Token);
});
}
var result = new page.BNF.Rule(_nt, _symbols);
// add new rule to rule's nt's rules
result.nt.rules.push(result);
return result;
};