"use strict"; /** @fileOverview Implements `class page.BNF`. * @author Axel T. Schreiner <ats@cs.rit.edu> * @version 1.5.1 */ /** Creates a new grammar representation; creates the `$accept` non-terminal, `$eof` *end of file* literal, and `error` token. * @class Represents a grammar as ordered pairs; can compute first and follow sets and generate a tokenizer. * * @property {page.BNF.T[]} lits list of unique literals, can be pushed. * @property {Map} litsByValue maps value to unique literal; * transient, maintained by {@link page.BNF#lit}. * @property {page.BNF.T[]} tokens list of unique tokens, can be pushed. * @property {Map} tokensByName maps name to unique token; * transient, maintained by {@link page.BNF#token}. * @property {page.BNF.NT[]} nts list of unique non-terminals, can be pushed. * @property {Map} ntsByName maps name to unique non-terminal; * transient, maintained by {@link page.BNF#nt}. * @property {page.BNF.Rule[]} rules list of grammar rules, can be pushed. * @property {number} errors counted by {@link page.error} method; transient. */ page.BNF = function () { this.lits = [ ]; this.litsByValue = { }; this.tokens = [ ]; this.tokensByName = { }; this.nts = [ ]; this.ntsByName = { }; this.rules = []; this.errors = 0; // create rule zero and the singletons. this.rules.push( this.rule(this.nt()) ); // rule zero: $accept, completed later this.lit(); // $eof this.token(); // $error }; page.baseclass(page.BNF, 'page.BNF'); /** Factory method: Represent a grammar described in BNF. * This is bootstrapped using {@link page.BNF.ctor} and {@link page.BNF.builder} * to create an LL(1) compiler for BNF. * @static * * @param {string} _grammar description in BNF; * comments extend from `#` to the end of a line. * * @returns {page.BNF} a new grammar representation; check `.errors`. */ page.BNF.from = function (_grammar) { page.assert('__WHERE__', typeof _grammar == 'string'); // create a compiler which recognizes BNF var bnfCompiler = new page.LL1(page.BNF.ctor()); page.assert('__WHERE__', bnfCompiler.ambiguous == 0 && bnfCompiler.greedy == 0 && bnfCompiler.errors == 0); // build the new grammar var bnf = new page.BNF(); try { bnfCompiler.parse(bnfCompiler.tokenizer(/\s+|#.*/)(_grammar).concat([null]), bnfCompiler.reflect(bnf, page.BNF.builder)); } catch (e) { page.assert('__WHERE__', e instanceof Error); bnfCompiler.error(e); } bnf.errors += bnfCompiler.errors; return bnf; }; /** Nodes in a BNF grammar. * @typedef Node * @type page.BNF.NT|page.BNF.Lit|page.BNF.Token */ /** The BNF grammar manually represented with {@link page.BNF}. * @static * * @returns {page.BNF} the right-recursive BNF grammar for BNF, hand-crafted. */ page.BNF.ctor = function () { var g = new page.BNF(); g.init(g.nt('grammar'), [ g.rule(g.nt('grammar'), [ g.nt('tokens'), g.nt('rules') ]), g.rule(g.nt('tokens'), [ ]), g.rule(g.nt('tokens'), [ g.nt('token'), g.nt('tokens') ]), g.rule(g.nt('token'), [ g.lit('%token'), g.token('Name', '[a-zA-Z][-a-zA-Z0-9_]*'), // alphanum with _ and - g.token('Literal', "'(?:[^'\n\\\\]|\\\\['n\\\\])+'"), // single quoted g.lit(';') ]), g.rule(g.nt('rules'), [ g.nt('rule'), g.nt('more_rules') ]), g.rule(g.nt('more_rules'), [ ]), g.rule(g.nt('more_rules'), [ g.nt('rule'), g.nt('more_rules') ]), g.rule(g.nt('rule'), [ g.nt('nt'), g.lit(':'), g.nt('rhs'), g.lit(';') ]), g.rule(g.nt('nt'), [ g.token('Name') ]), g.rule(g.nt('rhs'), [ ]), g.rule(g.nt('rhs'), [ g.nt('token_or_nt' ), g.nt('rhs') ]), g.rule(g.nt('rhs'), [ g.nt('lit'), g.nt('rhs') ]), g.rule(g.nt('token_or_nt'), [ g.token('Name') ]), g.rule(g.nt('lit'), [ g.token('Literal') ]) ]); return g; }; { /** The right-recursive BNF grammar for BNF. * @static */ page.BNF.grammar = " %token Name '[a-zA-Z][-a-zA-Z0-9_]*'; \n" + " %token Literal '\\'(?:[^\\'\\n\\\\\\\\]|\\\\\\\\[\\'n\\\\\\\\])+\\''; \n" + " grammar: tokens rules; \n" + " tokens: ; \n" + " tokens: token tokens; \n" + " token: '%token' Name Literal ';'; \n" + " rules: rule more_rules; \n" + " more_rules: ; \n" + " more_rules: rule more_rules; \n" + " rule: nt ':' rhs ';'; \n" + " nt: Name; \n" + " rhs: ; \n" + " rhs: token_or_nt rhs; \n" + " rhs: lit rhs; \n" + " token_or_nt: Name; \n" + " lit: Literal; \n"; } /** An object with action functions corresponding to * {@link page.BNF.grammar} for {@link page.LL1#reflect} * which require a new {@link page.BNF} as environment to represent * a grammar in BNF notation using this environment. * @static */ page.BNF.builder = { grammar: // tokens rules function (_values, _bnf) { _bnf.init(null, _values); return _bnf; }, // tokens: // tokens: token tokens // defaulted, collects empty list token: // '%token' Name Literal ';' function (_values, _bnf, _tuple) { if (_values[0] in _bnf.tokensByName) _bnf.error('('+_tuple.lineno+')', _values[0], 'is already defined'); else _bnf.token(_values[0], page.unescape(_values[1])); return []; // only collect tokens in _bnf }, // rules: rule more_rules // more_rules: // more_rules: rule more_rules // defaulted, collects array of rules in order rule: // nt ':' rhs ';' function (_values, _bnf) { return _bnf.rule(_values[0], _values.slice(1)); }, nt: // Name function (_values, _bnf, _tuple) { if (_values[0] in _bnf.tokensByName) { _bnf.error('('+_tuple.lineno+')', _values[0], 'is already defined as a token'); _values[0] += '$'; // kludge } return _bnf.nt(_values[0]); }, // rhs: // rhs: token_or_nt rhs // rhs: lit rhs // defaulted, collects array of items in order token_or_nt: // Name function (_values, _bnf) { if (_values[0] in _bnf.tokensByName) return _bnf.token(_values[0]); return _bnf.nt(_values[0]); }, lit: // Literal function (_values, _bnf) { return _bnf.lit(page.unescape(_values[0])); } }; /** Displays description of grammar and number of errors if any. * @returns {string} */ page.BNF.prototype.toString = function () { var result = ''; result += this.rules.map(function (rule, n) { return (n < 10 ? ' ' : '') + n + ' ' + rule; }).join('\n'); result += '\n\nnon-terminals: ' + this.nts.join(' '); result += '\nliterals: ' + this.lits.join(' '); result += '\ntokens:\t' + this.tokens.map(function (token) { return token + ' ' + page.escape(token.pattern); }).join('\n\t'); if (this.errors > 0) result += '\n\nerrors: ' + this.errors; return result; }; /** Displays a grammar and all non-terminals with name and contents of all sets. * @returns {string} */ page.BNF.prototype.dump = function () { var result = this.rules.map(function (rule) { return rule.dump(); }).join('\n'); result += '\n\n' + this.nts.map(function (nt) { return nt.dump(); }).join('\n\n'); return result; }; /** Must be called once; common code to complete grammar initialization. * * Adds rules passed as a parameter to {@link page.BNF}`.rules`. * * Sorts literals in reverse order, i.e., longer ones before shorter prefixes. * Defines {@link page.BNF.T}`.ord` and {@link page.BNF.NT}`.ord` as globally * unique index (literals before tokens before non-terminals). * Sets {@link page.BNF.T}`.first` for each terminal to contain itself. * @private * * @param {page.BNF.Rule[]} [_rules] (additional) grammar rules. */ page.BNF.prototype.initTree = function (_rules) { // closure var self = this; // none or valid rules !_rules || _rules.forEach(function (rule) { self.assert('__WHERE__', rule instanceof page.BNF.Rule); }); // add to rules if any if (_rules) _rules.forEach(function (rule) { self.rules.push(rule); }); self.assert('__WHERE__', self.rules.length > 1); // sort in reverse, keep $eof in front self.lits = [ self.lits[0] ].concat(self.lits.slice(1).sort( function (_a, _b) { if (_a == _b) return 0; return _a.literal < _b.literal ? 1 : -1; })); // ord literals before tokens before non-terminals, create first for terminals self.lits.forEach(function (lit, n) { (lit.first = {})[ lit.ord = Number(n) ] = lit; }); self.tokens.forEach(function (token, n) { (token.first = {})[ token.ord = self.lits.length + Number(n) ] = token; }); self.nts.forEach(function (nt, n) { nt.ord = self.tokens.length + self.lits.length + Number(n); }); }; /** Must be called once to complete BNF grammar initialization. * * Completes rule 0 with a new start symbol `$accept` * to accept the grammar's original start symbol and then `$eof`. * * Sets {@link page.BNF.Rule}`.index` as index into the rules array. * Sets `empty` for each rule and its non-terminal which has an empty right-hand side. * * Checks that all non-terminals are defined, can be reached, * and cannot expand infinitely without generating terminals. * * Computes `first` and `follow` for all rules and non-terminals. * * @param {page.BNF.NT} [_start] start symbol of grammar, * default is the non-terminal of rule 1. * @param {page.BNF.Rule[]} [_rules] (additional) grammar rules. * * @see page.BNF#initTree */ page.BNF.prototype.init = function (_start, _rules) { var self = this; // none or valid start symbol !_start || self.assert('__WHERE__', _start instanceof page.BNF.NT); // common code self.initTree(_rules); // use once only: rule 0 not yet filled in self.assert('__WHERE__', self.rules[0].symbols.length == 0); // rule zero: $accept: start $eof self.rules[0].symbols.push(_start ? _start : self.rules[1].nt); self.rules[0].symbols.push(self.lit()); // set empty for all rules and their nt with empty rhs self.rules.forEach(function (rule) { if (rule.symbols.length == 0) rule.nt.empty = rule.empty = true; }); // index the rules self.rules.forEach(function (rule, n) { rule.index = Number(n); }); // check that all non-terminals are defined self.nts.forEach(function (nt) { if (!nt.rules.length) self.error(nt, 'is not defined by a rule'); }); // check recursively that all non-terminals can be reached, starting at rule 0 (function setReached (rule) { if (!rule.reached) { rule.reached = rule.nt.reached = true; // reached rule and rule's nt rule.symbols.forEach(function (nt) { // for each nt in rhs if (nt instanceof page.BNF.NT) nt.rules.forEach(setReached); // reached each rule for that nt, too }); } })(self.rules[0]); // check self.nts.forEach(function (nt) { if (!nt.reached) self.error(nt, 'cannot be reached from rule zero'); }); // check that all non-terminals have a finite expansion var changed; do { changed = false; // until no more changes self.rules.forEach(function (rule) { // check rule if (!rule.nt.finite && // undecided? rule.symbols.some(function (symbol) { // any terminal or finite non-terminal return symbol instanceof page.BNF.T || symbol.finite; })) changed = rule.finite = rule.nt.finite = true; // changed: finite! }); } while (changed); // check self.nts.forEach(function (nt) { if (!nt.finite && !nt.empty) self.error(nt, 'is infinitely recursive'); }); // compute first for non-terminals and rules do { changed = false; // until no more changes // for each rule with non-empty rhs self.rules.forEach(function (rule, r) { if (rule.symbols.length > 0) { // for each symbol in rhs, as long as they accept empty if (rule.symbols.every(function (symbol) { // add symbol's first to rule's first if (page.merge(rule.first, symbol.first)) changed = true; // continue only if nt accepts empty return symbol instanceof page.BNF.NT ? symbol.empty : false; })) // if all accept empty, so does rule if (!rule.empty) changed = rule.empty = true; // add rule's first to nt's first, ditto empty if (page.merge(rule.nt.first, rule.first)) changed = true; if (!rule.nt.empty && rule.empty) changed = rule.nt.empty = true; } }); } while (changed); // compute follow for non-terminals if (!Object.reduceRight || !Object.assign) do { changed = false; // until no more changes // for each rule beginning with rule zero $accept: start_nt $eof_literal self.rules.forEach(function (rule) { // over rhs in reverse order if (rule.symbols.length > 0) { // nt:..last add nt.follow to last.follow var last = rule.symbols[rule.symbols.length - 1], follow = rule.nt.follow; if (last instanceof page.BNF.NT) if (page.merge(last.follow, follow)) changed = true; // nt:..prev last.. for (var s = rule.symbols.length - 2; s >= 0; -- s) { var prev = rule.symbols[s]; // add last.first to prev.follow if (prev instanceof page.BNF.NT) { if (page.merge(prev.follow, last.first)) changed = true; // if last is nt and empty, add follow to prev.follow if (last instanceof page.BNF.NT && last.empty) if (page.merge(prev.follow, follow)) changed = true; // move forward follow = prev.follow; } // move forward last = prev; } } }); } while (changed); else do { changed = false; // until no more changes // for each rule beginning with rule zero $accept: start_nt $eof_literal self.rules.forEach(function (rule) { if (rule.symbols.length > 0) // over rhs in reverse order rule.symbols.reduceRight( function (follow, last) { if (last instanceof page.BNF.NT) { if (page.merge(last.follow, follow)) changed = true; if (last.empty) // if empty, pass follow further forward return Object.assign({}, last.first, follow); } // otherwise, pass first forward return last.first; }, rule.nt.follow); }); } while (changed); }; /** Returns symbol by `ord`. * * @param {number} _ord {@link page.BNF.NT} or {@link page.BNF.T}'s `ord`. * * @return {Node} */ page.BNF.prototype.getByOrd = function (_ord) { this.assert('__WHERE__', typeof _ord == 'number'); this.assert('__WHERE__', _ord >= 0 && _ord < this.lits.length + this.tokens.length + this.nts.length); if (_ord < this.lits.length) return this.lits[_ord]; if ((_ord -= this.lits.length) < this.tokens.length) return this.tokens[_ord]; if ((_ord -= this.tokens.length) < this.nts.length) return this.nts[_ord]; this.assert('__WHERE__', false); }; /** The result of {@link page.BNF#tokenizer} * @typedef Tokenizer * @type {function(string): page.Tuple[]} * @param {string} input to be divided into literals and tokens. * @property {string} pattern read-only, used to disect the input, * combines all tokens, literals and the `skip` pattern. * @returns {page.Tuple[]} a list of literals and tokens. */ /** Creates a function which tokenizes a string. * * For literals with common prefixes the longer literal will be matched first. * Token patterns are matched after literals and in the order the tokens were defined. * * @param {string|RegExp} [_skip] a pattern to define ignorable character sequences; * this pattern must not accept empty input and it must use `(:? )` * rather than `( )` for grouping; the default is to skip white space. * * @returns {Tokenizer} a function which takes a string and returns a list of {@link page.Tuple} elements. * The list contains one tuple with the `error` token for each * character which is neither ignorable nor a literal or part of a token. * The function is based on a regular expression which can be displayed as a member `.pattern`. */ page.BNF.prototype.tokenizer = function (_skip) { // make sure the closure has access to this var self = this; // none or valid skip pattern !_skip || _skip instanceof RegExp || self.assert('__WHERE__', typeof _skip == 'string' && RegExp(_skip, 'gm') != null); // replace most characters by \. or \x.. or \u.... function unicode (s) { var result = ''; for (var i = 0; i < s.length; ++ i) { var c = s.charAt(i); if (c.search(/[a-zA-Z0-9_]/) >= 0) result += c; else if (c.search(/[\x20-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e]/) >= 0) result += '\\' + c; else { c = s.charCodeAt(i); if (c < 16) result += '\\x0' + c.toString(16); else if (c < 256) result += '\\x' + c.toString(16); else if (c < 16*256) result += '\\u0' + c.toString(16); else result += '\\u' + c.toString(16); } } return result; } // pattern = ( token ) |.. ( literal |.. ) | ( skip ) var pattern = '|'; // tokens w/out $error if (this.tokens.length > 1) pattern += '(' + this.tokens.slice(1).map(function (token) { return token.pattern.toString(); }).join(')|(') + ')|'; // literals w/out $eof if (this.lits.length > 1) pattern += '(' + this.lits.slice(1).map(function (lit) { return unicode(lit.literal); }).join('|') + ')|'; // skip pattern = (pattern + '(' + (_skip ? _skip.source : '\\s+') + ')').substr(1); // returns number of \n in _s function nl (_s) { var result = 0, at; for (var begin = 0; (at = _s.substring(begin).indexOf('\n')) >= 0; begin += at + 1) ++ result; return result; } // the closure to be returned function tokenizer (_input) { self.assert('__WHERE__', typeof _input == 'string'); var result = new Array(); var lineno = 1; var begin = pattern.lastIndex = 0; while (true) { scan: for (var m; (m = pattern.exec(_input)) != null && m.index == begin; begin = pattern.lastIndex) { if (m[m.length - 1]) { // skip pattern lineno += nl(m[m.length - 1]); continue scan; } if (m[0] in self.litsByValue) { result.push(new page.Tuple(lineno, self.litsByValue[m[0]], m[0])); lineno += nl(m[0]); continue scan; } for (var t = 1; t < self.tokens.length; ++ t) // without $error if (m[t]) { result.push(new page.Tuple(lineno, self.tokens[t], m[t])); lineno += nl(m[t]); continue scan; } self.assert('__WHERE__'); } if (begin >= _input.length) // end of input return result; // error tuple result.push(new page.Tuple(lineno, self.token(), _input.substr(begin, 1))); if (_input.substring(begin, 1) == '\n') ++ lineno; // ignore one character pattern.lastIndex = ++ begin; } } tokenizer.pattern = pattern; pattern = new RegExp(pattern, 'mg'); return tokenizer; }; /** Factory method to create a unique literal, maintains {@link page.BNF}`.lit` and {@link page.BNF}`.litsByValue`. * * @param {string} [_literal] literal's representation; * if omitted represents the `$eof` literal terminal. * * @returns {page.BNF.Lit} a unique literal. */ page.BNF.prototype.lit = function (_literal) { if (arguments.length == 0) _literal = ''; else { this.assert('__WHERE__', arguments.length == 1); this.assert('__WHERE__', typeof _literal == 'string' && _literal.length > 0); } // return existing literal if (_literal in this.litsByValue) return this.litsByValue[_literal]; // create new literal var result = new page.BNF.Lit(_literal); this.lits.push(result); return this.litsByValue[_literal] = result; }; /** Factory method to create a unique token, maintains {@link page.BNF}`.tokens` and {@link page.BNF}`.tokensByName`, * checks against {@link page.BNF}`.ntsByName`. * * @param {string} [_name] token's name; cannot start with `$`. * If omitted represents the `$error` token. * @param {string} [_pattern] pattern to match values representing the token in input; * must be specified if and only if the token is created. * @returns {page.BNF.Token} a unique terminal category representation. */ page.BNF.prototype.token = function (_name, _pattern) { if (arguments.length == 0) { _name = '$error'; _pattern = null; } else { this.assert('__WHERE__', arguments.length >= 1); this.assert('__WHERE__', typeof _name == 'string' && _name.length > 0 && _name[0] != '$'); this.assert('__WHERE__', arguments.length == 1 || (typeof _pattern == 'string' && RegExp(_pattern, 'gm') != null)); this.assert('__WHERE__', !(_name in this.ntsByName)); // non-terminal this.assert('__WHERE__', (_name in this.tokensByName) != arguments.length > 1); // duplicate and forward reference } if (_name in this.tokensByName) return this.tokensByName[_name]; var result = new page.BNF.Token(_name, _pattern); this.tokens.push(result); return this.tokensByName[_name] = result; }; /** Factory method to create a unique non-terminal representation, * maintains {@link page.BNF}`.nts` and {@link page.BNF}`.ntsByName`, * checks against {@link page.BNF}`.tokensByName`. * * @param {string} [_name] non-terminal's name; cannot start with `$`. * If `$` creates a unique name which cannot be searched for. * If omitted represents the `$accept` non-terminal. * * @returns {page.BNF.NT} a unique non-terminal representation. */ page.BNF.prototype.nt = function (_name) { if (arguments.length == 0) _name = '$accept'; else if (_name == '$') _name += this.nts.length; else { this.assert('__WHERE__', typeof _name == 'string' && _name.length > 0 && _name[0] != '$'); this.assert('__WHERE__', !(_name in this.tokensByName)); // token } if (_name in this.ntsByName) return this.ntsByName[_name]; var result = new page.BNF.NT(_name, this.nts.length); this.nts.push(result); return this.ntsByName[_name] = result; }; /** Factory method to create a rule representation for BNF. * Adds to rule's non-terminal's {@link page.BNF.NT}`.rules`. * * @param {page.BNF.NT} _nt left-hand side, non-terminal. * @param {Node[]} [_symbols] right-hand side, may be empty array; * if omitted, creates empty rule 0 for `$accept`. * * @returns {page.BNF.Rule} a new rule representation. * * @see page.BNF.init */ page.BNF.prototype.rule = function (_nt, _symbols) { var self = this; if (arguments.length == 1) { self.assert('__WHERE__', self.rules.length == 0 && _nt == self.nt()); _symbols = []; // filled in during init } else { self.assert('__WHERE__', _nt instanceof page.BNF.NT); self.assert('__WHERE__', _symbols instanceof Array); _symbols.forEach(function (symbol) { self.assert('__WHERE__', symbol instanceof page.BNF.NT || symbol instanceof page.BNF.Lit || symbol instanceof page.BNF.Token); }); } var result = new page.BNF.Rule(_nt, _symbols); // add new rule to rule's nt's rules result.nt.rules.push(result); return result; };