"use strict"; /** @fileOverview Implements `class page.SLR1`. * @author Axel T. Schreiner <ats@cs.rit.edu> * @version 1.5.1 */ /** Wraps a suitable grammar for bottom-up parsing. * The {@link page.BNF} grammar is checked to be SLR(1). * The state/symbol/action table is created which results in the discovery of any conflicts. * * `shift` actions are generated for all terminals anticipated by traversing the grammar. * `reduce` actions are generated for all terminals in the `follow` set of a rule. * * `reduce`/`reduce` conflicts are reported and resolved in favor of the rule * defined first. `shift`/`reduce` conflicts are reported and resolved in favor of `shift` * unless they can be resolved through precedences. * * If precedences are available, a rule is reduced if it has higher precedence * than a `follow` terminal. The `follow` terminal is shifted if it has * higher precedence than the rule. If the precedences are equal, a left-associative * rule is reduced, a right-associative rule causes the `follow` terminal * to be shifted, and for a non-associative rule the `follow` terminal is considered an error, * i.e., it is removed from the action table. * * All rules are checked that they can be reduced: * The constructor deletes a `boolean` field `.reduced` from all rules of the {@link page.BNF} grammar. * It then creates state 0 from rule zero marked in position zero and tells * each state to {@link page.SLR1.State#advance}. * This adds a `boolean` field `.reduced` to each {@link page.BNF.Rule} * which can be reduced. If rules cannot be reduced they will be reported as errors. * * @class Tests and wraps a SLR(1) grammar for bottom-up parsing. * An object of this class supports one execution of the parsing algorithm at any one time. * The object is serially reusable. * * {@link page.SLR1#parse} is a stack-based parsing algorithm for {@link page.BNF} grammars * (which can be derived from {@link page.EBNF} grammars). * * @param {page.BNF|page.EBNF} _grammar to be used for parsing. * Left-recursive grammars and precedence grammars are more efficient. * The underlying BNF (precedence) grammar is checked whether it is SLR(1). * @param {Observer} [_observer] to observe recognition. * * @property {page.BNF} bnf BNF (precedence) grammar to control recognition. * @property {page.EBNF} ebnf EBNF (precedence) grammar, if available. * @property {Observer} observer to observe recognition. * @property {page.SLR1.State[]} states state/symbol/action table. * @property {number} errors number of errors in creating the state table. * @property {number} sr number of shift/reduce conflicts. * @property {number} rr number of reduce/reduce conflicts. * * @property {object[]} stack currently active states and associated values, * exists only while {@link page.SLR1#parse} is used. * @property {number} stack.state state number. * @property {object} stack.value returned by observer for `shift` and `goto` into the state. * * @see page.SLR1.State#advance * @see page.BNFP * @see page.EBNFP */ page.SLR1 = function (_grammar, _observer) { var self = this; self.assert('__WHERE__', _grammar instanceof page.BNF); !_observer || self.assert('__WHERE__', typeof _observer == 'function'); if (_grammar instanceof page.EBNF) { self.ebnf = _grammar; self.bnf = _grammar.bnf; // import error count if any self.errors = self.ebnf.errors; self.ebnf.errors = 0; } else { self.ebnf = undefined; self.bnf = _grammar; // import error count if any self.errors = self.bnf.errors; self.bnf.errors = 0; } if (_observer) self.observer = _observer; self.states = []; self.sr = 0; self.rr = 0; // delete .reduced fields self.bnf.rules.forEach(function (rule) { delete rule.reduced; }); // create state[0]: mark rule 0 in position 0 self.states.push(self.state([self.config(self.bnf.rules[0], 0)])); // tell each state to advance; self creates new states which are also advanced for (var s = 0; s < self.states.length; ++ s) self.states[s].advance(self, s); // check that all rules can be reduced self.bnf.rules.forEach(function (rule, r) { if (!rule.reduced) self.error('rule', r, '(' + rule + ')', 'is never reduced'); }); }; page.baseclass(page.SLR1, 'page.SLR1'); /** Delegates to `.bnf` to create a function which tokenizes a string. * * @param {string|RegExp} [_skip] a pattern to define ignorable character sequences; * this pattern must not accept empty input and it must use `(:? )` * rather than `( )` for grouping; the default is to skip white space. * * @returns {Tokenizer} a function which takes a string and returns a list of {@link page.Tuple} elements. * The list contains one tuple with the `error` token for each * character which is neither ignorable nor a literal or part of a token. * The function is based on a regular expression which can be displayed as a member `.pattern`. * * @see page.BNF#tokenizer */ page.SLR1.prototype.tokenizer = function (_skip) { return this.bnf.tokenizer(_skip); }; /** Displays state table, number of errors and conflicts, if any, and delegates to the grammar. * @override * * @returns {string} */ page.SLR1.prototype.toString = function () { return this.states.map(function (state, n) { return 'state ' + n + '\n' + state.toString(); }).join('\n\n') + '\n' + (this.errors ? '\nerrors: ' + this.errors : '') + (this.sr ? '\nshift/reduce conflicts: ' + this.sr : '') + (this.rr ? '\nreduce/reduce conflicts: ' + this.rr : '') + (this.errors + this.sr + this.rr ? '\n\n' : '\n') + (this.ebnf ? this.ebnf.toString() : this.bnf.toString()); }; /** Displays state table with closures, number of errors and conflicts, and delegates to the grammar. * @override * * @returns {string} */ page.SLR1.prototype.dump = function () { var self = this; var result = ''; if ('stack' in self) result = 'active stack:\n ' + self.stack.map(function(sv) { var state = sv.state, value = sv.value; value = value == null ? 'null' : value instanceof Array ? '[ ] ' + value : (typeof value).substr(0, 3) + ' ' + value; if (value.length > 23) value = value.substr(0, 23); return (state + ' ').substr(0, 4) + ' ' + value; }).join('\n ') + '\n\n'; return result + self.states.map(function (state, n) { return 'state ' + n + '\n' + state.dump(); }).join('\n\n') + '\n' + (self.errors ? '\nerrors: ' + self.errors : '') + (self.sr ? '\nshift/reduce conflicts: ' + self.sr : '') + (self.rr ? '\nreduce/reduce conflicts: ' + self.rr : '') + (self.errors + self.sr + self.rr ? '\n\n' : '\n') + (self.ebnf ? self.ebnf.dump() : self.bnf.dump()); }; /** Factory method to create the accept action for `$eof`. * * @returns {page.SLR1.Action} an object representing the action. */ page.SLR1.prototype.accept = function () { return new page.SLR1.Action('accept', this.bnf.lit(), ''); }; /** Factory method to create a reduce action. * * @param {page.BNF.T} _t terminal on which to take the action. * @param {page.BNF.Rule} _rule rule to reduce. * * @returns {page.SLR1.Action} an object representing the action. */ page.SLR1.prototype.reduce = function (_t, _rule) { this.assert('__WHERE__', _t instanceof page.BNF.T); this.assert('__WHERE__', _rule instanceof page.BNF.Rule); return new page.SLR1.Action('reduce', _t, _rule); }; /** Factory method to create a shift or goto action. * * @param {page.BNF.T|page.BNF.NT} _symbol on which to take the action. * @param {number} _state state to shift to. * * @returns {page.SLR1.Action} an object representing the action. */ page.SLR1.prototype.shift_or_goto = function (_symbol, _state) { this.assert('__WHERE__', _symbol instanceof page.BNF.T || _symbol instanceof page.BNF.NT); this.assert('__WHERE__', typeof _state == 'number'); return new page.SLR1.Action(_symbol instanceof page.BNF.T ? 'shift' : 'goto', _symbol, _state); }; /** Factory method to represent a mark in a rule. * * @param {page.BNF.Rule} _rule rule to mark. * @param {number} _position position in rule, before a symbol or after all. * * @returns {page.SLR1.Config} an object representing the marked rule. */ page.SLR1.prototype.config = function (_rule, _position) { this.assert('__WHERE__', _rule instanceof page.BNF.Rule); this.assert('__WHERE__', typeof _position == 'number' && _position >= 0 && _position <= _rule.symbols.length); return new page.SLR1.Config(_rule, _position); }; /** Factory method to represent a state of the SLR(1) automaton. * The state is created from the core configurations; * rules in the closure are added. * * @param {page.SLR1.Config[]} _core list of configurations in the core. * * @returns {page.SLR1.State} an object representing the state. */ page.SLR1.prototype.state = function (_core) { var self = this; self.assert('__WHERE__', _core instanceof Array && _core.length > 0); _core.forEach(function (config) { self.assert('__WHERE__', config instanceof page.SLR1.Config); }); var coreLength = _core.length; // for efficient state comparison var actions = {}; // initialized to map all possible next symbols to null // compute closure: loop over core and added configurations for (var c = 0; c < _core.length; ++ c) // for each incomplete configuration if (!_core[c].complete) { // next symbol in a configuration var s = _core[c].rule.symbols[_core[c].position]; if (s instanceof page.BNF.NT && !(s.ord in actions)) // add all rules for a new non-terminal, marked at 0 s.rules.forEach(function (rule) { _core.push(self.config(rule, 0)); }); // map this next terminal or non-terminal to null actions[s.ord] = null; } return new page.SLR1.State(_core, coreLength, actions); }; /** Checks bottom-up if input conforms to a BNF grammar. * For an EBNF grammar this method uses the corresponding BNF grammar. * This method can be called repeatedly with portions of input. * * The method uses a state stack and attempts error recovery. * ??If an input symbol cannot be matched, the parser function will search the rest of the current * and all outer active rules for a possible match with the input symbol. * If there is no match, the input symbol is discarded and the search is tried again * until an `irrecoverable error` is reported at end of input. * * ##### Create parsers and scanner * var parser = new page.SLR1(page.BNF.from(' ... grammar described in BNF ...')); * var parser = new page.SLR1(page.EBNF.from(' ... grammar described in EBNF ...')); * var scanner = parser.tokenizer(/ skip pattern /); * * ##### Parse -- all input * parser.parse(scanner('... input ...').concat([ null ])); * * ##### Parse -- push tuples * var result = parser.parse(scanner('... start of input ...')); * while (result === true) * result = parser.parse(scanner('... more input ...')); // eventually null input * * ##### Parse a stream -- pull tuples * function stream () { return scanner('... start/more input/null ...'); } * parser.parse(stream); * * ##### Trace * parser.parse(stream, page.trace()); * * ##### Collect token values * var result = parser.parse(stream, parser.reflect()); * print(result); // array with token values * * ##### Create observer for traced building * var environment = ... something useful for the builder ...; * var builder = { * ruleName: // matched by reflection * function (_values, _environment, _tuple, _sender) { * return ... anything * throw string for error message * throw Error with message to abort * } * ... * }; * var observer = page.trace(parser.reflect(environment, builder)); * * ##### Build * var result = parser.parse(stream, observer); * print(result); // array with builder value(s) * * @param {page.Tuple[]|Tokenizer} _input either the next portion of input * and `null` or an empty array will match `$eof`, or a {@link Tokenizer} which will be called *without arguments* * and each time must return the next portion of input, `null`, or an empty array. * @param {Observer} [_observer] to observe recognition; * cannot be specified once parsing is in progress. * * @returns {boolean|object} `true` to request another input array, * or the top-level value (which could be `null`) returned by the observer, if any, when the end of input is reached. * * @throws {Error} a message from the observer to abort parsing. * * @see page.trace */ page.SLR1.prototype.parse = function (_input, _observer) { var self = this; // closure var current = null; // current input tuple, if any var tuples = []; // available tuples var index = 0; // index of next tuple // set current to a new tuple, but not past $eof // throw true to ask for more input function next () { if (index >= tuples.length) { // no tuples left if (typeof _input == 'function') // call back for more tuples = _input(); else if (_input) { // consume argument tuples = _input; _input = false; } else if (_input === false) // throw true for more input throw true; else // null input: EOF tuples = null; if (!tuples || tuples.length == 0) // EOF tuples = [ new page.Tuple(0, self.bnf.lit(), '') ]; index = 0; } // use 'index' tuple if ((current = tuples[index]) == null) current = tuples[index] = new page.Tuple(0, self.bnf.lit(), ''); // advance index (to later tokenize more) but don't run past $eof tuple if (current.t != self.bnf.lit()) ++ index; } self.assert('__WHERE__', typeof _input != 'boolean'); if ('stack' in self) { // multiple calls in progress, only for tuple array, no new observer !_input || self.assert('__WHERE__', _input instanceof Array); self.assert('__WHERE__', !_observer); } else { !_input || self.assert('__WHERE__', _input instanceof Array || typeof _input == 'function'); !_observer || self.assert('__WHERE__', typeof _observer == 'function'); if (_observer) self.observer = _observer; // begin parsing: create stack and initialize with state 0 self.stack = [ { state: 0 } ]; } self.assert('__WHERE__', self.stack.length > 0); var state = self.states[self.stack[self.stack.length - 1].state]; // current state // inform observer, if any // if error action: print result or info if any; return null // rethrow Error; return result or null function observe (_tuple, _action, _info) { self.assert('__WHERE__', _tuple instanceof page.Tuple); var result = _action == 'error' ? _info : null; if (self.observer) try { result = self.observer(self.stack[self.stack.length - 1].state, _tuple, _action, _info, self); } catch (e) { if (typeof e == 'string') self.error(e); else { self.assert('__WHERE__', e instanceof Error) throw 'lineNumber' in e ? new Error('('+e.lineNumber+') ' + e.message) : e; } } if (_action == 'error') { if (result) self.error(result); return null; } return result; } // perform _action on _tuple, consume on shift // return true/false for shift/reduce, i.e., true if _tuple is consumed // throw [ result ] on accept function perform (_action, _tuple) { self.assert('__WHERE__', _action.action != 'goto'); var result = observe(_tuple, _action.action, _action.info); switch (_action.action) { case 'accept': throw [ result ]; // success case 'shift': self.assert('__WHERE__', typeof _action.info == 'number' && _action.info >= 0 && _action.info < self.states.length); self.stack.push({ state: _action.info, value: result }); // shift to new state state = self.states[_action.info]; return true; // _tuple consumed case 'reduce': self.assert('__WHERE__', _action.info instanceof page.BNF.Rule); self.assert('__WHERE__', self.stack.length > _action.info.symbols.length); // pop the stack by the length of the rule, uncover state self.stack.length -= _action.info.symbols.length; state = self.states[self.stack[self.stack.length - 1].state]; // there has to be a goto for the non-terminal self.assert('__WHERE__', _action.info.nt.ord in state.actions); var g = state.actions[_action.info.nt.ord]; self.assert('__WHERE__', g.action == 'goto'); self.assert('__WHERE__', typeof g.info == 'number' && g.info >= 0 && g.info < self.states.length); observe(_tuple, g.action, g.info); // observe the goto self.stack.push({ state: g.info, value: result }); // goto to new state state = self.states[g.info]; return false; // _tuple still available } self.assert('__WHERE__'); // illegal action } // send one 'error' with an error message. // work on input and stack until a shift $error and shift current are done; // send 'error' with null message every time the stack is popped. // throw [ true ] for more input // throw [ result ] for $accept // throw [ 'irrecoverable error' ] if the stack is empty. // return when current was consumed to resume normal processing. function recover () { // produce a message listing expected symbols in state table var msg = current + ' is not allowed\nexpecting:'; for (var a in state.actions) if (state.actions[a].symbol instanceof page.BNF.T && state.actions[a].symbol != self.bnf.token()) // not $error msg += ' ' + state.actions[a].symbol; // run the message past the observer (which may throw a string). observe(current, 'error', msg); // drop any $error sent by the tokenizer while (current.t == self.bnf.token()) next(); // next tuple, throws true for more input // now try to find a shift action for $error stackLoop: while (self.stack.length > 1) { // as long as state has an action for $error while (self.bnf.token().ord in state.actions) // perform that action if (perform(state.actions[self.bnf.token().ord], new page.Tuple(current.lineno, self.bnf.token(), null))) // did shift $error while (true) { // first discard input until state has an action for current while (!(current.t.ord in state.actions)) { self.message('discarding ' + current); if (current.t == self.bnf.lit()) // $eof break stackLoop; next(); // next tuple, throws true for more input } // perform the action for current if (perform(state.actions[current.t.ord], current)) { next(); // did shift current return; // ready for another tuple } // keep discarding if there's a reduce/goto before current } // keep trying if there's a reduce/goto before $error self.stack.pop(); state = self.states[self.stack[self.stack.length - 1].state]; } throw [ 'irrecoverable error' ]; } // call/cc: throw [ result ] to return result from the parser try { while (true) { if (!current) next(); // ascertain input self.assert('__WHERE__', self.stack.length > 0); if (current.t != self.bnf.token() // not $error tuple && current.t.ord in state.actions) { // anticipated input if (perform(state.actions[current.t.ord], current)) next(); // consumed } else recover(); } } catch (outcome) { if (outcome === true) return true; // more input // end parsing: remove stack delete self.stack; if (outcome instanceof Array) return outcome[0]; self.assert('__WHERE__', outcome instanceof Error); throw 'lineNumber' in outcome ? new Error('('+outcome.lineNumber+') ' + outcome.message) : outcome; } }; /** Creates a function which can observe {@link page.SLR1#parse}. * * The observer function collects a list of all token values and gives each rule a chance to structure the list of values * collected for the rule. Conceptually, it works exactly like {@link page.LL1#reflect}, i.e., it maintains a stack of value lists as follows: * * | action | effect | * |--------|--------| * | `'shift'` | ignores a current literal or pushes the current token's associated string value onto the top-level value list and returns `null`. * | `'reduce'` | pops the top-level value list, interacts with `_obj` if possible; returns the top-level value list or the interaction result. * | `'goto'` | concatenates or appends the result of the preceding `'reduce'` to the next level value list and returns null. * | `'accept'` | returns the top-level value list. * | `'error'` | returns the error message. * * For `'reduce'`, if a property with the rule's non-terminal's name can be found through `_obj`, * it should be a {@link Builder} function which is then called with `_env` and the popped top-level value list * which contains the values collected for the rule in order. * * For the immediately following `'goto'`, * if the `'reduce'` result is an array it is concatenated to the next level value list; * otherwise the result is pushed onto the next level value list. * * The name of the non-terminal is searched in the properties of `_obj` or by inheritance, i.e., * if one property name is `{}` it's associated value is searched recursively. * * @param {object} [_env] environment, passed through to the functions found through the properties in `_obj`. * @param {object} [_obj] collection of {@link Builder} functions, i.e., semantic actions. * * @returns {Observer} a function to observe a parser. * * @see page.trace */ page.SLR1.prototype.reflect = function (_env, _obj) { var self = this; // closure return function (_at, _tuple, _action, _info, _sender) { self.assert('__WHERE__', typeof _at == 'number'); self.assert('__WHERE__', _tuple instanceof page.Tuple); self.assert('__WHERE__', typeof _action == 'string'); !_info || self.assert('__WHERE__', typeof _info == 'string' || typeof _info == 'number' || _info instanceof page.BNF.Rule || _info instanceof page.BNF.T); self.assert('__WHERE__', _sender === self); switch (_action) { case 'shift': // return token's input string or null return _tuple.t instanceof page.BNF.Token && _tuple.t != self.bnf.token() ? _tuple.value : null; case 'reduce': page.assert('__WHERE__', _info instanceof page.BNF.Rule); page.assert('__WHERE__', self.stack.length >= _info.symbols.length); var len = _info.symbols.length, values = [ ]; if (len) { // take len values in order off the stack var slice = self.stack.slice(- len); // concatenate the values, ignore null for (var s in slice) if ((slice[s].value) != null) values = values.concat(slice[s].value); // flatten array, append value } // reflect for (var o = _obj; o; o = o['{}']) if (_info.nt.name in o) { return o[_info.nt.name](values, _env, _tuple, _sender); } return values; case 'goto': return null; case 'accept': self.assert('__WHERE__', self.stack.length == 2); return self.stack.pop().value; case 'error': // return if non-empty, pop if empty page.assert('__WHERE__', self.stack.length > 0); page.assert('__WHERE__', !_info || typeof _info == 'string'); return _info; } self.assert('__WHERE__'); }; }; /** Creates a function which can observe * {@link page.SLR1#parse}. Unlike {@link page.SLR1#reflect} * this observer collects the values associated with *all* * terminal symbols and offers them to `_obj`. * * @param {object} [_env] environment, passed through to the functions found through the properties in `_obj`. * @param {object} [_obj] collection of {@link Builder} functions, i.e., semantic actions. * * @returns {Observer} a function to observe a parser. * * @see page.trace */ page.SLR1.prototype.yaccpar = function (_env, _obj) { var self = this; // closure return function (_at, _tuple, _action, _info, _sender) { self.assert('__WHERE__', typeof _at == 'number'); self.assert('__WHERE__', _tuple instanceof page.Tuple); self.assert('__WHERE__', typeof _action == 'string'); !_info || self.assert('__WHERE__', typeof _info == 'string' || typeof _info == 'number' || _info instanceof page.BNF.Rule || _info instanceof page.BNF.T); self.assert('__WHERE__', _sender === self); switch (_action) { case 'shift': // return associated terminal value return _tuple.value; case 'reduce': self.assert('__WHERE__', _info instanceof page.BNF.Rule); // copy the values var len = _info.symbols.length; var values = len > 0 ? self.stack.slice(- len).map(function (sv) { return sv.value; }) : []; for (var o = _obj; o; o = o['{}']) if (_info.nt.name in o) { return o[_info.nt.name](values); } // default: $$ = $1 return len ? values[0] : null; case 'goto': return null; case 'accept': self.assert('__WHERE__', self.stack.length == 2); return self.stack.pop().value case 'error': // return if non-empty, pop if empty self.assert('__WHERE__', self.stack.length > 0); self.assert('__WHERE__', !_info || typeof _info == 'string'); return _info; } self.assert('__WHERE__'); }; };