"use strict";
/** @fileOverview Implements `class page.SLR1`.
* @author Axel T. Schreiner <ats@cs.rit.edu>
* @version 1.5.1
*/
/** Wraps a suitable grammar for bottom-up parsing.
* The {@link page.BNF} grammar is checked to be SLR(1).
* The state/symbol/action table is created which results in the discovery of any conflicts.
*
* `shift` actions are generated for all terminals anticipated by traversing the grammar.
* `reduce` actions are generated for all terminals in the `follow` set of a rule.
*
* `reduce`/`reduce` conflicts are reported and resolved in favor of the rule
* defined first. `shift`/`reduce` conflicts are reported and resolved in favor of `shift`
* unless they can be resolved through precedences.
*
* If precedences are available, a rule is reduced if it has higher precedence
* than a `follow` terminal. The `follow` terminal is shifted if it has
* higher precedence than the rule. If the precedences are equal, a left-associative
* rule is reduced, a right-associative rule causes the `follow` terminal
* to be shifted, and for a non-associative rule the `follow` terminal is considered an error,
* i.e., it is removed from the action table.
*
* All rules are checked that they can be reduced:
* The constructor deletes a `boolean` field `.reduced` from all rules of the {@link page.BNF} grammar.
* It then creates state 0 from rule zero marked in position zero and tells
* each state to {@link page.SLR1.State#advance}.
* This adds a `boolean` field `.reduced` to each {@link page.BNF.Rule}
* which can be reduced. If rules cannot be reduced they will be reported as errors.
*
* @class Tests and wraps a SLR(1) grammar for bottom-up parsing.
* An object of this class supports one execution of the parsing algorithm at any one time.
* The object is serially reusable.
*
* {@link page.SLR1#parse} is a stack-based parsing algorithm for {@link page.BNF} grammars
* (which can be derived from {@link page.EBNF} grammars).
*
* @param {page.BNF|page.EBNF} _grammar to be used for parsing.
* Left-recursive grammars and precedence grammars are more efficient.
* The underlying BNF (precedence) grammar is checked whether it is SLR(1).
* @param {Observer} [_observer] to observe recognition.
*
* @property {page.BNF} bnf BNF (precedence) grammar to control recognition.
* @property {page.EBNF} ebnf EBNF (precedence) grammar, if available.
* @property {Observer} observer to observe recognition.
* @property {page.SLR1.State[]} states state/symbol/action table.
* @property {number} errors number of errors in creating the state table.
* @property {number} sr number of shift/reduce conflicts.
* @property {number} rr number of reduce/reduce conflicts.
*
* @property {object[]} stack currently active states and associated values,
* exists only while {@link page.SLR1#parse} is used.
* @property {number} stack.state state number.
* @property {object} stack.value returned by observer for `shift` and `goto` into the state.
*
* @see page.SLR1.State#advance
* @see page.BNFP
* @see page.EBNFP
*/
page.SLR1 = function (_grammar, _observer) {
var self = this;
self.assert('__WHERE__', _grammar instanceof page.BNF);
!_observer || self.assert('__WHERE__', typeof _observer == 'function');
if (_grammar instanceof page.EBNF) {
self.ebnf = _grammar;
self.bnf = _grammar.bnf;
// import error count if any
self.errors = self.ebnf.errors; self.ebnf.errors = 0;
} else {
self.ebnf = undefined;
self.bnf = _grammar;
// import error count if any
self.errors = self.bnf.errors; self.bnf.errors = 0;
}
if (_observer) self.observer = _observer;
self.states = [];
self.sr = 0;
self.rr = 0;
// delete .reduced fields
self.bnf.rules.forEach(function (rule) { delete rule.reduced; });
// create state[0]: mark rule 0 in position 0
self.states.push(self.state([self.config(self.bnf.rules[0], 0)]));
// tell each state to advance; self creates new states which are also advanced
for (var s = 0; s < self.states.length; ++ s)
self.states[s].advance(self, s);
// check that all rules can be reduced
self.bnf.rules.forEach(function (rule, r) {
if (!rule.reduced)
self.error('rule', r, '(' + rule + ')', 'is never reduced');
});
};
page.baseclass(page.SLR1, 'page.SLR1');
/** Delegates to `.bnf` to create a function which tokenizes a string.
*
* @param {string|RegExp} [_skip] a pattern to define ignorable character sequences;
* this pattern must not accept empty input and it must use `(:? )`
* rather than `( )` for grouping; the default is to skip white space.
*
* @returns {Tokenizer} a function which takes a string and returns a list of {@link page.Tuple} elements.
* The list contains one tuple with the `error` token for each
* character which is neither ignorable nor a literal or part of a token.
* The function is based on a regular expression which can be displayed as a member `.pattern`.
*
* @see page.BNF#tokenizer
*/
page.SLR1.prototype.tokenizer = function (_skip) {
return this.bnf.tokenizer(_skip);
};
/** Displays state table, number of errors and conflicts, if any, and delegates to the grammar.
* @override
*
* @returns {string}
*/
page.SLR1.prototype.toString = function () {
return this.states.map(function (state, n) {
return 'state ' + n + '\n' + state.toString();
}).join('\n\n') + '\n' +
(this.errors ? '\nerrors: ' + this.errors : '') +
(this.sr ? '\nshift/reduce conflicts: ' + this.sr : '') +
(this.rr ? '\nreduce/reduce conflicts: ' + this.rr : '') +
(this.errors + this.sr + this.rr ? '\n\n' : '\n') +
(this.ebnf ? this.ebnf.toString() : this.bnf.toString());
};
/** Displays state table with closures, number of errors and conflicts, and delegates to the grammar.
* @override
*
* @returns {string}
*/
page.SLR1.prototype.dump = function () {
var self = this;
var result = '';
if ('stack' in self)
result = 'active stack:\n ' +
self.stack.map(function(sv) {
var state = sv.state, value = sv.value;
value = value == null ? 'null' :
value instanceof Array ? '[ ] ' + value :
(typeof value).substr(0, 3) + ' ' + value;
if (value.length > 23) value = value.substr(0, 23);
return (state + ' ').substr(0, 4) + ' ' + value;
}).join('\n ') + '\n\n';
return result +
self.states.map(function (state, n) {
return 'state ' + n + '\n' + state.dump();
}).join('\n\n') + '\n' +
(self.errors ? '\nerrors: ' + self.errors : '') +
(self.sr ? '\nshift/reduce conflicts: ' + self.sr : '') +
(self.rr ? '\nreduce/reduce conflicts: ' + self.rr : '') +
(self.errors + self.sr + self.rr ? '\n\n' : '\n') +
(self.ebnf ? self.ebnf.dump() : self.bnf.dump());
};
/** Factory method to create the accept action for `$eof`.
*
* @returns {page.SLR1.Action} an object representing the action.
*/
page.SLR1.prototype.accept = function () {
return new page.SLR1.Action('accept', this.bnf.lit(), '');
};
/** Factory method to create a reduce action.
*
* @param {page.BNF.T} _t terminal on which to take the action.
* @param {page.BNF.Rule} _rule rule to reduce.
*
* @returns {page.SLR1.Action} an object representing the action.
*/
page.SLR1.prototype.reduce = function (_t, _rule) {
this.assert('__WHERE__', _t instanceof page.BNF.T);
this.assert('__WHERE__', _rule instanceof page.BNF.Rule);
return new page.SLR1.Action('reduce', _t, _rule);
};
/** Factory method to create a shift or goto action.
*
* @param {page.BNF.T|page.BNF.NT} _symbol on which to take the action.
* @param {number} _state state to shift to.
*
* @returns {page.SLR1.Action} an object representing the action.
*/
page.SLR1.prototype.shift_or_goto = function (_symbol, _state) {
this.assert('__WHERE__', _symbol instanceof page.BNF.T || _symbol instanceof page.BNF.NT);
this.assert('__WHERE__', typeof _state == 'number');
return new page.SLR1.Action(_symbol instanceof page.BNF.T ? 'shift' : 'goto',
_symbol, _state);
};
/** Factory method to represent a mark in a rule.
*
* @param {page.BNF.Rule} _rule rule to mark.
* @param {number} _position position in rule, before a symbol or after all.
*
* @returns {page.SLR1.Config} an object representing the marked rule.
*/
page.SLR1.prototype.config = function (_rule, _position) {
this.assert('__WHERE__', _rule instanceof page.BNF.Rule);
this.assert('__WHERE__', typeof _position == 'number' &&
_position >= 0 && _position <= _rule.symbols.length);
return new page.SLR1.Config(_rule, _position);
};
/** Factory method to represent a state of the SLR(1) automaton.
* The state is created from the core configurations;
* rules in the closure are added.
*
* @param {page.SLR1.Config[]} _core list of configurations in the core.
*
* @returns {page.SLR1.State} an object representing the state.
*/
page.SLR1.prototype.state = function (_core) {
var self = this;
self.assert('__WHERE__', _core instanceof Array && _core.length > 0);
_core.forEach(function (config) { self.assert('__WHERE__', config instanceof page.SLR1.Config); });
var coreLength = _core.length; // for efficient state comparison
var actions = {}; // initialized to map all possible next symbols to null
// compute closure: loop over core and added configurations
for (var c = 0; c < _core.length; ++ c)
// for each incomplete configuration
if (!_core[c].complete) {
// next symbol in a configuration
var s = _core[c].rule.symbols[_core[c].position];
if (s instanceof page.BNF.NT && !(s.ord in actions))
// add all rules for a new non-terminal, marked at 0
s.rules.forEach(function (rule) {
_core.push(self.config(rule, 0));
});
// map this next terminal or non-terminal to null
actions[s.ord] = null;
}
return new page.SLR1.State(_core, coreLength, actions);
};
/** Checks bottom-up if input conforms to a BNF grammar.
* For an EBNF grammar this method uses the corresponding BNF grammar.
* This method can be called repeatedly with portions of input.
*
* The method uses a state stack and attempts error recovery.
* ??If an input symbol cannot be matched, the parser function will search the rest of the current
* and all outer active rules for a possible match with the input symbol.
* If there is no match, the input symbol is discarded and the search is tried again
* until an `irrecoverable error` is reported at end of input.
*
* ##### Create parsers and scanner
* var parser = new page.SLR1(page.BNF.from(' ... grammar described in BNF ...'));
* var parser = new page.SLR1(page.EBNF.from(' ... grammar described in EBNF ...'));
* var scanner = parser.tokenizer(/ skip pattern /);
*
* ##### Parse -- all input
* parser.parse(scanner('... input ...').concat([ null ]));
*
* ##### Parse -- push tuples
* var result = parser.parse(scanner('... start of input ...'));
* while (result === true)
* result = parser.parse(scanner('... more input ...')); // eventually null input
*
* ##### Parse a stream -- pull tuples
* function stream () { return scanner('... start/more input/null ...'); }
* parser.parse(stream);
*
* ##### Trace
* parser.parse(stream, page.trace());
*
* ##### Collect token values
* var result = parser.parse(stream, parser.reflect());
* print(result); // array with token values
*
* ##### Create observer for traced building
* var environment = ... something useful for the builder ...;
* var builder = {
* ruleName: // matched by reflection
* function (_values, _environment, _tuple, _sender) {
* return ... anything
* throw string for error message
* throw Error with message to abort
* }
* ...
* };
* var observer = page.trace(parser.reflect(environment, builder));
*
* ##### Build
* var result = parser.parse(stream, observer);
* print(result); // array with builder value(s)
*
* @param {page.Tuple[]|Tokenizer} _input either the next portion of input
* and `null` or an empty array will match `$eof`, or a {@link Tokenizer} which will be called *without arguments*
* and each time must return the next portion of input, `null`, or an empty array.
* @param {Observer} [_observer] to observe recognition;
* cannot be specified once parsing is in progress.
*
* @returns {boolean|object} `true` to request another input array,
* or the top-level value (which could be `null`) returned by the observer, if any, when the end of input is reached.
*
* @throws {Error} a message from the observer to abort parsing.
*
* @see page.trace
*/
page.SLR1.prototype.parse = function (_input, _observer) {
var self = this; // closure
var current = null; // current input tuple, if any
var tuples = []; // available tuples
var index = 0; // index of next tuple
// set current to a new tuple, but not past $eof
// throw true to ask for more input
function next () {
if (index >= tuples.length) { // no tuples left
if (typeof _input == 'function') // call back for more
tuples = _input();
else if (_input) { // consume argument
tuples = _input;
_input = false;
} else if (_input === false) // throw true for more input
throw true;
else // null input: EOF
tuples = null;
if (!tuples || tuples.length == 0) // EOF
tuples = [ new page.Tuple(0, self.bnf.lit(), '') ];
index = 0;
}
// use 'index' tuple
if ((current = tuples[index]) == null)
current = tuples[index] = new page.Tuple(0, self.bnf.lit(), '');
// advance index (to later tokenize more) but don't run past $eof tuple
if (current.t != self.bnf.lit()) ++ index;
}
self.assert('__WHERE__', typeof _input != 'boolean');
if ('stack' in self) {
// multiple calls in progress, only for tuple array, no new observer
!_input || self.assert('__WHERE__', _input instanceof Array);
self.assert('__WHERE__', !_observer);
} else {
!_input || self.assert('__WHERE__', _input instanceof Array || typeof _input == 'function');
!_observer || self.assert('__WHERE__', typeof _observer == 'function');
if (_observer) self.observer = _observer;
// begin parsing: create stack and initialize with state 0
self.stack = [ { state: 0 } ];
}
self.assert('__WHERE__', self.stack.length > 0);
var state = self.states[self.stack[self.stack.length - 1].state]; // current state
// inform observer, if any
// if error action: print result or info if any; return null
// rethrow Error; return result or null
function observe (_tuple, _action, _info) {
self.assert('__WHERE__', _tuple instanceof page.Tuple);
var result = _action == 'error' ? _info : null;
if (self.observer)
try {
result = self.observer(self.stack[self.stack.length - 1].state, _tuple, _action, _info, self);
} catch (e) {
if (typeof e == 'string')
self.error(e);
else {
self.assert('__WHERE__', e instanceof Error)
throw 'lineNumber' in e ? new Error('('+e.lineNumber+') ' + e.message) : e;
}
}
if (_action == 'error') {
if (result) self.error(result);
return null;
}
return result;
}
// perform _action on _tuple, consume on shift
// return true/false for shift/reduce, i.e., true if _tuple is consumed
// throw [ result ] on accept
function perform (_action, _tuple) {
self.assert('__WHERE__', _action.action != 'goto');
var result = observe(_tuple, _action.action, _action.info);
switch (_action.action) {
case 'accept':
throw [ result ]; // success
case 'shift':
self.assert('__WHERE__', typeof _action.info == 'number'
&& _action.info >= 0 && _action.info < self.states.length);
self.stack.push({ state: _action.info, value: result }); // shift to new state
state = self.states[_action.info];
return true; // _tuple consumed
case 'reduce':
self.assert('__WHERE__', _action.info instanceof page.BNF.Rule);
self.assert('__WHERE__', self.stack.length > _action.info.symbols.length);
// pop the stack by the length of the rule, uncover state
self.stack.length -= _action.info.symbols.length;
state = self.states[self.stack[self.stack.length - 1].state];
// there has to be a goto for the non-terminal
self.assert('__WHERE__', _action.info.nt.ord in state.actions);
var g = state.actions[_action.info.nt.ord];
self.assert('__WHERE__', g.action == 'goto');
self.assert('__WHERE__', typeof g.info == 'number'
&& g.info >= 0 && g.info < self.states.length);
observe(_tuple, g.action, g.info); // observe the goto
self.stack.push({ state: g.info, value: result }); // goto to new state
state = self.states[g.info];
return false; // _tuple still available
}
self.assert('__WHERE__'); // illegal action
}
// send one 'error' with an error message.
// work on input and stack until a shift $error and shift current are done;
// send 'error' with null message every time the stack is popped.
// throw [ true ] for more input
// throw [ result ] for $accept
// throw [ 'irrecoverable error' ] if the stack is empty.
// return when current was consumed to resume normal processing.
function recover () {
// produce a message listing expected symbols in state table
var msg = current + ' is not allowed\nexpecting:';
for (var a in state.actions)
if (state.actions[a].symbol instanceof page.BNF.T
&& state.actions[a].symbol != self.bnf.token()) // not $error
msg += ' ' + state.actions[a].symbol;
// run the message past the observer (which may throw a string).
observe(current, 'error', msg);
// drop any $error sent by the tokenizer
while (current.t == self.bnf.token())
next(); // next tuple, throws true for more input
// now try to find a shift action for $error
stackLoop: while (self.stack.length > 1) {
// as long as state has an action for $error
while (self.bnf.token().ord in state.actions)
// perform that action
if (perform(state.actions[self.bnf.token().ord],
new page.Tuple(current.lineno, self.bnf.token(), null)))
// did shift $error
while (true) {
// first discard input until state has an action for current
while (!(current.t.ord in state.actions)) {
self.message('discarding ' + current);
if (current.t == self.bnf.lit()) // $eof
break stackLoop;
next(); // next tuple, throws true for more input
}
// perform the action for current
if (perform(state.actions[current.t.ord], current)) {
next(); // did shift current
return; // ready for another tuple
}
// keep discarding if there's a reduce/goto before current
}
// keep trying if there's a reduce/goto before $error
self.stack.pop();
state = self.states[self.stack[self.stack.length - 1].state];
}
throw [ 'irrecoverable error' ];
}
// call/cc: throw [ result ] to return result from the parser
try {
while (true) {
if (!current) next(); // ascertain input
self.assert('__WHERE__', self.stack.length > 0);
if (current.t != self.bnf.token() // not $error tuple
&& current.t.ord in state.actions) { // anticipated input
if (perform(state.actions[current.t.ord], current))
next(); // consumed
} else
recover();
}
} catch (outcome) {
if (outcome === true) return true; // more input
// end parsing: remove stack
delete self.stack;
if (outcome instanceof Array) return outcome[0];
self.assert('__WHERE__', outcome instanceof Error);
throw 'lineNumber' in outcome ? new Error('('+outcome.lineNumber+') ' + outcome.message) : outcome;
}
};
/** Creates a function which can observe {@link page.SLR1#parse}.
*
* The observer function collects a list of all token values and gives each rule a chance to structure the list of values
* collected for the rule. Conceptually, it works exactly like {@link page.LL1#reflect}, i.e., it maintains a stack of value lists as follows:
*
* | action | effect |
* |--------|--------|
* | `'shift'` | ignores a current literal or pushes the current token's associated string value onto the top-level value list and returns `null`.
* | `'reduce'` | pops the top-level value list, interacts with `_obj` if possible; returns the top-level value list or the interaction result.
* | `'goto'` | concatenates or appends the result of the preceding `'reduce'` to the next level value list and returns null.
* | `'accept'` | returns the top-level value list.
* | `'error'` | returns the error message.
*
* For `'reduce'`, if a property with the rule's non-terminal's name can be found through `_obj`,
* it should be a {@link Builder} function which is then called with `_env` and the popped top-level value list
* which contains the values collected for the rule in order.
*
* For the immediately following `'goto'`,
* if the `'reduce'` result is an array it is concatenated to the next level value list;
* otherwise the result is pushed onto the next level value list.
*
* The name of the non-terminal is searched in the properties of `_obj` or by inheritance, i.e.,
* if one property name is `{}` it's associated value is searched recursively.
*
* @param {object} [_env] environment, passed through to the functions found through the properties in `_obj`.
* @param {object} [_obj] collection of {@link Builder} functions, i.e., semantic actions.
*
* @returns {Observer} a function to observe a parser.
*
* @see page.trace
*/
page.SLR1.prototype.reflect = function (_env, _obj) {
var self = this;
// closure
return function (_at, _tuple, _action, _info, _sender) {
self.assert('__WHERE__', typeof _at == 'number');
self.assert('__WHERE__', _tuple instanceof page.Tuple);
self.assert('__WHERE__', typeof _action == 'string');
!_info || self.assert('__WHERE__', typeof _info == 'string' || typeof _info == 'number'
|| _info instanceof page.BNF.Rule || _info instanceof page.BNF.T);
self.assert('__WHERE__', _sender === self);
switch (_action) {
case 'shift':
// return token's input string or null
return _tuple.t instanceof page.BNF.Token && _tuple.t != self.bnf.token() ? _tuple.value : null;
case 'reduce':
page.assert('__WHERE__', _info instanceof page.BNF.Rule);
page.assert('__WHERE__', self.stack.length >= _info.symbols.length);
var len = _info.symbols.length, values = [ ];
if (len) {
// take len values in order off the stack
var slice = self.stack.slice(- len);
// concatenate the values, ignore null
for (var s in slice)
if ((slice[s].value) != null)
values = values.concat(slice[s].value); // flatten array, append value
}
// reflect
for (var o = _obj; o; o = o['{}'])
if (_info.nt.name in o) {
return o[_info.nt.name](values, _env, _tuple, _sender);
}
return values;
case 'goto':
return null;
case 'accept':
self.assert('__WHERE__', self.stack.length == 2);
return self.stack.pop().value;
case 'error': // return if non-empty, pop if empty
page.assert('__WHERE__', self.stack.length > 0);
page.assert('__WHERE__', !_info || typeof _info == 'string');
return _info;
}
self.assert('__WHERE__');
};
};
/** Creates a function which can observe
* {@link page.SLR1#parse}. Unlike {@link page.SLR1#reflect}
* this observer collects the values associated with *all*
* terminal symbols and offers them to `_obj`.
*
* @param {object} [_env] environment, passed through to the functions found through the properties in `_obj`.
* @param {object} [_obj] collection of {@link Builder} functions, i.e., semantic actions.
*
* @returns {Observer} a function to observe a parser.
*
* @see page.trace
*/
page.SLR1.prototype.yaccpar = function (_env, _obj) {
var self = this;
// closure
return function (_at, _tuple, _action, _info, _sender) {
self.assert('__WHERE__', typeof _at == 'number');
self.assert('__WHERE__', _tuple instanceof page.Tuple);
self.assert('__WHERE__', typeof _action == 'string');
!_info || self.assert('__WHERE__', typeof _info == 'string' || typeof _info == 'number'
|| _info instanceof page.BNF.Rule || _info instanceof page.BNF.T);
self.assert('__WHERE__', _sender === self);
switch (_action) {
case 'shift':
// return associated terminal value
return _tuple.value;
case 'reduce':
self.assert('__WHERE__', _info instanceof page.BNF.Rule);
// copy the values
var len = _info.symbols.length;
var values = len > 0 ? self.stack.slice(- len).map(function (sv) { return sv.value; }) : [];
for (var o = _obj; o; o = o['{}'])
if (_info.nt.name in o) {
return o[_info.nt.name](values);
}
// default: $$ = $1
return len ? values[0] : null;
case 'goto':
return null;
case 'accept':
self.assert('__WHERE__', self.stack.length == 2);
return self.stack.pop().value
case 'error': // return if non-empty, pop if empty
self.assert('__WHERE__', self.stack.length > 0);
self.assert('__WHERE__', !_info || typeof _info == 'string');
return _info;
}
self.assert('__WHERE__');
};
};