Source: SLR1/SLR1.js

"use strict";

/** @fileOverview Implements `class page.SLR1`.
  * @author Axel T. Schreiner <ats@cs.rit.edu>
  * @version 1.5.1
  */

/** Wraps a suitable grammar for bottom-up parsing.
  * The {@link page.BNF} grammar is checked to be SLR(1).
  * The state/symbol/action table is created which results in the discovery of any conflicts.
  *
  * `shift` actions are generated for all terminals anticipated by traversing the grammar.
  * `reduce` actions are generated for all terminals in the `follow` set of a rule.
  *
  * `reduce`/`reduce` conflicts are reported and resolved in favor of the rule
  * defined first. `shift`/`reduce` conflicts are reported and resolved in favor of `shift`
  * unless they can be resolved through precedences.
  *
  * If precedences are available, a rule is reduced if it has higher precedence
  * than a `follow` terminal. The `follow` terminal is shifted if it has
  * higher precedence than the rule. If the precedences are equal, a left-associative
  * rule is reduced, a right-associative rule causes the `follow` terminal
  * to be shifted, and for a non-associative rule the `follow` terminal is considered an error,
  * i.e., it is removed from the action table.
  *
  * All rules are checked that they can be reduced:
  * The constructor deletes a `boolean` field `.reduced` from all rules of the {@link page.BNF} grammar.
  * It then creates state 0 from rule zero marked in position zero and tells
  * each state to {@link page.SLR1.State#advance}.
  * This adds a `boolean` field `.reduced` to each {@link page.BNF.Rule}
  * which can be reduced. If rules cannot be reduced they will be reported as errors.
  *
  * @class Tests and wraps a SLR(1) grammar for bottom-up parsing.
  * An object of this class supports one execution of the parsing algorithm at any one time.
  * The object is serially reusable.
  *
  * {@link page.SLR1#parse} is a stack-based parsing algorithm for {@link page.BNF} grammars
  * (which can be derived from {@link page.EBNF} grammars).
  *
  * @param {page.BNF|page.EBNF} _grammar to be used for parsing.
  *   Left-recursive grammars and precedence grammars are more efficient.
  *   The underlying BNF (precedence) grammar is checked whether it is SLR(1).
  * @param {Observer} [_observer] to observe recognition.
  *
  * @property {page.BNF} bnf BNF (precedence) grammar to control recognition.
  * @property {page.EBNF} ebnf EBNF (precedence) grammar, if available.
  * @property {Observer} observer to observe recognition.
  * @property {page.SLR1.State[]} states state/symbol/action table.
  * @property {number} errors number of errors in creating the state table.
  * @property {number} sr number of shift/reduce conflicts.
  * @property {number} rr number of reduce/reduce conflicts.
  *
  * @property {object[]} stack currently active states and associated values,
  *   exists only while {@link page.SLR1#parse} is used.
  * @property {number} stack.state state number.
  * @property {object} stack.value returned by observer for `shift` and `goto` into the state.
  *
  * @see page.SLR1.State#advance
  * @see page.BNFP
  * @see page.EBNFP
  */
page.SLR1 = function (_grammar, _observer) {
  var self = this;
  self.assert('__WHERE__', _grammar instanceof page.BNF);
  !_observer || self.assert('__WHERE__', typeof _observer == 'function');

  if (_grammar instanceof page.EBNF) {
    self.ebnf = _grammar;
    self.bnf = _grammar.bnf;
    // import error count if any
    self.errors = self.ebnf.errors; self.ebnf.errors = 0;

  } else {
    self.ebnf = undefined;
    self.bnf = _grammar;
    // import error count if any
    self.errors = self.bnf.errors; self.bnf.errors = 0;
  }

  if (_observer) self.observer = _observer;
  self.states = [];
  self.sr = 0;
  self.rr = 0;
  
  // delete .reduced fields
  self.bnf.rules.forEach(function (rule) { delete rule.reduced; });
  
  // create state[0]: mark rule 0 in position 0
  self.states.push(self.state([self.config(self.bnf.rules[0], 0)]));

  // tell each state to advance; self creates new states which are also advanced
  for (var s = 0; s < self.states.length; ++ s)
    self.states[s].advance(self, s);

  // check that all rules can be reduced
  self.bnf.rules.forEach(function (rule, r) {
      if (!rule.reduced)
        self.error('rule', r, '(' + rule + ')', 'is never reduced');
    });
};

page.baseclass(page.SLR1, 'page.SLR1');

/** Delegates to `.bnf` to create a function which tokenizes a string.
  *
  * @param {string|RegExp} [_skip] a pattern to define ignorable character sequences;
  *   this pattern must not accept empty input and it must use `(:? )`
  *   rather than `( )` for grouping; the default is to skip white space.
  *
  * @returns {Tokenizer} a function which takes a string and returns a list of {@link page.Tuple} elements.
  *   The list contains one tuple with the `error` token for each
  *   character which is neither ignorable nor a literal or part of a token.
  *   The function is based on a regular expression which can be displayed as a member `.pattern`.
  *
  * @see page.BNF#tokenizer
  */
page.SLR1.prototype.tokenizer = function (_skip) {
  return this.bnf.tokenizer(_skip);
};

/** Displays state table, number of errors and conflicts, if any, and delegates to the grammar.
  * @override
  *
  * @returns {string}
  */
page.SLR1.prototype.toString = function () {
  return this.states.map(function (state, n) {
        return 'state ' + n + '\n' + state.toString(); 
      }).join('\n\n') + '\n' +
    (this.errors ? '\nerrors: ' + this.errors : '') +    
    (this.sr ? '\nshift/reduce conflicts: ' + this.sr : '') +    
    (this.rr ? '\nreduce/reduce conflicts: ' + this.rr : '') +
    (this.errors + this.sr + this.rr ? '\n\n' : '\n') +
    (this.ebnf ? this.ebnf.toString() : this.bnf.toString());
};

/** Displays state table with closures, number of errors and conflicts, and delegates to the grammar.
  * @override
  *
  * @returns {string}
  */
page.SLR1.prototype.dump = function () {
  var self = this;
  var result = '';
  
  if ('stack' in self)
    result = 'active stack:\n  ' +
      self.stack.map(function(sv) {
        var state = sv.state, value = sv.value;
        value = value == null ? 'null' :
          value instanceof Array ? '[ ] ' + value :
            (typeof value).substr(0, 3) + ' ' + value;
        if (value.length > 23) value = value.substr(0, 23);
        return (state + '    ').substr(0, 4) + '  ' + value;
      }).join('\n  ') + '\n\n';

  return result +
    self.states.map(function (state, n) {
        return 'state ' + n + '\n' + state.dump(); 
      }).join('\n\n') + '\n' +
    (self.errors ? '\nerrors: ' + self.errors : '') +
    (self.sr ? '\nshift/reduce conflicts: ' + self.sr : '') +
    (self.rr ? '\nreduce/reduce conflicts: ' + self.rr : '') +
    (self.errors + self.sr + self.rr ? '\n\n' : '\n') +
    (self.ebnf ? self.ebnf.dump() : self.bnf.dump());
};

/** Factory method to create the accept action for `$eof`.
  *
  * @returns {page.SLR1.Action} an object representing the action.
  */
page.SLR1.prototype.accept = function () {
  return new page.SLR1.Action('accept', this.bnf.lit(), '');
};

/** Factory method to create a reduce action.
  *
  * @param {page.BNF.T} _t terminal on which to take the action.
  * @param {page.BNF.Rule} _rule rule to reduce.
  *
  * @returns {page.SLR1.Action} an object representing the action.
  */
page.SLR1.prototype.reduce = function (_t, _rule) {
  this.assert('__WHERE__', _t instanceof page.BNF.T);
  this.assert('__WHERE__', _rule instanceof page.BNF.Rule);

  return new page.SLR1.Action('reduce', _t, _rule);
};

/** Factory method to create a shift or goto action.
  *
  * @param {page.BNF.T|page.BNF.NT} _symbol on which to take the action.
  * @param {number} _state state to shift to.
  *
  * @returns {page.SLR1.Action} an object representing the action.
  */
page.SLR1.prototype.shift_or_goto = function (_symbol, _state) {
  this.assert('__WHERE__', _symbol instanceof page.BNF.T || _symbol instanceof page.BNF.NT);
  this.assert('__WHERE__', typeof _state == 'number');

  return new page.SLR1.Action(_symbol instanceof page.BNF.T ? 'shift' : 'goto',
    _symbol, _state);
};

/** Factory method to represent a mark in a rule.
  *
  * @param {page.BNF.Rule} _rule rule to mark.
  * @param {number} _position position in rule, before a symbol or after all.
  *
  * @returns {page.SLR1.Config} an object representing the marked rule.
  */
page.SLR1.prototype.config = function (_rule, _position) {
  this.assert('__WHERE__', _rule instanceof page.BNF.Rule);
  this.assert('__WHERE__', typeof _position == 'number' &&
    _position >= 0 && _position <= _rule.symbols.length);

  return new page.SLR1.Config(_rule, _position);
};

/** Factory method to represent a state of the SLR(1) automaton.
  * The state is created from the core configurations;
  * rules in the closure are added.
  *
  * @param {page.SLR1.Config[]} _core list of configurations in the core.
  *
  * @returns {page.SLR1.State} an object representing the state.
  */
page.SLR1.prototype.state = function (_core) {
  var self = this;
  self.assert('__WHERE__', _core instanceof Array && _core.length > 0);
  _core.forEach(function (config) { self.assert('__WHERE__', config instanceof page.SLR1.Config); });

  var coreLength = _core.length;  // for efficient state comparison
  var actions = {};  // initialized to map all possible next symbols to null

  // compute closure: loop over core and added configurations
  for (var c = 0; c < _core.length; ++ c)
    // for each incomplete configuration
    if (!_core[c].complete) {
      // next symbol in a configuration
      var s = _core[c].rule.symbols[_core[c].position];
      if (s instanceof page.BNF.NT && !(s.ord in actions))
        // add all rules for a new non-terminal, marked at 0
        s.rules.forEach(function (rule) {
          _core.push(self.config(rule, 0));
        });
      // map this next terminal or non-terminal to null
      actions[s.ord] = null;
    }

  return new page.SLR1.State(_core, coreLength, actions);
};

/** Checks bottom-up if input conforms to a BNF grammar.
  * For an EBNF grammar this method uses the corresponding BNF grammar.
  * This method can be called repeatedly with portions of input.
  *
  * The method uses a state stack and attempts error recovery.
  * ??If an input symbol cannot be matched, the parser function will search the rest of the current
  * and all outer active rules for a possible match with the input symbol.
  * If there is no match, the input symbol is discarded and the search is tried again
  * until an `irrecoverable error` is reported at end of input.
  * 
  * ##### Create parsers and scanner
  *     var parser = new page.SLR1(page.BNF.from(' ... grammar described in BNF ...'));
  *     var parser = new page.SLR1(page.EBNF.from(' ... grammar described in EBNF ...'));
  *     var scanner = parser.tokenizer(/ skip pattern /);
  *
  * ##### Parse -- all input
  *     parser.parse(scanner('... input ...').concat([ null ]));
  *
  * ##### Parse -- push tuples
  *     var result = parser.parse(scanner('... start of input ...'));
  *     while (result === true)
  *       result = parser.parse(scanner('... more input ...')); // eventually null input
  * 
  * ##### Parse a stream -- pull tuples
  *     function stream () { return scanner('... start/more input/null ...'); }
  *     parser.parse(stream);
  * 
  * ##### Trace
  *     parser.parse(stream, page.trace());
  * 
  * ##### Collect token values
  *     var result = parser.parse(stream, parser.reflect());
  *     print(result); // array with token values
  * 
  * ##### Create observer for traced building
  *     var environment = ... something useful for the builder ...;
  *     var builder = {
  *       ruleName: // matched by reflection
  *         function (_values, _environment, _tuple, _sender) {
  *           return ... anything
  *           throw string for error message
  *           throw Error with message to abort
  *         }
  *         ...
  *     };
  *     var observer = page.trace(parser.reflect(environment, builder));
  *  
  * ##### Build
  *     var result = parser.parse(stream, observer);
  *     print(result); // array with builder value(s)
  *
  * @param {page.Tuple[]|Tokenizer} _input either the next portion of input
  *   and `null` or an empty array will match `$eof`, or a {@link Tokenizer} which will be called *without arguments*
  *   and each time must return the next portion of input, `null`, or an empty array.
  * @param {Observer} [_observer] to observe recognition;
  *   cannot be specified once parsing is in progress.
  *  
  * @returns {boolean|object} `true` to request another input array,
  *   or the top-level value (which could be `null`) returned by the observer, if any, when the end of input is reached.
  * 
  * @throws {Error} a message from the observer to abort parsing.
  *
  * @see page.trace
  */
page.SLR1.prototype.parse = function (_input, _observer) {
  var self = this;      // closure

  var current = null;   // current input tuple, if any
  var tuples = [];      // available tuples
  var index = 0;        // index of next tuple
  
  // set current to a new tuple, but not past $eof
  // throw true to ask for more input
  function next () {
    if (index >= tuples.length) {         // no tuples left
      
      if (typeof _input == 'function')    // call back for more
        tuples = _input();
      else if (_input) {                  // consume argument
        tuples = _input;
        _input = false;
      } else if (_input === false)        // throw true for more input
        throw true; 
      else                                // null input: EOF
        tuples = null;
        
      if (!tuples || tuples.length == 0)  // EOF
        tuples = [ new page.Tuple(0, self.bnf.lit(), '') ];
      index = 0;
    }

    // use 'index' tuple
    if ((current = tuples[index]) == null)
      current = tuples[index] = new page.Tuple(0, self.bnf.lit(), '');
    
    // advance index (to later tokenize more) but don't run past $eof tuple
    if (current.t != self.bnf.lit()) ++ index;
  }
  
  self.assert('__WHERE__', typeof _input != 'boolean');

  if ('stack' in self) {
    // multiple calls in progress, only for tuple array, no new observer
    !_input || self.assert('__WHERE__', _input instanceof Array);
    self.assert('__WHERE__', !_observer);

  } else {
    !_input || self.assert('__WHERE__', _input instanceof Array || typeof _input == 'function');
    !_observer || self.assert('__WHERE__', typeof _observer == 'function');
    
    if (_observer) self.observer = _observer;
    
    // begin parsing: create stack and initialize with state 0
    self.stack = [ { state: 0 } ];
  }
  
  self.assert('__WHERE__', self.stack.length > 0);

  var state = self.states[self.stack[self.stack.length - 1].state];   // current state
  
  // inform observer, if any
  // if error action: print result or info if any; return null
  // rethrow Error; return result or null
  function observe (_tuple, _action, _info) {
    self.assert('__WHERE__', _tuple instanceof page.Tuple);
    
    var result = _action == 'error' ? _info : null;
    
    if (self.observer)
      try {
         result = self.observer(self.stack[self.stack.length - 1].state, _tuple, _action, _info, self);
       } catch (e) {
         if (typeof e == 'string')
           self.error(e);
         else {
           self.assert('__WHERE__', e instanceof Error)
           throw 'lineNumber' in e ? new Error('('+e.lineNumber+') ' + e.message) : e;
         }
       }
    
    if (_action == 'error') {
      if (result) self.error(result);
      return null;
    }
    
    return result;
  }

  // perform _action on _tuple, consume on shift
  // return true/false for shift/reduce, i.e., true if _tuple is consumed
  // throw [ result ] on accept
  function perform (_action, _tuple) {
    self.assert('__WHERE__', _action.action != 'goto');

    var result = observe(_tuple, _action.action, _action.info);

    switch (_action.action) {
    case 'accept':
      throw [ result ];                  // success

    case 'shift':
      self.assert('__WHERE__', typeof _action.info == 'number'
                               && _action.info >= 0 && _action.info < self.states.length);

      self.stack.push({ state: _action.info, value: result });     // shift to new state
      state = self.states[_action.info];
      return true;                       // _tuple consumed

    case 'reduce':
      self.assert('__WHERE__', _action.info instanceof page.BNF.Rule);
      self.assert('__WHERE__', self.stack.length > _action.info.symbols.length);
  
      // pop the stack by the length of the rule, uncover state
      self.stack.length -= _action.info.symbols.length;
      state = self.states[self.stack[self.stack.length - 1].state];
  
      // there has to be a goto for the non-terminal
      self.assert('__WHERE__', _action.info.nt.ord in state.actions);
      
      var g = state.actions[_action.info.nt.ord];
      
      self.assert('__WHERE__', g.action == 'goto');
      self.assert('__WHERE__', typeof g.info == 'number'
                               && g.info >= 0 && g.info < self.states.length);
  
      observe(_tuple, g.action, g.info); // observe the goto
      self.stack.push({ state: g.info, value: result });           // goto to new state
      state = self.states[g.info];
      return false;                      // _tuple still available
    }
    self.assert('__WHERE__');            // illegal action
  }
  
  // send one 'error' with an error message.
  // work on input and stack until a shift $error and shift current are done;
  // send 'error' with null message every time the stack is popped.
  // throw [ true ] for more input
  // throw [ result ] for $accept
  // throw [ 'irrecoverable error' ] if the stack is empty.
  // return when current was consumed to resume normal processing.
  function recover () {
    // produce a message listing expected symbols in state table
    var msg = current + ' is not allowed\nexpecting:';
  
    for (var a in state.actions)
      if (state.actions[a].symbol instanceof page.BNF.T
          && state.actions[a].symbol != self.bnf.token()) // not $error
        msg += ' ' + state.actions[a].symbol;

    // run the message past the observer (which may throw a string).
    observe(current, 'error', msg);

    // drop any $error sent by the tokenizer
    while (current.t == self.bnf.token())
      next(); // next tuple, throws true for more input

    // now try to find a shift action for $error
    stackLoop: while (self.stack.length > 1) {
      // as long as state has an action for $error
      while (self.bnf.token().ord in state.actions)
        // perform that action
        if (perform(state.actions[self.bnf.token().ord],
                    new page.Tuple(current.lineno, self.bnf.token(), null)))
          // did shift $error
          while (true) {
            // first discard input until state has an action for current
            while (!(current.t.ord in state.actions)) {
              self.message('discarding ' + current);
              if (current.t == self.bnf.lit()) // $eof
                break stackLoop;
              next(); // next tuple, throws true for more input
            }
            // perform the action for current
            if (perform(state.actions[current.t.ord], current)) {
              next(); // did shift current
              return; // ready for another tuple
            }
            // keep discarding if there's a reduce/goto before current
          }
        // keep trying if there's a reduce/goto before $error
      self.stack.pop();
      state = self.states[self.stack[self.stack.length - 1].state];
    }
    throw [ 'irrecoverable error' ];
  }

  // call/cc: throw [ result ] to return result from the parser
  try {
    while (true) {
      if (!current) next();                     // ascertain input

      self.assert('__WHERE__', self.stack.length > 0);
              
      if (current.t != self.bnf.token()         // not $error tuple
          && current.t.ord in state.actions) {  // anticipated input
        if (perform(state.actions[current.t.ord], current))
          next();                               // consumed
      } else
        recover();
    }
  } catch (outcome) {
    if (outcome === true) return true; // more input
    
    // end parsing: remove stack
    delete self.stack;
    
    if (outcome instanceof Array) return outcome[0];
    self.assert('__WHERE__', outcome instanceof Error);
    throw 'lineNumber' in outcome ? new Error('('+outcome.lineNumber+') ' + outcome.message) : outcome;
  }
};

/** Creates a function which can observe {@link page.SLR1#parse}.
  *
  * The observer function collects a list of all token values and gives each rule a chance to structure the list of values
  * collected for the rule. Conceptually, it works exactly like {@link page.LL1#reflect}, i.e., it maintains a stack of value lists as follows:
  *
  * | action | effect |
  * |--------|--------|
  * | `'shift'` | ignores a current literal or pushes the current token's associated string value onto the top-level value list and returns `null`.
  * | `'reduce'` | pops the top-level value list, interacts with `_obj` if possible; returns the top-level value list or the interaction result.
  * | `'goto'` | concatenates or appends the result of the preceding `'reduce'` to the next level value list and returns null.
  * | `'accept'` | returns the top-level value list.
  * | `'error'` | returns the error message.
  *
  * For `'reduce'`, if a property with the rule's non-terminal's name can be found through `_obj`,
  * it should be a {@link Builder} function which is then called with `_env` and the popped top-level value list
  * which contains the values collected for the rule in order.
  *
  * For the immediately following `'goto'`,
  * if the `'reduce'` result is an array it is concatenated to the next level value list;
  * otherwise the result is pushed onto the next level value list.
  *
  * The name of the non-terminal is searched in the properties of `_obj` or by inheritance, i.e.,
  * if one property name is `{}` it's associated value is searched recursively.
  *
  * @param {object} [_env] environment, passed through to the functions found through the properties in `_obj`.
  * @param {object} [_obj] collection of {@link Builder} functions, i.e., semantic actions.
  *
  * @returns {Observer} a function to observe a parser.
  *
  * @see page.trace
  */
page.SLR1.prototype.reflect = function (_env, _obj) {
  var self = this;

  // closure
  return function (_at, _tuple, _action, _info, _sender) {
    self.assert('__WHERE__', typeof _at == 'number');
    self.assert('__WHERE__', _tuple instanceof page.Tuple);
    self.assert('__WHERE__', typeof _action == 'string');
    !_info || self.assert('__WHERE__', typeof _info == 'string' || typeof _info == 'number'
                          || _info instanceof page.BNF.Rule || _info instanceof page.BNF.T);
    self.assert('__WHERE__', _sender === self);
    
    switch (_action) {
    case 'shift':
      // return token's input string or null
      return _tuple.t instanceof page.BNF.Token && _tuple.t != self.bnf.token() ? _tuple.value : null;
    
    case 'reduce':
      page.assert('__WHERE__', _info instanceof page.BNF.Rule);
      page.assert('__WHERE__', self.stack.length >= _info.symbols.length);

      var len = _info.symbols.length, values = [ ];
      if (len) {
        // take len values in order off the stack
        var slice = self.stack.slice(- len);
        // concatenate the values, ignore null
        for (var s in slice)
          if ((slice[s].value) != null)
            values = values.concat(slice[s].value); // flatten array, append value
      }

      // reflect
      for (var o = _obj; o; o = o['{}'])
        if (_info.nt.name in o) {
          return o[_info.nt.name](values, _env, _tuple, _sender);
        }
      return values;
  
    case 'goto':
      return null;
  
    case 'accept':
      self.assert('__WHERE__', self.stack.length == 2);
      return self.stack.pop().value;

    case 'error': // return if non-empty, pop if empty
      page.assert('__WHERE__', self.stack.length > 0);
      page.assert('__WHERE__', !_info || typeof _info == 'string');
    
      return _info;
    }
    self.assert('__WHERE__');
  };
};

/** Creates a function which can observe 
  * {@link page.SLR1#parse}. Unlike {@link page.SLR1#reflect}
  * this observer collects the values associated with *all*
  * terminal symbols and offers them to `_obj`. 
  *
  * @param {object} [_env] environment, passed through to the functions found through the properties in `_obj`.
  * @param {object} [_obj] collection of {@link Builder} functions, i.e., semantic actions.
  *
  * @returns {Observer} a function to observe a parser.
  *
  * @see page.trace
  */
page.SLR1.prototype.yaccpar = function (_env, _obj) {
  var self = this;
  
  // closure
  return function (_at, _tuple, _action, _info, _sender) {
    self.assert('__WHERE__', typeof _at == 'number');
    self.assert('__WHERE__', _tuple instanceof page.Tuple);
    self.assert('__WHERE__', typeof _action == 'string');
    !_info || self.assert('__WHERE__', typeof _info == 'string' || typeof _info == 'number'
                          || _info instanceof page.BNF.Rule || _info instanceof page.BNF.T);
    self.assert('__WHERE__', _sender === self);
    
    switch (_action) {
    case 'shift':
      // return associated terminal value
      return _tuple.value;
    
    case 'reduce':
      self.assert('__WHERE__', _info instanceof page.BNF.Rule);
    
      // copy the values
      var len = _info.symbols.length;      
      var values = len > 0 ? self.stack.slice(- len).map(function (sv) { return sv.value; }) : [];

      for (var o = _obj; o; o = o['{}'])
        if (_info.nt.name in o) {
          return o[_info.nt.name](values);
        }
    
      // default: $$ = $1
      return len ? values[0] : null;
  
    case 'goto':
      return null;
  
    case 'accept':
      self.assert('__WHERE__', self.stack.length == 2);
      return self.stack.pop().value

    case 'error': // return if non-empty, pop if empty
      self.assert('__WHERE__', self.stack.length > 0);
      self.assert('__WHERE__', !_info || typeof _info == 'string');
    
      return _info;
    }
    self.assert('__WHERE__');
  };
};