Source: EBNF/EBNF.js

"use strict";

/** @fileOverview Implements `class page.EBNF extends page.BNF`.
  * @author Axel T. Schreiner <ats@cs.rit.edu>
  * @version 1.5.0
  */

/** Creates a new grammar representation.
  * @class Represents a grammar as a tree; supports choices and iterations.
  * @extends page.BNF
  * 
  * @property {page.BNF} bnf grammar represented in BNF, terminals are shared here.
  *   This object is created here and completed in {@link page.EBNF#init}.
  */
page.EBNF = function () {
  page.BNF.call(this);

  // create bnf object here
  this.bnf = new page.BNF();

  // share terminals here (does include $error and $eof)
  this.bnf.lits = this.lits;
  this.bnf.litsByValue = this.litsByValue;
  this.bnf.tokens = this.tokens;
  this.bnf.tokensByName = this.tokensByName;
};

page.subclass(page.EBNF, 'page.EBNF', page.BNF);

/** Factory method: Represent a grammar described in EBNF.
  *
  * This is bootstrapped using {@link page.EBNF.ctor} 
  * to create an LL(1) compiler for EBNF which uses {@link page.EBNF.builder}.
  * @static
  * 
  * @param {string} _grammar description in EBNF;
  *   comments extend from `#` to the end of a line.
  *
  * @returns {page.EBNF} a new grammar representation; check `.errors`.
  */
page.EBNF.from = function (_grammar) {
  page.assert('__WHERE__', typeof _grammar == 'string');
  
  // create a compiler which recognizes EBNF
  var ebnfCompiler = new page.LL1(page.EBNF.ctor());
  page.assert('__WHERE__', ebnfCompiler.ambiguous == 0 && ebnfCompiler.greedy == 0 && ebnfCompiler.errors == 0);

  // build the new grammar
  var ebnf = new page.EBNF();
  try {
    ebnfCompiler.parser(ebnfCompiler.tokenizer(/\s+|#.*/)(_grammar), ebnfCompiler.reflect(ebnf, page.EBNF.builder));
  } catch (e) {
    page.assert('__WHERE__', e instanceof Error);
    ebnfCompiler.error(e);
  }
  ebnf.errors += ebnfCompiler.errors;
  return ebnf;
};

/** Nodes in an EBNF grammar.
  * @typedef eNode
  * @type page.BNF.Lit|page.BNF.Token|page.EBNF.Alt|page.EBNF.Lst|page.EBNF.NT|page.EBNF.Rep|page.EBNF.Seq
  */

/** Asserts that a node has a type suitable for EBNF.
  * @private
  *
  * @param {string} _marker to identify position across a re-throw.
  * @param {eNode} _node to test.
  *
  * @throws {Error} if type is not suitable.
  * @see page.assert
  */
page.EBNF.prototype.eNode = function (_marker, _node) {
  this.assert(_marker, _node instanceof page.EBNF.Alt
       || _node instanceof page.EBNF.Lst || _node instanceof page.EBNF.NT
       || _node instanceof page.EBNF.Rep || _node instanceof page.EBNF.Seq
       || _node instanceof page.BNF.Lit || _node instanceof page.BNF.Token);
};

/** The EBNF grammar manually represented with {@link page.EBNF}.
  * @static
  *
  * @returns {page.EBNF} the EBNF grammar for EBNF, hand-crafted.
  */
page.EBNF.ctor = function () {
  var g = new page.EBNF();
  g.init(g.nt('grammar'),
    [ g.rule(g.nt('grammar'),     g.seq([ g.many(g.nt('token')), g.some(g.nt('rule')) ])),

      g.rule(g.nt('token'),       g.seq([ g.lit('%token'),
        g.token('Name', '[a-zA-Z][-a-zA-Z0-9_]*'), // alphanum with _ and -
        g.token('Literal', "'(?:[^'\n\\\\]|\\\\['n\\\\])+'"), // single quoted
        g.lit(';') ])),

      g.rule(g.nt('rule'),        g.seq([ g.nt('nt'), g.lit(':'), g.nt('alt'), g.lit(';') ])),

      g.rule(g.nt('nt'),          g.token('Name')),

      g.rule(g.nt('alt'),         g.someList(g.nt('seq'), g.lit('|'))),

      g.rule(g.nt('seq'),         g.some(g.nt('item'))),

      g.rule(g.nt('item'),        g.seq([ g.nt('term'),
        g.opt(g.alt([ g.nt('opt'), g.nt('some'), g.nt('many') ])) ])),

      g.rule(g.nt('opt'),         g.lit('?')),
      g.rule(g.nt('some'),        g.seq([ g.lit('+'),
        g.opt(g.seq([ g.lit('/'), g.nt('term') ])) ])),
      g.rule(g.nt('many'),        g.seq([ g.lit('*'),
        g.opt(g.seq([ g.lit('/'), g.nt('term') ])) ])),

      g.rule(g.nt('term'),
        g.seq([ g.alt([ g.nt('token_or_nt'), g.nt('lit'),
          g.seq([ g.lit('('), g.nt('alt'), g.lit(')') ])  ]) ])),

      g.rule(g.nt('token_or_nt'), g.token('Name')),

      g.rule(g.nt('lit'),         g.token('Literal'))
    ]);
  return g;
};

{
/** The EBNF grammar for EBNF.
  * @static
  */
page.EBNF.grammar =
  "    %token Name    '[a-zA-Z][-a-zA-Z0-9_]*';                               \n" +
  "    %token Literal '\\'(?:[^\\'\\n\\\\\\\\]|\\\\\\\\[\\'n\\\\\\\\])+\\'';  \n" +

  "    grammar: token* rule+;                                                 \n" +

  "    token: '%token' Name Literal ';';                                      \n" +

  "    rule: nt ':' alt ';';                                                  \n" +

  "    nt: Name;                                                              \n" +

  "    alt: seq +/ '|';                                                       \n" +

  "    seq: item+;                                                            \n" +

  "    item: term ( opt | some | many )?;                                     \n" +

  "    opt: '?';                                                              \n" +
  "    some: '+' ( '/' term )?;                                               \n" +
  "    many: '*' ( '/' term )?;                                               \n" +

  "    term: token_or_nt | lit | '(' alt ')';                                 \n" +

  "    token_or_nt: Name;                                                     \n" +

  "    lit: Literal;                                                          \n";
}

/** An object with action functions corresponding to {@link page.EBNF}`.grammar`
  * for {@link page.LL1#reflect} which require a new {@link page.EBNF} as
  * environment to represent a grammar in EBNF notation using this environment.
  * @static
  */
page.EBNF.builder = {
        // grammar: tokens rules
        // token: '%token' Name Literal ';'
        // nt: Name
        // token_or_nt: Name
  '{}': page.BNF.builder, // inherited

  rule: // nt ':' alt ';'
    function (_values, _ebnf, _tuple) {
      if (!_values[0].rule)
        return _ebnf.rule(_values[0], _values[1]);
      _ebnf.error('('+_tuple.lineno+') '+_values[0]+' can only have one rule');
      return [];
    },
    
  alt: // seq +/ '|'
    function (_values, _ebnf) {
      return _ebnf.alt(_values);
    },

  seq: // item+
    function (_values, _ebnf) {
      return _ebnf.seq(_values);
    },

  item: // term ( opt | some | many )?
    function (_values, _ebnf) {
      if (_values.length == 1)
        return _values[0];
      return _values[1](_values[0]);
    },

  opt: // '?'
    function (_values, _ebnf) {
      return function (term) { return _ebnf.opt(term); }
    },

  some: // '+' ( '/' term )?
    function (_values, _ebnf) {
      return function (term) {
        return _values.length ? _ebnf.someList(term, _values[0]) : _ebnf.some(term);
      };
    },

  many: // '*' ( '/' term )?
    function (_values, _ebnf) {
      return function (term) {
        return _values.length ? _ebnf.manyList(term, _values[0]) : _ebnf.many(term);
      };
    }
};

/** Displays EBNF grammar, appends {@link page.EBNF#bnf} and a dump.
  * @override
  * @variation EBNF
  * @returns {string}
  */
page.EBNF.prototype.dump = function () {
  var result = this.rules.map(function (rule) { return rule.dump(); }).join('\n');
  if (this.bnf) result += '\n\n' + this.bnf.dump();
  return result;
};

/** Must be called once to complete EBNF grammar initialization.
  *
  * Completes rule 0: with a new start symbol `$accept`
  * to accept the grammar's original start symbol and then `$eof`.
  *
  * Sets {@link page.BNF.Rule}`.index` as index into the rules array.
  *
  * Uses `.bnf` to represent the grammar in BNF and imports `empty` and the
  * `first` and `follow` sets. The algorithm is careful to create new non-terminals
  * for concealed BNF rules to make the EBNF non-terminals visible in the BNF grammar.
  * @override
  * @variation EBNF
  *
  * @param {page.EBNF.NT} [_start] start symbol of grammar,
  *   default is the non-terminal of rule 1.
  * @param {page.BNF.Rule[]} [_rules] (additional) grammar rules.
  *
  * @see page.BNF#initTree
  */
page.EBNF.prototype.init = function (_start, _rules) {
  var self = this;
    
  // none or valid start symbol
  !_start || self.assert('__WHERE__', _start instanceof page.EBNF.NT);
  
  // common code
  self.initTree(_rules);
  
  // use once only: rule 0 not yet filled in
  self.assert('__WHERE__', !self.rules[0].symbol);

  // rule zero: $accept: start $eof
  self.rules[0].symbol = self.seq([ _start ? _start : self.rules[1].nt, self.lit() ]);

  // index the rules
  self.rules.forEach(function (rule, n) { rule.index = Number(n); });
    
  // translate all but rule zero; don't translate terminals
  self.rules.forEach(function (rule, n) {
    if (n > 0) {
      var nt = rule.nt.toBNF(self);
      if (rule.symbol instanceof page.EBNF.NT)
        self.bnf.rules.push( self.bnf.rule( nt, [ rule.symbol.toBNF(self) ]));
      else if (rule.symbol instanceof page.BNF.T)
        self.bnf.rules.push( self.bnf.rule( nt, [ rule.symbol ]));
      else
        rule.symbol.toBNF(self, nt);
    }
  });
  
  self.bnf.init(undefined, []);

  // connect rule zero to BNF rule zero, i.e., $accept and seq to BNF's $accept
  self.rules[0].nt.nt = self.bnf.nt();                                 // $accept.nt = $accept
  self.rules[0].symbol.nt = self.bnf.nt();                             // seq.nt = $accept
  self.rules[0].symbol.nodes[0].nt = self.bnf.rules[0].symbols[0];     // start.nt = start.nt

  // import empty, first, and follow
  self.nts.forEach(function (nt) { nt.init(); });
  self.rules.forEach(function (rule) { rule.init(); });

  // import error count, if any
  self.errors += self.bnf.errors; self.bnf.errors = 0;
};

/** Factory method to create a new representation of a choice for EBNF.
  *
  * @param {eNode[]} _nodes alternatives, not empty.
  *
  * @returns {eNode} a new representation of a choice for EBNF;
  *   flattens if there is a single descendant.
  */
page.EBNF.prototype.alt = function (_nodes) {
  var self = this;
  
  self.assert('__WHERE__', _nodes instanceof Array && _nodes.length > 0);
  _nodes.forEach(function (node) { self.eNode('__WHERE__', node); });

  return _nodes.length == 1 ? _nodes[0] : new page.EBNF.Alt(_nodes);
};

/** Factory method to create a new representation of a delimited iteration for EBNF;
  * represents 1 or more occurrences.
  *
  * @param {eNode} _node body.
  * @param {eNode} _delim delimiter.
  *
  * @returns {page.EBNF.Lst} a new representation of a delimited iteration for EBNF.
  */
page.EBNF.prototype.someList = function (_node, _delim) {
  this.eNode('__WHERE__', _node);
  this.eNode('__WHERE__', _delim);

  return new page.EBNF.Lst(_node, _delim, 1);
};

/** Factory method to create a new representation of a delimited iteration for EBNF;
  * represents 0 or more occurrences.
  *
  * @param {eNode} _node body.
  * @param {eNode} _delim delimiter.
  *
  * @returns {page.EBNF.Lst} a new representation of a delimited iteration for EBNF.
  */
page.EBNF.prototype.manyList = function (_node, _delim) {
  this.eNode('__WHERE__', _node);
  this.eNode('__WHERE__', _delim);

  return new page.EBNF.Lst(_node, _delim, 0);
};

/** Factory method to create a unique non-terminal representation,
  * maintains {@link page.EBNF#nts} and {@link page.EBNF#ntsByName},
  * checks against {@link page.EBNF#tokensByName}.
  * @override
  * @variation EBNF
  *
  * @param {string} [_name]  non-terminal's name, cannot start with `$`.
  *   If omitted represents the `$accept` non-terminal.
  *
  * @returns {page.EBNF.NT} a unique non-terminal representation.
  */
page.EBNF.prototype.nt = function (_name) {
  if (arguments.length == 0)
    _name = '$accept';
  else {
    this.assert('__WHERE__', typeof _name == 'string' && _name.length > 0 && _name[0] != '$');
    this.assert('__WHERE__', !(_name in this.tokensByName)); // token
  }

  if (_name in this.ntsByName)
    return this.ntsByName[_name];

  var result = new page.EBNF.NT(_name, this.nts.length);
  this.nts.push(result);
  return this.ntsByName[_name] = result;
};

/** Factory method to create a new representation of an iteration for EBNF;
  * represents 0 to 1 occurence.
  *
  * @param {eNode} _node body.
  *
  * @returns {page.EBNF.Rep} a new representation of an iteration for EBNF.
  */
page.EBNF.prototype.opt = function (_node) {
  this.eNode('__WHERE__', _node);
  return new page.EBNF.Rep(_node, 0, 1);
};

/** Factory method to create a new representation of an iteration for EBNF;
  * represents 1 or more occurrences.
  *
  * @param {eNode} _node body.
  *
  * @returns {page.EBNF.Rep} a new representation of an iteration for EBNF.
  */
page.EBNF.prototype.some = function (_node) {
  this.eNode('__WHERE__', _node);
  return new page.EBNF.Rep(_node, 1, undefined);
};

/** Factory method to create a new representation of an iteration for EBNF;
  * represents 0 or more occurrences.
  *
  * @param {eNode} _node body.
  *
  * @returns {page.EBNF.Rep} a new representation of an iteration for EBNF.
  */
page.EBNF.prototype.many = function (_node) {
  this.eNode('__WHERE__', _node);
  return new page.EBNF.Rep(_node, 0, undefined);
};

/** Factory method to create a rule representation for EBNF.
  * Sets it as rule's non-terminal's {@link page.EBNF.NT#rule}
  * @override
  * @variation EBNF
  *
  * @param {page.EBNF.NT} _nt  left-hand side, non-terminal, may not yet have a rule.
  * @param {eNode} [_symbol] single symbol for right-hand side.
  *   if omitted, must be rule 0 for `$accept`.
  *
  * @returns {page.EBNF.Rule} a new rule representation.
  *
  * @see page.EBNF.init
  */
page.EBNF.prototype.rule = function (_nt, _symbol) {

  if (arguments.length == 1) {
    this.assert('__WHERE__', this.rules.length == 0 && _nt == this.nt());
    
    _symbol = null;  // filled in during init

  } else {
    this.assert('__WHERE__', _nt instanceof page.EBNF.NT && !_nt.rule);
    this.eNode('__WHERE__', _symbol);
  }
  
  var result = new page.EBNF.Rule(_nt, _symbol);
  // set new rule as rule's nt's rule
  result.nt.rule = result;
  return result;
};

/** Factory method to create a new representation of a sequence for EBNF.
  *    
  * @param {eNode[]} _nodes sequence, not empty.
  *
  * @returns {eNode} a new representation of a sequence for EBNF;
  *   flattens if there is a single descendant.
  */
page.EBNF.prototype.seq = function (_nodes) { 
  var self = this;
  
  self.assert('__WHERE__', _nodes instanceof Array && _nodes.length > 0);
  _nodes.forEach(function (node) { self.eNode('__WHERE__', node); });

  return _nodes.length == 1 ? _nodes[0] : new page.EBNF.Seq(_nodes);
};