B Content Markup Validation Grammar

Overview: Mathematical Markup Language (MathML) Version 2.0
Previous: A Parsing MathML
Next: C Content Element Definitions
 
B Content Markup Validation Grammar

Informal EBNF grammar for Content Markup structure validation
=============================================================
// Notes
//
// This defines the valid expression trees in content markup
//
// ** it does not define attribute validation -
// ** this has to be done on top
//
// Presentation_tags is a placeholder for a valid
// presentation element start tag or end tag
//
// #PCDATA is the XML parsed character data
//
// symbols beginning with '_' for example _mmlarg are internal symbols
// (recursive grammar usually required for recognition)
//
// all-lowercase symbols for example 'ci' are terminal symbols
// representing MathML content elements
//
// symbols beginning with Uppercase are terminals
// representing other tokens
//
// revised sb 3.nov.97, 16.nov.97 and 22.dec.1997
// revised sb 6.jan.98, 6.Feb.1998 and 4.april.1998
// revised sb 27.nov.2000 for MathML2.0
//
// whitespace  definitions  including presentation_tags
// Placeholder standing in for any presentation element start or end tag.
Presentation_tags ::= "presentation"        //placeholder
// Whitespace characters; the hex codes are written with the digit zero.
Space    ::= #x09 | #x0A | #x0D | #x20      //tab, lf, cr, space characters
S        ::= (Space | Presentation_tags)*   //treat presentation as space
// only for content validation
// characters
// The ranges follow the XML 1.0 Char production: the surrogate block
// #xD800-#xDFFF is excluded and the upper limit is #x10FFFF.
Char     ::= Space | [#x21 - #xD7FF] | [#xE000 - #xFFFD]
             | [#x00010000 - #x0010FFFF]    //valid XML chars
// start and end tag functions
// start(\%x) returns a valid start tag for the element \%x
// end(\%x) returns a valid end tag for the element \%x
// empty(\%x) returns a valid empty tag for the element \%x
//
// start(ci)    ::= "<ci>"
// end(cn)      ::= "</cn>"
// empty(plus)  ::= "<plus/>"
//
// The reason for doing this is to avoid writing a grammar
// for all the attributes. The model below is not complete
// for all possible attribute values.

// Tag helpers, parameterised by the element name \%x.
// Attributes are not modelled: after the element name a start tag or an
// empty tag accepts any run of characters other than '>'.
_start(\%x)   ::= "<\%x" (Char - '>')* ">"    // returns a valid start tag for the element \%x
// An end tag opens with "</" (for example "</cn>"); only whitespace may
// follow the element name.
_end(\%x)     ::= "</\%x" Space* ">"          // returns a valid end tag for the element \%x
_empty(\%x)   ::= "<\%x" (Char - '>')* "/>"   // returns a valid empty tag for the element \%x
_sg(\%x)      ::= S _start(\%x)               // start tag preceded by optional whitespace
_eg(\%x)      ::= _end(\%x) S                 // end tag followed by optional whitespace
_ey(\%x)      ::= S _empty(\%x) S      // empty tag preceded and followed by optional whitespace

// mathml content constructs
// _mmlall covers every content construct; _mmlarg is restricted to containers.
_mmlall         ::= _container | _relation | _operator | _qualifier | _other
_mmlarg         ::= _container 
_container      ::= _token | _special | _constructor
_token          ::= ci | cn | csymbol | _constantsym
_special        ::= apply | lambda | reln | fn
_constructor    ::= interval | list | matrix | matrixrow | set | vector | piecewise
                     | piece | otherwise
_other          ::= condition |  declare | sep
_qualifier      ::= lowlimit | uplimit | bvar | degree | logbase | domainofapplication
                     | momentabout
// Constant and symbol elements. emptyset is listed with the other
// constant elements so that <emptyset/> is accepted as a token.
_constantsym    ::= integers | rationals | reals | naturalnumbers | complexes | primes 
                     | exponentiale | imaginaryi | notanumber | true | false | emptyset
                     | pi | eulergamma | infinity

// relations
// Every relation is a general relation, a set relation, or the
// sequence relation tendsto.
_relation       ::= _genrel | _setrel | _seqrel2ary
_genrel         ::= _genrel2ary | _genrelnary
_genrel2ary     ::= ne
_genrelnary     ::= eq | leq | lt | geq | gt
// Set relations are the 2-ary and the n-ary set relations. Without the
// _setrel2ary alternative, in / notin / notsubset / notprsubset could
// not be derived from _relation.
_setrel         ::=  _setrel2ary | _setrelnary
_setrel2ary     ::=  in | notin | notsubset | notprsubset
_setrelnary     ::= subset | prsubset
_seqrel2ary     ::= tendsto

//operators
// Union of all operator families defined in this grammar.
// No _sepop rule is defined anywhere in the grammar, so that dangling
// alternative is dropped; sep remains reachable through _other and _mdatan.
_operator       ::= _funcop | _arithop | _calcop | _vcalcop
                | _seqop | _trigop | _classop | _statop | _lalgop
                | _logicop | _setop

//functional operators
// A functional operator is either 1-ary or n-ary.
_funcop         ::= _funcop1ary
                  | _funcopnary
// 1-ary functional operators
_funcop1ary     ::= inverse | ident | domain | codomain | image
// n-ary functional operators; the general user-defined function (fn) is n-ary
_funcopnary     ::= fn | compose

// arithmetic operators
// (note minus is both 1ary and 2ary)
// Arithmetic operators grouped by arity; root is listed on its own.
_arithop        ::= _arithop1ary
                  | _arithop2ary
                  | _arithopnary
                  | root
// 1-ary group (minus appears here and in the 2-ary group)
_arithop1ary    ::= abs | conjugate | factorial | minus | arg
                  | real | imaginary | floor | ceiling
// 2-ary group
_arithop2ary    ::= quotient | divide | minus | power | rem
// n-ary group
_arithopnary    ::= plus | times | max | min | gcd | lcm

// calculus and vector calculus
// calculus operators
_calcop         ::= int
                  | diff
                  | partialdiff
// vector calculus operators
_vcalcop        ::= divergence
                  | grad
                  | curl
                  | laplacian

// sequences and series
_seqop          ::= sum
                  | product
                  | limit

// elementary classical functions and trigonometry

_classop        ::= exp | ln | log

// Trigonometric and hyperbolic functions together with their inverses.
// The inverse forms beyond arcsin / arccos / arctan are listed as well so
// that every elementary classical function element can be validated.
_trigop         ::= sin | cos | tan | sec | csc | cot | sinh 
                    | cosh | tanh | sech | csch | coth 
                    | arcsin | arccos | arctan
                    | arcsec | arccsc | arccot
                    | arcsinh | arccosh | arctanh
                    | arcsech | arccsch | arccoth

// statistics operators
// A statistics operator is one of the n-ary ones or moment.
_statop         ::= _statopnary
                  | moment
// n-ary statistics operators
_statopnary     ::= mean
                  | sdev
                  | variance
                  | median
                  | mode

// linear algebra operators
// Linear algebra operators grouped by arity.
_lalgop         ::= _lalgop1ary
                  | _lalgop2ary
                  | _lalgopnary
// 1-ary
_lalgop1ary     ::= determinant
                  | transpose
// 2-ary
_lalgop2ary     ::= vectorproduct
                  | scalarproduct
                  | outerproduct
// n-ary
_lalgopnary     ::= selector

// logical operators
// Logical operators grouped by arity, plus the quantifiers.
_logicop        ::= _logicop1ary
                  | _logicopnary
                  | _logicop2ary
                  | _logicopquant
// 1-ary
_logicop1ary    ::= not
// 2-ary
_logicop2ary    ::= implies | equivalent | approx | factorof
// n-ary
_logicopnary    ::= and | or | xor
// quantifiers
_logicopquant   ::= forall | exists

// set theoretic operators
// Set-theoretic operators grouped by arity.
_setop          ::= _setop1ary
                  | _setop2ary
                  | _setopnary
// 1-ary
_setop1ary      ::= card
// 2-ary
_setop2ary      ::= setdiff
// n-ary
_setopnary      ::= union
                  | intersect
                  | cartesianproduct

// operator groups
// Operators and relations regrouped by arity, as used by _applybody
// and _relnbody. Every name on the right-hand side refers to a symbol
// defined in this grammar (internal symbols carry the leading '_').
_unaryop        ::=  _funcop1ary | _arithop1ary | _trigop | _classop  
                     | _calcop | _vcalcop | _logicop1ary | _lalgop1ary | _setop1ary
_binaryop       ::=  _arithop2ary | _setop2ary | _logicop2ary | _lalgop2ary
_naryop         ::=  _arithopnary | _statopnary  | _logicopnary 
                     | _lalgopnary | _setopnary | _funcopnary
_ispop          ::= int | sum | product
_diffop         ::= diff | partialdiff
_binaryrel      ::= _genrel2ary | _setrel2ary |  _seqrel2ary
_naryrel        ::= _genrelnary | _setrelnary

//separator
// sep is written as an empty tag
sep             ::= _ey(sep)

// leaf tokens and the data content of leaf elements
// note that both _mdata symbols admit Presentation constructs;
// _mdatan (used by cn) additionally admits sep
_mdatai         ::= ( #PCDATA | Presentation_tags )*
_mdatan         ::= ( #PCDATA | sep | Presentation_tags )*
ci              ::= _sg(ci) _mdatai _eg(ci)
cn              ::= _sg(cn) _mdatan _eg(cn)
csymbol         ::= _sg(csymbol) _mdatai _eg(csymbol)

// condition - expresses constraints. contains either
// a single reln (relation), or
// an apply holding a logical combination of relations, or
// a set (over which the operator should be applied)
// The alternatives are parenthesised so that the start tag and the end
// tag enclose whichever child is chosen (concatenation binds tighter
// than '|').
condition       ::= _sg(condition) ( reln | apply | set ) _eg(condition)

// domains for integral, sum , product
// a lower limit optionally followed by an upper limit, an upper limit
// alone, an interval, or a condition
_ispdomain      ::= ( lowlimit uplimit? ) | uplimit | interval | condition

// apply construct
// Note that apply is used in place of the deprecated reln in MathML2.0
//	 for relational operators as well as arithmetic, algebraic etc.
//
// Either body form sits between the apply start tag and end tag. The
// parentheses are required: without them the start tag would attach to
// _applybody only and the end tag to _relnbody only.
apply           ::= _sg(apply) ( _applybody | _relnbody ) _eg(apply)
_applybody      ::= 
      ( _unaryop _mmlarg  )                   //1-ary ops
    | (_binaryop _mmlarg _mmlarg)             //2-ary ops
    | (_naryop _mmlarg*)                      //n-ary ops, enumerated arguments
    | (_naryop bvar* condition _mmlarg)       //n-ary ops, condition defines argument list
    | (_ispop  bvar? _ispdomain? _mmlarg)     //integral, sum, product
    | (_ispop  domainofapplication? _mmlarg)  //integral, sum, product
    | (_diffop bvar* _mmlarg)                 //differential ops
    | (log logbase? _mmlarg)                  //logs
    | (moment degree? momentabout? _mmlarg*)           //statistical moment
    | (root degree? _mmlarg)                           //radicals - default is square-root
    | (limit bvar* lowlimit? condition? _mmlarg)       //limits
    | (_logicopquant bvar+ condition? (reln | apply))  //quantifier with explicit bound variables



// equations and relations - reln uses lisp-like syntax (like apply)
// the bvar and condition are used to construct a "such that" or
// "where" constraint on the relation  . 
// Note that reln is deprecated but still valid in MathML2.0
// reln wraps a relation body in reln start and end tags
reln            ::= _sg(reln) _relnbody _eg(reln)
// a binary relation takes exactly two arguments, an n-ary relation any
// number; both accept bvar and condition qualifiers before the arguments
_relnbody       ::= ( _binaryrel bvar* condition? _mmlarg _mmlarg )
                  | ( _naryrel   bvar* condition? _mmlarg* )

// fn construct
// Note that fn is deprecated but still valid in MathML2.0
fn              ::= _sg(fn) _fnbody _eg(fn)
// body is either presentation markup or a container; the internal
// symbol is spelled _container (with the leading underscore)
_fnbody         ::=  Presentation_tags | _container

// lambda construct     - note at least 1 bvar must be present
lambda          ::= _sg(lambda) _lambdabody _eg(lambda)
// one or more bound variables followed by a single container
// (multivariate lambda calculus)
_lambdabody     ::= bvar+ _container

//declare construct
declare         ::= _sg(declare) _declarebody _eg(declare)
// a ci, optionally followed by an fn or a constructor; the internal
// symbol is spelled _constructor (with the leading underscore)
_declarebody    ::= ci (fn | _constructor)?

// constructors
// interval: two arguments giving the start and the end
interval        ::= _sg(interval) _mmlarg _mmlarg _eg(interval)
// set and list share one body definition
set             ::= _sg(set) _lsbody _eg(set)
list            ::= _sg(list) _lsbody _eg(list)
// either the arguments are enumerated, or a condition (with optional
// bound variables) constructs them
_lsbody         ::= _mmlarg*
                  | ( bvar* condition _mmlarg )

// a matrix is a sequence of rows; row entries are _mmlall, which
// allows a matrix of operators
matrix          ::= _sg(matrix) matrixrow* _eg(matrix)
matrixrow       ::= _sg(matrixrow) _mmlall* _eg(matrixrow)

// a vector holds any number of arguments
vector          ::= _sg(vector) _mmlarg* _eg(vector)

piecewise     ::= _sg(piecewise) piece* otherwise? _eg(piecewise)
// a piece has exactly two children: the value, followed by the condition
// under which that value applies
piece         ::= _sg(piece) _mmlall _mmlall _eg(piece)  //allows piecewise construct of operators
otherwise     ::= _sg(otherwise) _mmlall _eg(otherwise)  //allows piecewise construct of operators

//qualifiers - note the contained _mmlarg could be a reln
// each qualifier wraps a single _mmlarg, except bvar, which wraps a ci
// optionally followed by a degree
lowlimit             ::= _sg(lowlimit)            _mmlarg     _eg(lowlimit)
uplimit              ::= _sg(uplimit)             _mmlarg     _eg(uplimit)
bvar                 ::= _sg(bvar)                ci degree?  _eg(bvar)
degree               ::= _sg(degree)              _mmlarg     _eg(degree)
logbase              ::= _sg(logbase)             _mmlarg     _eg(logbase)
domainofapplication  ::= _sg(domainofapplication) _mmlarg     _eg(domainofapplication)
momentabout          ::= _sg(momentabout)         _mmlarg     _eg(momentabout)

//relations and operators and constant symbols
// (one declaration for each operator and relation element)
// Schematic rules: presumably \%relation, \%operator and \%constantsym each
// range over the terminal element names of the corresponding group, so every
// such element is written as an empty tag - TODO confirm intended reading.
_relation       ::= _ey(\%relation)         //for example <eq/>  <lt/>
_operator       ::= _ey(\%operator)         //for example <exp/> <times/>
// named _constantsym to match the symbol referenced by _token
_constantsym    ::= _ey(\%constantsym)      //for example <integers/> <false/>

//the top level math element
//allow declare only at the head of a math element.
// zero or more declare elements, then any content constructs; the
// internal symbol is spelled _mmlall (with the leading underscore)
math            ::= _sg(math) declare* _mmlall* _eg(math)

Overview: Mathematical Markup Language (MathML) Version 2.0
Previous: A Parsing MathML
Next: C Content Element Definitions