import SyntacticTypes from './syntactic-types.js'
import { Template } from './template.js'
/**
* An abstract syntax tree (AST) is a data structure typically created by
* parsing text using a grammar. We create this class for that purpose. It has
* the following attributes:
*
* * Its contents, which may be a string to indicate that this AST is a leaf
* of the tree, or it may be an array of other ASTs, which would be the
* operator and operands, in that order.
* * The {@link Language} from which it was parsed.
*
* Any AST object then has methods for workflows that involve parsing,
* representing, and classifying.
*/
export class AST {
/**
* Construct a new AST from two ingredients, the {@link Language} from which
* it was parsed, and its contents (which can be a string if this AST is a
* leaf, or an array of ASTs otherwise). Also, the second parameter can be
* a JSON structure of nested arrays and strings, which will be recursively
* passed to AST constructors to convert it to an AST.
*
* Note that constructing an AST from a JSON structure takes it as given,
* and does not alter it. To construct an AST that performs changes based
* on associativity of concepts, see {@link AST.fromJSON fromJSON()}.
*
* In an AST, every head (i.e., operator) must be a leaf AST. Even if, for
* example, you plan to use ASTs to represent function application, you must
* use a leaf AST to represent the idea of function application. Thus, for
* example, "f composed with g, applied to x" should not be represented as
* the structure
* `[ [ 'compose', 'f', 'g' ], 'x' ]`, but rather as the structure
* `[ 'apply', [ 'compose', 'f', 'g' ], 'x' ]`.
*
* @param {Language} language - the language from which this AST was parsed
* @param {String|Array} contents - see explanation above
* @see {@link AST.fromJSON fromJSON()}
*/
constructor ( language, contents ) {
this.language = language
if ( contents instanceof Array ) {
if ( contents.length == 0 )
throw new Error( 'An empty AST is not allowed' )
this.contents = contents.map( item =>
item instanceof AST ? item : new AST( language, item ) )
if ( this.contents[0].isCompound() )
throw new Error(
`Non-leaf operator in AST: ${this.contents[0].toString()}` )
} else if ( typeof( contents ) == 'string' ) {
this.contents = contents
} else {
throw new Error( 'AST contents must be a string or an array' )
}
}
/**
* An AST may be a leaf or it may have subtrees, that is to say, it may be
* *compound.* This function detects which is the case, and returns true
* iff this AST is compound.
*
* @return {boolean} whether this AST is a non-leaf (i.e., is compound)
* @see {@link AST#isLeaf isLeaf()}
*/
isCompound () { return this.contents instanceof Array }
/**
* An AST may be a leaf or it may have subtrees. This function detects
* which is the case, and returns true iff this AST is a leaf. This is
* equivalent to `!this.isCompound()`, and is therefore just a convenience
* function.
*
* @return {boolean} whether this AST is a leaf
* @see {@link AST#isCompound isCompound()}
*/
isLeaf () { return !this.isCompound() }
/**
* A non-leaf AST has a nonempty array of children, beginning with the
* operator of the AST, often called its head. This function returns that
* entry. (It does not shift the head off the array, but only returns it,
* leaving the AST unaltered.) If this AST is a leaf, this function
* returns undefined.
*
* @returns {AST} the operator of this AST, if one exists
* @see {@link AST#args args()}
*/
head () { return this.isCompound() ? this.contents[0] : undefined }
/**
* A non-leaf AST has a nonempty array of children, beginning with the
* operator of the AST, often called its head. This function returns the
* rest of that list, the operands, as a JavaScript array (not an AST). If
* this AST is a leaf, this function returns undefined.
*
* @returns {...AST} the operands of this AST
* @see {@link AST#head head()}
*/
args () { return this.isCompound() ? this.contents.slice( 1 ) : undefined }
/**
* A non-leaf AST has a nonempty array of children, beginning with the
* operator of the AST, often called its head, followed by the operands.
* This function returns the number of operands (which is one less than the
* length of the array). If this AST is a leaf, this function returns
* undefined.
*
* @returns {number} the number of operands
* @see {@link AST#args args()}
*/
numArgs () { return this.isCompound() ? this.contents.length - 1 : undefined }
/**
* A non-leaf AST has a nonempty array of children, beginning with the
* operator of the AST, often called its head, followed by the operands.
* This function returns the operand at the given index, where the first
* operand after the head has index 0, and the last operand has index
* `this.numArgs() - 1`. If this AST is a leaf, this function returns
* undefined.
*
* @param {number} index - the index of the operand to return
* @returns {AST} the operand at the given index
* @see {@link AST#args args()}
* @see {@link AST#numArgs numArgs()}
*/
arg ( index ) { return this.isCompound() ? this.contents[index+1] : undefined }
/**
* The {@link module:SyntacticTypes SyntacticTypes module} defines a set of
* syntactic types common to mathematical notation. Each {@link Converter}
* instance can define a set of semantic types to add to the set of
* syntactic types. Those semantic types are called *concepts.* This
* function asks whether this AST represents a concept. This can happen in
* two ways.
*
* * First, if this AST is {@link AST#isCompound compound}, then its
* {@link AST#head head} may represent a concept. Consider two examples:
* * If the operator were the leaf `"expression"`, that would indicate
* that this AST represents an instance of the syntactic type
* `"expression"`, and this AST would not be a concept, and this
* function would return false.
* * If this AST were for a {@link Language} whose {@link Converter}
* defined the concept `"factorial"` to be a specific type of
* expression, and this AST began with the operator `"factorial"`,
* then this function would return true, because this AST is not an
* instance of a syntactic type (expression), but an instance of a
* semantic type (factorial).
* * Second, if this AST is a {@link AST#isLeaf leaf}, then its contents
* (as a string) may represent a concept. For example, if this AST were
* for a {@link Language} whose {@link Converter} defined the constant
* `"pi"` as a concept, and this AST is a leaf containing just the string
* `"pi"`, then this function would return true.
*
* @returns {boolean} `true` if this AST represents a concept
* @see {@link Converter#isConcept isConcept()}
* @see {@link AST#concept concept()}
*/
isConcept () {
return this.language.converter.isConcept(
this.isCompound() ? this.head().contents : this.contents )
}
/**
* The {@link module:SyntacticTypes SyntacticTypes module} defines a set of
* syntactic types common to mathematical notation. Each {@link Converter}
* instance can define a set of semantic types to add to the set of
* syntactic types. Those semantic types are called *concepts.* An AST can
* represent a concept in either of two ways, as documented in the
* {@link AST#isConcept isConcept()} method.
*
* When an AST represents a concept, this function returns that concept as
* an object with the following properties.
*
* * `parentType`, the name of the parent syntactic type
* * `putdown`, the putdown representation of the concept (for example, it
* might be `(+ summand summand)` for the concept `addition`)
* * `typeSequence`, an array of the syntactic types of each of the
* arguments extracted from the putdown notation (for example, it would
* be `["summand","summand"]` in the example above)
*
* @returns {Object} the data about the concept represented in this AST
*/
concept () {
return this.language.converter.concept(
this.isCompound() ? this.head().contents : this.contents )
}
/**
* One of the options that can be provided when defining a new concept in a
* {@link Converter} is how it associates when used multiple times in a row.
* For example, if we see the ordinary mathematical notation `1+2+3`, does
* that mean `(1+2)+3` or `1+(2+3)`? In some cases, it doesn't matter, but
* in others it does. For example, the implication arrow of propositional
* logic typically associates to the right, as in `A->(B->C)`.
*
* This function checks to ensure that every subtree of this AST associates
* according to the options associated with every one of its operators.
* This can be used to filter out undesired results from a list of possible
* ASTs generated by parsing. If an operator has no `"associates"` option
* set for it, then it has no restrictions on how it associates, and thus
* ambiguity of parsing remains for that operator.
*
* Note that this function checks only when an operator is nested inside of
* itself, because that is what the `"associates"` option on concepts means.
* Note further that it applies only to binary operators; associativity is
* not defined for other types. In other words, only subtrees of the form
* `op(op(arg1,arg2),arg3)` or `op(arg1,op(arg2,arg3))` are considered when
* determining the return value of this function.
*
* Finally, if a subtree was created originally from notation that used
* groupers, then there was no ambiguity to resolve, and thus there is no
* check we need to perform. The user is always free to override the
* default associating behavior with the use of groupers.
*
* @returns {boolean} `true` if this AST associates according to the
* options provided in the `"associates"` option for all of the concepts
* used within it, `false` otherwise
*/
associatesCorrectly () {
// Is the root of this AST of the form that needs checking?
if ( this.numArgs() == 2 && this.isConcept() ) {
const direction = this.concept().associates
// if we're supposed to associate left, then the right subtree must
// not be an application of this same concept (unless it was created
// by using groupers)
if ( direction == 'left' ) {
const right = this.arg( 1 )
if ( !right.wasGrouped && right.concept() == this.concept() )
return false
}
// now check the same thing symmetrically for right associativity
if ( direction == 'right' ) {
const left = this.arg( 0 )
if ( !left.wasGrouped && left.concept() == this.concept() )
return false
}
}
// The root doesn't contain any problems, so check all subtrees.
return !this.isCompound()
|| this.contents.every( subtree => subtree.associatesCorrectly() )
}
/**
* A simple string representation of this AST, useful for debugging. For
* example, if this AST represented the addition of x and the product of y
* and z, the representation might be `addition(x,multiplication(y,z))`.
*
* @returns {String} a simple string representation of this AST
*/
toString () {
if ( this.isLeaf() ) return this.contents
const headRepr = this.head().toString()
const argsReprs = this.args().map( arg => arg.toString() )
return `${headRepr}(${argsReprs.join( ',' )})`
}
/**
* Converts this AST into a nested hierarchy of JavaScript arrays, which is
* isomorphic to the AST hierarchy itself, but without any of the data or
* features provided by the AST class. For example, the {@link
* AST#isCompound compound} AST whose {@link AST#toString string
* representation} is `addition(x,multiplication(y,z))` would have the JSON
* form `[ 'addition', 'x', [ 'multiplication', 'y', 'z' ] ]`. But a leaf
* AST has its string contents as its JSON form (i.e., it is not an array).
*
* @returns {Array|String} a JSON representation of this AST
* @see {@link AST.fromJSON fromJSON()}
* @see {@link AST#toString toString()}
*/
toJSON () {
return this.isLeaf() ? this.contents : this.contents.map( x => x.toJSON() )
}
/**
* The {@link Language} class can parse text into hierarchies of JavaScript
* arrays. Those arrays sometimes contain unnecessary information about the
* details of the parsing. For instance, if `x*y` were the notation for
* multiplication in some language, then that {@link Language} class's
* {@link Language#parse parsing function} would return an array with
* contents something like `['multiplication','x','*','y']`. In other
* words, all tokens that were parsed become part of the parsing result,
* even though one of them is redundant now that the semantic type (in this
* example `"multiplication"`) has been added.
*
* This function not only converts such an array into an AST instance, but
* also removes the unnecessary tokens (the `'*'` entry in the example
* above), since the meaning is clear from the first element of the array,
* the name of the semantic type parsed from the `'*'` infix operator.
*
* This function also flattens any nested ASTs for concepts that are
* classified as associative. For example, if the JSON representation of
* the AST were `['addition','x',['addition','y','z']]`, and the concept of
* addition were marked associative (in the {@link Converter}'s list of
* concepts), then this function will not create a nested tree imitating
* that JSON, but will create one isomorphic to `['addition','x','y','z']`
* instead. (Note that associativity is actually not a predicate about each
* operator separately, but a binary relation between the outer and inner
* operators, so this example is a special case.)
*
* @param {Language} language - the language from which the JSON was parsed,
* and which should be used when constructing the AST
* @param {Array} json - a hierarchy of JavaScript Arrays (with strings as
* leaves) representing an AST
* @param {boolean?} top - `true` if this is the top-level call, for internal
* use only; clients should omit this parameter
* @returns {AST} the AST represented by the JSON
*/
static fromJSON ( language, json, top = true ) {
// Base case: Leaf AST
if ( !( json instanceof Array ) ) {
if ( typeof( json ) != 'string' )
throw new Error( `Invalid JSON for conversion to AST: ${json}` )
return new AST( language, json )
}
// If the head does not represent a concept, then it must be a syntactic
// type only, so the only processing we do is call .compact() iff this
// is the top-level call (which includes associativity flattening).
if ( json.length == 0 )
throw new Error( 'Cannot build AST from empty array' )
const concept = language.converter.concepts.get( json[0] )
if ( !concept ) {
const result = new AST( language, json.map(
piece => AST.fromJSON( language, piece, false ) ) )
return top ? result.compact() : result
}
// The head represents a concept, so find which grammar rule matches
// that concept.
json = json.slice()
const head = json.shift()
const rhss = language.rulesFor( head )
for ( let i = 0 ; i < rhss.length ; i++ ) {
// Does this rule have the correct size and contents?
if ( rhss[i].length != json.length ) continue
const matches = rhss[i].every( ( piece, index ) => {
const isNotation = piece instanceof RegExp
const isText = !( json[index] instanceof Array )
return isNotation == isText
&& ( !isNotation || piece.test( json[index] ) )
} )
if ( !matches ) continue
// Yes, it does, and the rule is defined by a single RegExp, and
// thus has just one argument. We apply the head to that one
// argument when building the AST. Again, call .compact() iff
// this is the top-level call (which includes associativity
// flattening).
if ( rhss[i].notation instanceof RegExp ) {
const result = new AST( language, [
new AST( language, head ),
AST.fromJSON( language, json[0], false )
] )
return top ? result.compact() : result
}
// Yes, it does, and the rule is defined by a notation array, so we
// lift out the arguments that are not constants, and adjust indices
// to remove those constants (such as the '*' in the example in the
// docs above).
json = json.filter( ( _, index ) =>
!( rhss[i][index] instanceof RegExp ) )
const result = new AST( language, [
new AST( language, head ),
...json.map( ( _, index ) => AST.fromJSON( language,
json[rhss[i].putdownToNotation[index]], false ) )
] )
return top ? result.compact() : result
}
// The concept in the head was not in the grammar, so that's an error.
throw new Error( `No notational match for ${JSON.stringify( json )}` )
}
// Internal use only
// An AST created through parsing will typically not be in compact form.
// Specifically, it will have a nested hierarchy of both syntactic and
// semantic types. For example, the expression `x+y` might not be
// represented as the simple `[ 'addition', 'x', 'y' ]` array, but as the
// unnecessarily complex array
// `[ 'Expression', [ 'NumberExpression', [ 'SumExpression', [ 'addition', 'x', 'y' ] ] ] ]`.
// Compact form removes all wrappers that serve only to label an AST with
// its syntactic type, leaving only a hierarchy of semantic information.
// This function also flattens associative operators, where an associative
// operator is defined by those that have the "associative" option set to
// a nonempty array in the concept's definition in Converter#addConcept().
compact () {
// console.log( `\t${this}` )
// Ensure valid call:
if ( this.length == 0 )
throw new Error( `Empty ASTs not allowed` )
// Base case: Leaves are already compact; return a copy
if ( this.isLeaf() ) {
// console.log( '\t\tleaf' )
return new AST( this.language, this.contents )
}
// If it's just groupers around an expression, take them off, but mark
// the result as formerly having had groupers around it:
const head = this.head().contents
if ( head.startsWith( 'GroupedAtomic' ) && this.numArgs() == 1 ) {
// console.log( '\t\tgrouped atomic' )
const result = this.arg( 0 ).compact()
result.wasGrouped = true
return result
}
// If it's a syntactic type, it should just be a wrapper around a subtype,
// so we take the wrapper off:
if ( SyntacticTypes.types.includes( head ) ) {
// Ensure one argument only:
if ( this.numArgs() != 1 )
throw new Error( `Invalid AST: ${this}` )
// If it's a leaf, recur on it:
const inner = this.arg( 0 )
if ( inner.isLeaf() ) {
// console.log( '\t\tunwrapping syntactic type around leaf' )
return inner.compact()
}
// Inner syntactic type? Do a few sanity checks, then recur on it:
const innerHead = inner.head().contents
if ( SyntacticTypes.types.includes( innerHead ) ) {
if ( inner.numArgs() != 1 )
throw new Error( `Invalid AST: ${inner}` )
if ( !SyntacticTypes.isSupertype( head, innerHead ) )
throw new Error(
`Invalid AST, ${head} not a supertype of ${innerHead}` )
// console.log( '\t\tunwrapping syntactic type around inner syntactic type' )
return inner.compact()
}
// If inner is a concept, no sanity checks required; just recur:
if ( inner.isConcept() ) {
// console.log( '\t\tunwrapping syntactic type around inner concept' )
return inner.compact()
}
// Conclusion: We have a non-leaf, non-concept, non-syntactic type,
// which is never supposed to happen:
throw new Error( `Invalid AST: ${this}` )
}
// Now we know it's a semantic type, so it will have 0 or more arguments.
// If it has 0, unwrap it so it doesn't look like a function application.
const concept = this.concept()
if ( concept.typeSequence.length == 0 && this.numArgs() == 0 ) {
// console.log( '\t\tunwrapping concept w/no args to not look like a function' )
return this.head().compact()
}
// console.log( `\t\trecurring on ${this.numArgs()} args...` )
// It has at least 1 argument, so we need to recur on it/them:
const recur = this.args().map( arg => arg.compact() )
// Apply any associativity flattening permitted by the concept:
let didFlatten = false
for ( let i = recur.length - 1 ; i >= 0 ; i-- ) {
if ( recur[i].isCompound()
&& concept.associative.includes( recur[i].head().contents ) ) {
recur.splice( i, 1, ...recur[i].args() )
didFlatten = true
// console.log( `\t\tassociativity flattening: ${recur}` )
} else {
// console.log( `\t\tno flattening: ${recur[i]} not in [${concept.associative}]` )
}
}
// Done:
const result = new AST( this.language, [ head, ...recur ] )
result.wasFlattened = didFlatten
// console.log( `\t\t\tbuilt from recursive results: ${result}` )
return result
}
/**
* Represent this AST in the given language. For example, if the AST were
* one whose {@link AST#toString string representation} were
* `addition(x,y)`, then we might call `.toLanguage(latex)` on that
* AST and expect to get `x+y`, or we might call `.toLanguage(putdown)` and
* expect to get `(+ x y)`. The parameter passed to `toLanguage()` must be
* a {@link Language} that shares the same {@link Converter} instance as
* this AST's language.
*
* This function requires the AST on which it is called to be in compact
* form, as produced by the {@link AST#compact compact()} member function.
* The behavior of this function is undefined if this requirement is not
* met.
*
* @param {Language} language - the language in which to write the
* expression stored in this AST
* @returns {String} the representation, in the specified language, of this
* AST
*/
toLanguage ( language ) {
if ( language.converter != this.language.converter )
throw new Error(
`Not part of same Converter: ${language.name}, ${this.language.name}` )
// console.log( `${this}.toLanguage( ${language.name} )` )
// Base case: A leaf should match some concept in the source language
if ( this.isLeaf() ) {
const rhs = language.rulesFor( this )[0]
// console.log( `\tleaf w/rhs = ${rhs}` )
// If the RHS is just one RegExp, it might be a non-constant pattern.
// In that case, if it can be re-used as is in the target language,
// let's do that.
if ( rhs.length == 1 && ( rhs[0] instanceof RegExp )
&& rhs[0].test( this.contents ) )
return this.contents
// We must use the language's notation for this concept:
if ( typeof( rhs.notation ) != 'string' )
throw new Error( `Invalid rule for ${this.contents}: ${rhs.notation}` )
return rhs.notation
}
// if it's just a syntactic type wrapper, ensure it's around exactly 1 item
const head = this.head().contents
if ( SyntacticTypes.types.includes( head ) ) {
// console.log( `\tsyntactic type w/head = ${head}` )
if ( this.numArgs() != 1 )
throw new Error( `Invalid AST: ${this}` )
// return the interior, possibly with groupers if the type
// hierarchy requires it (the inner is not a subtype of the outer)
const result = this.arg( 0 ).toLanguage( language )
const innerHead = this.arg( 0 ).head().contents
if ( !SyntacticTypes.types.includes( innerHead )
|| SyntacticTypes.isSupertypeOrEqual( head, innerHead ) )
return result
if ( language.groupers.length == 0 )
throw new Error( 'Cannot fix precedence in language without groupers' )
const left = language.groupers[0]
const right = language.groupers[1]
return left + result + right
}
// since it's not a syntactic type, it better be a concept
if ( !this.isConcept() )
throw new Error( 'Not a syntactic type nor a concept: ' + head )
// if it's an atomic concept with one argument, defined by a regular
// expression, just return the argument, because this is a base case
const concept = this.concept()
if ( concept.putdown instanceof RegExp ) {
// console.log( `\tatomic concept w/putdown = ${concept.putdown}` )
if ( this.numArgs() != 1 )
throw new Error( `Invalid AST: ${this}` )
return this.arg( 0 ).contents
}
// recursively compute the representation of the arguments, wrapping
// them in groupers if necessary
// console.log( `\tnon-atomic concept ${head}...recurring...` )
if ( concept.typeSequence.length != this.numArgs() ) {
// wrong arity, but is it because of associativity flattening?
// only throw an error if it could not have been from that:
if ( !this.wasFlattened || concept.typeSequence.length < 2
|| this.numArgs() < concept.typeSequence.length )
throw new Error( `Invalid type sequence for AST: ${this}` )
}
const recur = this.args().map( ( piece, index ) => {
const result = piece.toLanguage( language )
// the following line rounds down to support associative flattening
// (if there are too many args, treat extras like the last arg)
const outerType = concept.typeSequence[
Math.min(index,concept.typeSequence.length-1)]
const pieceType = piece.head()?.contents
const innerType = piece.isConcept() ? piece.concept().parentType
: pieceType
const correctNesting = !pieceType
|| outerType == pieceType
|| outerType == innerType
|| SyntacticTypes.isSupertype( outerType, innerType )
if ( language.groupers.length > 0 && !correctNesting ) {
const left = language.groupers[0]
const right = language.groupers[1]
return left + result + right
} else {
return result
}
} )
// get the default way to write that concept in this language
// and use it as a template to fill in to get the result
const rhs = language.rulesFor( this )[0]
const template = new Template( rhs.notation, rhs.variables )
// console.log( `\tabout to use template = ${template}` )
return language.linter( template.fillIn( recur ).replace( /\s+/g, ' ' ) )
}
}
source