import { LogicConcept } from '../logic-concept.js'
import { Application } from '../application.js'
import { BindingExpression } from '../binding-expression.js'
import { Symbol as LurchSymbol } from '../symbol.js'
import {
expressionFunctionApplication, isAnEF, newEF, parametersOfEF, bodyOfEF
} from './expression-functions.js'
import { metavariable } from './metavariables.js'
/**
* ## What are de Bruijn indices?
*
* One way to express bound variables without giving them explicit names is to
* use *de Bruijn indices,* a technique that replaces symbols with numbers. The
* standard version of this method applies to the $\lambda$-calculus, replacing
* a bound variable $x$ with a natural number $n$ that is equal to the number of
* levels of nested $\lambda$ expressions one must walk up the syntax tree to
* find the one that binds $x$.
*
* For example, $\lambda f.\lambda g.(f~(g~x))$ could be expressed as
* $\lambda.\lambda.(2~(1~x))$. Notice these properties of the example:
*
* 1. The inner $f$ of the original expression is replaced with a 2, because
* one must walk out two "$\lambda$ levels" to find the $\lambda$ that
* originally bound $f$.
* 1. The inner $g$ is now a 1 because it was bound by the first $\lambda$
* above it.
* 1. The bound variable names are not needed, because the order of
* quantification is clear from the indices.
*
* Had the example been something like $(\circ~(\lambda~x.x)~(\lambda~y.y))$,
* it would have become $(\circ~(\lambda.1)~(\lambda.1))$, showcasing how de
* Bruijn indices reduce $\alpha$-equivalence to mere expression equality.
*
* ## Purpose of this module
*
* This module provides tools to encode {@link MathConcept} instances using de
* Bruijn indices, but we make the following changes to extend the method from
* the standard version introduced above to a version that supports the full
* range of {@link MathConcept MathConcepts}.
*
* 1. It is not only $\lambda$ expressions that are supported, but any kind of
* {@link BindingExpression}. For example, we can encode $\forall~x.(P~x)$
* as $\forall.(P~1)$.
* 2. Because a {@link BindingExpression} can bind multiple variables, we must
* distinguish them, so our indices will be pairs, one index for the binding
* level (as above) and one index for which position within that binding was
* occupied by the symbol being encoded. For example, $\forall x,y.(>~x~y)$
* can be encoded as $\forall.(>~(1,1)~(1,2))$, where I'm using the
* standard notation for pairs. This applies even if the binder binds only
* one variable, so the example from item 1., above, would actually be
* encoded as $\forall.(P~(1,1))$.
* 3. Unlike the original de Bruijn notation, we will use zero-based indexing,
* to be consistent with everything else in our codebase, which uses
* zero-based indexing. Thus the two examples from item 2., above, would
* actually be encoded as $\forall.(>~(0,0)~(0,1))$ and $\forall.(P~(0,0))$,
* respectively.
*
* The notation used above is to illustrate the general concept of the de Bruijn
* conversion only. Obviously pairs like $(a,b)$ and bindings with no bound
* symbols like $\lambda.1$ are not valid {@link LogicConcept#fromPutdown
* putdown notation}, so they do not represent the actual encoding. The details
* of that encoding appear in the next section.
*
* ## Functions in this module
*
* Given a {@link Symbol}, we can compute its encoding as a pair of de Bruijn
* indices with {@link module:deBruijn.encodeSymbol encodeSymbol()}. We
* represent that pair as another {@link Symbol} whose name is a JSON encoding
* that includes the pair of indices, and that has the original symbol name
* stored as an attribute, so that the conversion can
* be inverted later (with {@link module:deBruijn.decodeSymbol decodeSymbol()}).
* However, if you wish to fetch just the indices of the encoded symbol, you can
* do so with {@link module:deBruijn.encodedIndices encodedIndices()}.
*
* The {@link module:deBruijn.encodeSymbol encodeSymbol()} function is not,
* however, of much use to clients, who typically want to encode entire
* {@link Expression Expressions}, not just individual symbols within them.
* Given an {@link Expression}, we can compute its de Bruijn encoding using
* {@link module:deBruijn.encodeExpression encodeExpression()}. It yields
* another expression with the following properties.
*
* * At each position where a {@link Symbol} had sat, its encoding (under
* {@link module:deBruijn.encodeSymbol encodeSymbol()}) will sit instead.
* * The entire resulting expression contains no
* {@link BindingExpression BindingExpressions}.
* * At each position where a {@link BindingExpression} had sat as a
* subexpression, instead an {@link Application} now sits, of the form
* `(A B)`, where `A` is a special symbol indicating that a
* {@link BindingExpression} has been encoded, and `B` is the encoding of the
* body of the original {@link BindingExpression}.
* * Each such `A` will be decorated with an attribute that records the
* original names of the bound symbols, so that this encoding is invertible
* (using {@link module:deBruijn.decodeExpression decodeExpression()}).
*
* To give one full, explicit example, consider the {@link Expression} (in
* {@link LogicConcept#fromPutdown putdown notation})
* `(forall (x y) , (> x y))`. Its de Bruijn encoding, according to this
* module, would be the following. I will use the symbol `"LDE DB"` as the
* special symbol meaning "encoded binding" and also as the attribute key for
* all de Bruijn attributes. Some JSON encodings have been simplified for
* readability, as you can see below.
*
* ```
* (forall
* ("LDE DB" +{"LDE DB":"['x','y']"} // list of bound variable names
* (>
* "['LDE DB',0,0]" +{"LDE DB":"JSON encoding of original x symbol"}
* "['LDE DB',0,1]" +{"LDE DB":"JSON encoding of original y symbol"}
* )
* )
* )
* ```
*
* The module also provides a {@link module:deBruijn.free freeness test} for de
* Bruijn indices, which returns true for an index $(n,m)$ iff there are at
* least $n+1$ encoded bindings surrounding the symbol.
*
* ## Coordinating with matching
*
* There are two exceptions to the above encoding description, each of which
* helps this module integrate correctly with the larger module in which it
* sits, {@link module:Matching the Matching module}.
*
* First, when encoding a symbol, if the symbol is a metavariable, then don't
* encode it at all, but leave it in its original form.
*
* Second, one of the special symbols in the matching package, the one that
* marks Expression Functions Applications, is not encoded, but is left
* unchanged under the de Bruijn encoding. That way, Expression Function
* Applications retain their meanings under the de Bruijn encoding.
*
* Finally, we do not need to apply the same protection to the Expression
* Function symbol, because we will not be applying the de Bruijn encoding to
* Expression Functions, and it would break them anyway, since it removes all
* bindings. But when decoding the image of a metavariable in a solution, an
* Expression Function may be present, so the
* {@link module:deBruijn.decodeExpression decodeExpression()} function recurs
* inside such structures.
*
* ## Expression equality
*
* As mentioned above, one of the benefits of de Bruijn indices is that two
* expressions are $\alpha$-equivalent iff their de Bruijn encodings are exactly
* equal. However, the implementation above mentions that our encoding function
* stores the original names of the bound symbols as attributes so that the
* encoding is invertible. Thus if we encode the identity function
* $\lambda x.x$ and also encode the identity function $\lambda y.y$, although
* the resulting expressions would seem equal, both being something like
* $\lambda.(0,0)$, but their attributes would distinguish them:
*
* ```
* // Encoding of (lambda x , x):
* (lambda
* ("LDE DB" +{"LDE DB":['x']}
* "['LDE DB',0,0]" +{"LDE DB":"JSON encoding of the symbol x"}
* )
* )
* // Encoding of (lambda y , y):
* (lambda
* ("LDE DB" +{"LDE DB":['y']}
* "['LDE DB',0,0]" +{"LDE DB":"JSON encoding of the symbol y"}
* )
* )
* ```
*
* Without the attributes, we would be comparing
* `(lambda ("LDE DB" "['LDE DB',0,0]"))` to itself. With the attributes, we
* can still distinguish formerly-$x$ from formerly-$y$, which is undesirable.
*
* Thus we need a way to compare two expressions for equality, but ignore these
* hidden attributes that are present only to enable the inversion of the de
* Bruijn encoding. We define the {@link module:deBruijn.equal equal()}
* function for this purpose. It simply calls the {@link MathConcept#equals
* equals()} function in the {@link MathConcept} class, passing an optional
* argument that tells it to ignore the attributes in question when doing its
* comparison.
*
* @module deBruijn
*/
/**
* This string is used for several purposes in this module.
*
* * Encoded {@link Symbol Symbols} use this string as the key for the
* attribute that stores their original text.
* * Encoded {@link BindingExpression BindingExpressions} use this string as
* the key for the attribute that stores their original list of bound symbol
* names.
* * Encoded {@link BindingExpression BindingExpressions} are
* {@link Application Applications} whose operator is a symbol with this
* string as its text.
*/
export const deBruijn = 'LDE DB'
// A symbol whose value is the above constant
const deBruijnSymbol = new LurchSymbol( deBruijn )
// Given any symbol, compute the i and j that form its index pair (i,j),
// defined in the documentation at the top of this module.
// Or, if the symbol is free where it sits, return undefined.
const symbolToIndices = symbol => {
const binders = symbol.ancestorsSatisfying(
a => a instanceof BindingExpression )
for ( let i = 0 ; i < binders.length ; i++ ) {
const boundSymbols = binders[i].boundSymbols()
for ( let j = 0 ; j < boundSymbols.length ; j++ )
if ( boundSymbols[j].text() == symbol.text() )
return [ i, j ]
}
}
/**
* This function takes an input {@link Symbol} and returns an output
* {@link Symbol} whose name encodes the de Bruijn index of the first symbol.
* Recall from the definition at the top of this module that, in this module, a
* de Bruijn index is a pair of natural numbers. Note that the context of the
* input {@link Symbol} (its ancestor chain, and whether it is bound by any of
* those ancestors) is what determines the result of this function.
*
* The encoded symbol returned by this function has an attribute that stores the
* original name of this symbol, so that this encoding can be reversed by
* {@link module:deBruijn.decodeSymbol decodeSymbol()}. For two important
* details about how symbol encoding coordinates with the Matching module, see
* the documentation at the top of this module.
*
* @function
* @param {Symbol} symbol the symbol to be encoded
* @returns {Symbol} a new symbol whose name encodes the de Bruijn indices of
* the input
* @see {@link module:deBruijn.encodedIndices encodedIndices()}
* @see {@link module:deBruijn.decodeSymbol decodeSymbol()}
* @see {@link module:deBruijn.encodeExpression encodeExpression()}
*/
export const encodeSymbol = symbol => {
// Exceptions: Don't modify special symbol used for EFAs, nor metavariables:
if ( symbol.equals( expressionFunctionApplication )
|| symbol.isA( metavariable ) )
return symbol.copy()
// Okay, now do the normal de Bruijn encoding:
const indices = symbolToIndices( symbol )
return new LurchSymbol( JSON.stringify(
indices ? [ deBruijn, ...indices ] : [ deBruijn, symbol.text() ]
) ).attr( [ [ deBruijn, symbol.toJSON() ] ] )
}
/**
* This function takes a {@link Symbol} encoded by the
* {@link module:deBruijn.encodeSymbol encodeSymbol()} function and returns the
* de Bruijn indices stored within its name.
*
* @function
* @param {Symbol} symbol a symbol that was de Bruijn-encoded
* @returns {number[]} a pair of natural numbers, the de Bruijn indices of the
* input
* @see {@link module:deBruijn.encodeSymbol encodeSymbol()}
* @see {@link module:deBruijn.decodeSymbol decodeSymbol()}
*/
export const encodedIndices = symbol => {
if ( !( symbol instanceof LurchSymbol ) ) return
try {
const parts = JSON.parse( symbol.text() )
return parts.length == 3 && parts[0] == deBruijn ?
parts.slice( 1 ) : undefined
} catch {
return undefined
}
}
/**
* Alter the name of a de Bruijn index (an encoded {@link Symbol}) so that it
* still encodes a de Bruijn index, but the two components have been adjusted
* (up or down) by the differences given in the latter two arguments. That is,
* a de Bruijn index $(i,j)$ can be adjusted with $(\Delta_i,\Delta_j)$ to
* become $(i+\Delta_i,j+\Delta_j)$.
*
* Note that only free indices are adjusted, not bound indices. For example, if
* `(forall x , (= x t))` is represented as an encoded binding wrapped around
* `(= (0,0) t)` and we increment indices, we would not want the bound `x`
* in the first expression to become free. The reason for this is that we are
* typically adjusting indices in an expression that will be moved/substituted
* inside another expression, and we need to use the standard de Bruijn method
* of avoiding "capture" by ensuring that free indices stay free even in the new
* context.
*
* If the first argument is an {@link Application}, this function distributes
* itself across the {@link Application}'s children. If it is neither a
* {@link Symbol} nor an {@link Application}, this function does nothing.
*
* @function
* @param {Symbol} target an encoded de Bruijn index
* @param {integer} deltaI the amount by which to adjust the first component of
* the index
* @param {integer} deltaJ the amount by which to adjust the second component of
* the index
* @param {integer} [depth] for recursive use only; clients should omit this.
* It is used to avoid capture by tracking the number of bindings already
* traversed in the recursion.
*/
export const adjustIndices = ( target, deltaI, deltaJ, depth = 0 ) => {
if ( target instanceof Application ) {
const depthAdjustment = isEncodedBinding( target ) ? 1 : 0
return target.children().forEach( child =>
adjustIndices( child, deltaI, deltaJ, depth + depthAdjustment ) )
}
const original = encodedIndices( target )
if ( !original ) return
target.setAttribute( 'symbol text', JSON.stringify( [
deBruijn,
original[0] + ( original[0] >= depth ? deltaI : 0 ),
original[1] + ( original[0] >= depth ? deltaJ : 0 )
] ) )
}
/**
* This function takes a {@link Symbol} encoded by the
* {@link module:deBruijn.encodeSymbol encodeSymbol()} function and returns (a
* copy of) the original symbol that was passed to the encoding function. The
* de Bruijn indices encoded in the input are discarded, and its original name
* (stored in an attribute) is restored.
*
* If it receives a symbol that has no de Bruijn encoding information stored in
* an attribute, it returns the symbol untouched.
*
* @function
* @param {Symbol} symbol a symbol that was de Bruijn-encoded
* @returns {Symbol} a copy of the original {@link Symbol} that was encoded to
* produce the input `symbol`
* @see {@link module:deBruijn.encodeSymbol encodeSymbol()}
* @see {@link module:deBruijn.encodedIndices encodedIndices()}
* @see {@link module:deBruijn.decodeExpression decodeExpression()}
*/
export const decodeSymbol = symbol => {
const serialized = symbol.getAttribute( deBruijn )
return serialized ? LogicConcept.fromJSON( serialized ) : symbol.copy()
}
/**
* This function takes an input {@link Expression} and returns an output
* {@link Expression} that is the de Bruijn-encoded form of the input. The full
* list of guarantees this function provides, and the properties its output will
* have, is given in the documentation at the top of this module.
*
* In summary, all of its {@link BindingExpression BindingExpressions} have been
* replaced with {@link Application Applications} with no bound symbols, and all
* formerly-bound symbols have been replaced with their encoded versions as de
* Bruijn indices, as computed by the
* {@link module:deBruijn.encodeSymbol encodeSymbol()}) function.
*
* This operation can be reversed by applying the
* {@link module:deBruijn.decodeExpression decodeExpression()} function.
*
* @function
* @param {Expression} expression the expression to be encoded
* @returns {Expression} the encoded version, with the properties described
* above
* @see {@link module:deBruijn.encodeSymbol encodeSymbol()}
* @see {@link module:deBruijn.decodeExpression decodeExpression()}
*/
export const encodeExpression = expression =>
expression instanceof LurchSymbol ? encodeSymbol( expression ) :
expression instanceof Application ? new Application(
...expression.children().map( encodeExpression ) ) :
// therefore expression instanceof BindingExpression:
new Application(
deBruijnSymbol.copy().attr( [ [
deBruijn, expression.boundSymbols().map( symbol => symbol.toJSON() )
] ] ),
encodeExpression( expression.body() ) )
/**
* It is often necessary to test whether an {@link Expression} is the encoding
* (under the de Bruijn encoding implemented in
* {@link module:deBruijn.encodeExpression encodeExpression()}) of a
* {@link BindingExpression}. For example, we may wish to look at a de Bruijn
* index and discern which of its ancestors binds it, by traversing the ancestor
* chain and inspecting which ones represent bindings. This function answers
* that question.
*
* @param {LogicConcept} expression the expression to test
* @returns {boolean} whether the expression is the encoding of a
* {@link BindingExpression} under the de Bruijn encoding implemented in
* {@link module:deBruijn.encodeExpression encodeExpression()}
*/
export const isEncodedBinding = expression =>
( expression instanceof Application )
&& equal( expression.child( 0 ), deBruijnSymbol )
/**
* This function takes an {@link Expression} encoded by the
* {@link module:deBruijn.encodeExpression encodeExpression()} function
* and returns (a copy of) the original expression that was passed to the
* encoding function.
*
* @function
* @param {Symbol} symbol an expression that was de Bruijn-encoded
* @returns {Symbol} a copy of the original {@link Expression} that was encoded
* to produce the input `expression`
* @see {@link module:deBruijn.encodeExpression encodeExpression()}
* @see {@link module:deBruijn.decodeSymbol decodeSymbol()}
*/
export const decodeExpression = expression => {
// Case 1: Symbols have their own decoding routine
if ( expression instanceof LurchSymbol )
return decodeSymbol( expression )
// Case 2: Expression Functions
if ( isAnEF( expression ) )
return newEF(
...parametersOfEF( expression ).map( v => v.copy() ),
decodeExpression( bodyOfEF( expression ) )
)
// Case 3: (Other) Applications
if ( expression instanceof Application ) {
if ( equal( expression.child( 0 ), deBruijnSymbol ) ) {
// subcase 1: application that encodes what used to be a binding
const symbolNames = expression.child( 0 ).getAttribute( deBruijn )
if ( !symbolNames )
throw new Error( 'Missing de Bruijn attribute on Application' )
return new BindingExpression(
...symbolNames.map( json => LogicConcept.fromJSON( json ) ),
decodeExpression( expression.child( 1 ) ) )
} else {
// subcase 2: application that is just an application
return new Application(
...expression.children().map( decodeExpression ) )
}
}
// therefore expression instanceof BindingExpression, which means this is
// not a correctly-encoded de Bruijn expression
throw new Error( 'No bindings permitted in de Bruijn encodings' )
}
/**
* See the documentation at the top of this module for why it is important to be
* able to test two expressions for equivalence, ignoring the attributes that
* may have been added during the de Bruijn encoding process that produced those
* two expressions. This function does exactly that, using the
* `attributesToIgnore` feature of the {@link MathConcept#equals equals()}
* function in the {@link MathConcept} class.
*
* @function
* @param {Expression} expression1 the left-hand side of the equality test; this
* function asks if this is equal to `expression2`
* @param {Expression} expression2 the right-hand side of the equality test;
* this function asks if this is equal to `expression1`
* @returns {boolean} whether the two {@link Expression Expressions} are equal
* except for any attributes that were added as part of the de Bruijn encoding
* process
*/
export const equal = ( expression1, expression2 ) =>
expression1.equals( expression2, [ deBruijn ] )
/**
* A de Bruijn index, as defined at the top of this module, is a symbol encoding
* a pair of natural numbers, the first of which says how many bindings one must
* pass as one walks up the ancestor chain of this symbol before we encounter
* the binding that binds this symbol. Thus a de Bruijn index is free if the
* number of bindings in its ancestor chain is less than or equal to that first
* index.
*
* This function tests that and returns true if that freeness condition is met,
* false if the first component of the de Bruijn index is smaller than the
* number of bindings above the symbol, and undefined if the argument given is
* not a de Bruijn index. Note that, because we are working with
* {@link Expressions} that have been encoded with
* {@link module:deBruijn.encodeExpression encodeExpression()}, a binding is not
* a {@link BindingExpression}, but an encoded version thereof.
*
* @param {Symbol} symbol the symbol to test for freeness
* @returns {boolean} whether the symbol is a free de Bruijn index
*/
export const free = symbol => {
const indices = encodedIndices( symbol )
if ( !indices ) return undefined
const encodedBindings = symbol.ancestorsSatisfying( isEncodedBinding )
return encodedBindings.length <= indices[0]
}
/**
* Take two subexpressions $A,B$ that share the same parent expression and are
* in de Bruijn form. We may want to ask whether a copy of $A$ occurs in $B$,
* but we cannot do so just by checking simple structural equality, as in the
* following example.
*
* Take the expression $\forall x,(Q(x)\wedge\forall y,(Q(x)\vee P(x,y)))$.
* Let $A=Q(x)$ (the one appearing before the $\wedge$) and
* $B=\forall y,(Q(x)\vee P(x,y))$. In de Bruijn form, we would have the outer
* expression $\forall(Q((0,0))\wedge\forall(Q((1,0)\vee P((1,0),(0,0))))$ and
* $A=Q((0,0))$ and $B=Q((1,0))\vee P((1,0),(0,0))$. By comparing syntax trees
* alone, we would not say that $A$ occurs in $B$. But clearly in non-de Bruijn
* form, it does. Thus we need a more sophisticated way to count occurrences,
* when the expressions are in de Bruijn form.
*
* Note that this comparison depends upon $A$ and $B$ having a common parent, or
* at least that any binding expression containing $A$ also contains $B$, so
* that symbols that are free in one (such as the $(0,0)$ in $A$) have the same
* meaning that they do in $B$ (referring to the outermost $\forall$ that is an
* ancestor of both $A$ and $B$). This property of $A$ and $B$ is a
* precondition on this routine; its results may be meaningless if this
* condition does not hold.
*
* @param {LogicConcept} ofThis the expression to search for in the other
* @param {LogicConcept} inThis the expression in which to search
* @returns {Number} the number of occurrences found
*/
export const numberOfOccurrences = ( ofThis, inThis ) => {
if ( equal( ofThis, inThis ) ) return 1
if ( isEncodedBinding( inThis ) ) {
const copy = ofThis.copy()
adjustIndices( copy, 1, 0 )
return numberOfOccurrences( copy, inThis.lastChild() )
}
return inThis.children().map(
child => numberOfOccurrences( ofThis, child )
).reduce( ( a, b ) => a + b, 0 )
}
source