/**
* A source map is an object that tracks a specific type of correspondence
* between two strings, reminiscent of the source maps used when compiling
* or minifying JavaScript code for use in web browsers. The first string,
* called the {@link SourceMap#source source text}, is the original text
* (which can be computer code or any other text), and remains unchanged
* throughout the life of the source map object. The second string, called
* the {@link SourceMap#modified modified text} begins life equal to the
* source text, but can be changed thereafter with
* {@link SourceMap#modify modifications/replacements}.
*
* The purpose of the source map is to keep track of which sequences of
* characters in the source text correspond to which sequences of characters
* in the modified text, so that any position in one string can be converted
* to the corresponding position in the other string.
*
* In a simple example, let's say we have the source text "I wonder what 5+9
* equals." Then we run on that text an algorithm that finds all occurrences
* of simple arithmetic expressions and computes them, replacing the original
* expression with its result, in this case producing "I wonder what 14
* equals." The source map would represent the following facts:
*
* * Characters 0 through 13 in the source text correspond to characters 0
* through 13 in the modified text. (They are "I wonder what " in both
* cases.)
* * Characters 14 through 16 in the source text ("5+9") correspond to
* characters 14 through 15 in the modified text ("14").
* * Characters 17 through 24 in the source text correspond to characters 16
* through 23 in the modified text. (They are " equals." in both cases.)
*
* In general, a source map will always be an array of such correspondences,
* each of which is a quadruple of integer indices, two into the source text
* and two into the modified text. Such data would be stored internally as
* that array of quadruples,
*
* ```json
* [
* [ 0, 13, 0, 13 ],
* [ 14, 16, 14, 15 ],
* [ 17, 24, 16, 23 ]
* ]
* ```
*
* Quadruples at even indices correspond to unmodified sequences of characters
* while quadruples at odd indices correspond to modified sequences of
* characters. In the list above, entries 0 and 2 are for unmodified text and
* entry 1 is the only modified text.
*
* To use this class, the user creates an instance, passing the original
* (unmodified) source text. Then the user can do modifications (that is,
* replacements) on the source text by calling {@link SourceMap#modify
* modify()}, taking care to make such calls in increasing order of index into
* the modified text. (See documentation for {@link SourceMap#modify
* modify()}, below.) At any point, functions such as
* {@link SourceMap#sourcePosition sourcePosition()},
* {@link SourceMap#modifiedPosition modifiedPosition()},
* {@link SourceMap#sourceLineAndColumn sourceLineAndColumn()}, and
* {@link SourceMap#modifiedLineAndColumn modifiedLineAndColumn()} are
* available to perform lookup in either direction (source to modified, or the
* reverse).
*/
export class SourceMap {
/**
* Construct a new source map with the given source text. The modified
* text will be initialized to the same text, and the initial mapping will
* map all characters in the source text to their corresponding position
* in the modified text, and vice versa.
*
* @param {*} sourceText the string to be used as the source text of this
* source map (although any object can be passed, and will be converted
* to a string)
*
* @see {@link SourceMap#source source()}
* @see {@link SourceMap#modified modified()}
*/
constructor ( sourceText ) {
this._source = `${sourceText}`
this._modified = this._source
this._nextIndex = 0
while ( this._source.indexOf(
SourceMap.markerWithIndex( this._nextIndex ) ) > -1 )
this._nextIndex
this._mapping = [ [ 0, this._source.length - 1,
0, this._modified.length - 1 ] ]
this._data = { }
}
/**
* Get the source text given at the time this source map was constructed.
* This class provides no API for altering the source text, so it should
* be the same any time this function is called.
*
* @returns {string} the source text given at construction time
*/
source () { return this._source }
/**
* Get the modified text for this source map. At construction time, this
* will be the same as the source text, but can change via calls to
* {@link SourceMap.modify modify()}.
*
* @returns {string} the modified text for this source map
*/
modified () { return this._modified }
/**
* As discussed in the documentation for {@link SourceMap#modify
* modify()}, modifications must be made in increasing order of position
* in the modified text. Thus at any point, we have a minimum index at
* which the next modification could take place, so that it falls after
* all modifications made so far. This function returns that minimum
* index.
*
* For example, in the source map described at the top of this file (in
* which "5+9" became "14" inside the source text), the next viable index
* in the modified text where a modification could be made is immediately
* after the text "14," that is, at index 16.
*
* If no modifications have been made to the source text, this function
* will return zero.
*
* @returns {integer} the earliest index at which a modification could be
* added to this source map
*/
nextModificationPosition () {
return this._mapping[this._mapping.length - 1][2]
}
// For internal use by nextMarker and isMarker
static markerText = 'SourceMapMarker'
// For internal use by nextMarker
static markerWithIndex ( index ) {
return `${SourceMap.markerText}${index}`
}
/**
* See the documentation for {@link SourceMap#modify modify()} and
* {@link SourceMap#nextMarker nextMarker()} for an explanation of how
* markers can be useful. This function is a predicate for testing
* whether an arbitrary string contains a marker.
*
* @param {string} text the text to test whether it is a marker
* @returns {boolean} whether the given text is a marker
*
* @see {@link SourceMap#nextMarker nextMarker()}
* @see {@link SourceMap#dataForMarker dataForMarker()}
*/
static isMarker ( text ) {
const marker = SourceMap.markerText
return text.substring( 0, marker.length ) == marker
&& /^[0-9]+$/.test( text.substring( marker.length ) )
}
/**
* See the documentation for {@link SourceMap#modify modify()} for an
* explanation of how markers work. That explanation mentions that it is
* useful to have a sequence of markers, each of which is a unique snippet
* of text that does not appear anywhere in the original source text.
* This function can be used as a generator, returning a different string
* each time it is called, none of which appeared in the original source
* text. In that way it can be used to generate markers that can be
* passed as the `replacement` parameter to {@link SourceMap#modify
* modfiy()}.
*
* @returns {string} a new marker
*
* @see {@link SourceMap#modify modify()}
* @see {@link SourceMap.isMarker isMarker()}
* @see {@link SourceMap#dataForMarker dataForMarker()}
*/
nextMarker () {
return SourceMap.markerWithIndex( this._nextIndex++ )
}
/**
* Source map objects contain both a source text and a modified text,
* which are equal at construction time. But through one or more calls to
* this function, the modified text can change, diverging from the source
* text. This function not only makes such modifications, but it records
* where they were made, and any data associated with them, so that
* queries can be made later about correspondences between the source and
* modified text, as documented at the top of this file.
*
* The first three parameters are straightforward, as documented below.
* However, the fourth parameter (which is optional) requires additional
* explanation. When replacing a portion of the source text, it is
* sometimes convenient to replace it not with text, but with arbitrary
* data. We support this more advanced usage as follows.
*
* * First, the newly inserted text cannot be arbitrary text, but should
* instead be a special marker that indicates the location where
* non-text data resides. To create such markers, use the
* {@link SourceMap#nextMarker nextMarker()} function.
* * Second, the fourth parameter (`data`) should be provided, and it can
* be any JavaScript object amenable to JSON encoding, which will be
* stored in this object for later querying.
* * Later, when the client processes the modified text, upon
* encountering a marker (see {@link SourceMap.isMarker isMarker()})
* and desiring to know what data it represents, the client can find
* out with a call to {@link SourceMap#dataForMarker dataForMarker()}.
*
* For example, let's say our source text was "Get me employee #16" and
* we wanted to replace "employee #16" with the full data for that
* employee, which is not text data, but rather something more complex,
* such as `{ name:'Henrietta', age:35, ... }`. Assuming we have that
* data in an object `obj`, we could make a call like the following.
*
* ```js
* sourcemap.modify( 7, 12, sourcemap.nextMarker(), obj )
* ```
*
* Notes:
*
* 1. When passing data as the optional fourth parameter, you must pass
* `sourcemap.nextMarker()` as the third parameter, because data is
* stored internally only associated with markers, and cannot be
* retrieved later otherwise.
* 2. The starting index documented below is in the modified text, not
* the source text, but if this is inconvenient for you, simply make
* use of the {@link SourceMap#modifiedPosition modifiedPosition()}
* function to convert what you have to what you need.
* 3. Modifications must be made in increasing order of index in the
* modified text. For example, if you have a replacement to do on
* characters 5 through 10 and another to do on characters 50 through
* 70, you must do them in that order, not the reverse. This class
* does not support modifications in arbitrary order.
*
* @param {integer} start the starting index, in the modified text, where
* this next modification should be recorded
* @param {integer} length the length of the text to be replaced, starting
* at the index `start`
* @param {string} replacement the text with which to replace the chosen
* portion of the source text
* @param [Object] data arbitrary JSON data to store in the modified
* text, as documented above
*/
modify ( start, length, replacement, data = { } ) {
const end = start + length - 1
const last = this._mapping[this._mapping.length-1]
const origStart = this.sourcePosition( start )
if ( origStart < last[0] || origStart > last[1] )
throw new Error( 'Modifications must be done in increasing order' )
const origEnd = this.sourcePosition( end )
if ( origEnd < last[0] || origEnd > last[1] )
throw new Error( 'Modifications must be done in increasing order' )
this._mapping.pop()
this._mapping.push( [ last[0], origStart - 1, last[2], start - 1 ] )
this._mapping.push( [ origStart, origEnd,
start, start + replacement.length - 1 ] )
this._mapping.push( [
origEnd + 1, last[1], start + replacement.length,
last[3] - ( end - start - replacement.length + 1 ) ] )
const original = this._modified.substring( start, start + length )
this._modified = this._modified.substring( 0, start )
+ replacement
+ this._modified.substring( start + length )
if ( SourceMap.isMarker( replacement ) )
this._data[replacement] = { original, ...data }
}
/**
* See the documentation for {@link SourceMap#modify modify()} for an
* explanation of how markers work. That explanation mentions that
* arbitrary data can be associated with a marker as part of a call to the
* {@link SourceMap#modify modify()} function. To later query the data
* associated with a given marker, use this function.
*
* @param {string} marker the text for the marker whose data should be
* looked up
* @returns {Object} the data associated with the marker, as a JavaScript
* object amenable to JSON encoding
*
* @see {@link SourceMap#modify modify()}
* @see {@link SourceMap.isMarker isMarker()}
* @see {@link SourceMap#nextMarker nextMarker()}
*/
dataForMarker ( marker ) { return this._data[marker] }
/**
* The major purpose of the SourceMap class is to track the correspondence
* of character positions between its source text and its modified text.
* This function can be used to do a lookup from the latter to the former;
* you provide a character position in the modified text and it returns the
* corresponding position in the source text.
*
* If the character position sits inside a section of modified text, there
* may not be an exact correspondence of which character in the source
* text relates to it; for example, in the source map mentioned at the top
* of this documentation page, which characters in "5+9" correspond to
* each character in the text "14?" In such cases, the first character in
* the corresponding *section* is returned. So both characters in "14"
* would be said to correspond to the first character in "5+9."
*
* If the character position sits inside a section of unmodified text,
* then there is an exact copy of that unmodified text in the source text,
* and thus the corresponding position with it will be returned.
*
* If an invalid index is provided, undefined is returned. Indices into
* both the source and modified texts are zero-based, so the first valid
* index is 0 and the last is the length of the text minus 1.
*
* @param {integer} modifiedPosition the position in the modified text for
* which a lookup should be done in the source text
* @returns {integer} the position in the source text corresponding to the
* given position in the modified text
*
* @see {@link SourceMap#sourceLineAndColumn sourceLineAndColumn}
* @see {@link SourceMap#modifiedPosition modifiedPosition}
* @see {@link SourceMap#modifiedLineAndColumn modifiedLineAndColumn}
*/
sourcePosition ( modifiedPosition ) {
const row = this._mapping.find( row =>
row[2] <= modifiedPosition && modifiedPosition <= row[3] )
return row === undefined ? undefined :
row[3] - row[2] == row[1] - row[0] ?
row[0] + modifiedPosition - row[2] : row[0]
}
/**
* See the documentation for {@link SourceMap#sourcePosition
* sourcePosition()}; this function is (approximately) the inverse to that
* function. The only difference is, as documented in that function, we
* cannot map precisely each character within a section of modified text,
* and thus we map to the first charcter in the corresponding range. For
* example, in the source map mentioned at the top of this page, each
* character in the source text "5+9" would map to the first character in
* the modified text "14."
*
* @param {integer} sourcePosition the position in the source text for
* which a lookup should be done in the modified text
* @returns {integer} the position in the modified text corresponding to
* the given position in the source text
*
* @see {@link SourceMap#modifiedLineAndColumn modifiedLineAndColumn}
* @see {@link SourceMap#sourcePosition sourcePosition}
* @see {@link SourceMap#sourceLineAndColumn sourceLineAndColumn}
*/
modifiedPosition ( sourcePosition ) {
const row = this._mapping.find( row =>
row[0] <= sourcePosition && sourcePosition <= row[1] )
return row === undefined ? undefined :
row[3] - row[2] == row[1] - row[0] ?
row[2] + sourcePosition - row[0] : row[2]
}
/**
* See the documentation for {@link SourceMap#sourcePosition
* sourcePosition()}; this function behaves similarly, except that it
* takes as input a line number and column number in the modified text,
* and returns a line number and column number in the source text.
*
* Unlike character indices, line and column numbers start counting at 1,
* so the first character in the text (at index 0) would be in line 1 and
* at column 1.
*
* @param {integer} line the line number in the modified text whose
* position in the source text should be looked up
* @param {integer} column the column number in the modified text whose
* position in the source text should be looked up
* @returns {Array} a pair of integers, the line and column number in the
* source text that correspond to the given line and column number in
* the modified text
*
* @see {@link SourceMap#sourcePosition sourcePosition}
* @see {@link SourceMap#modifiedLineAndColumn modifiedLineAndColumn}
* @see {@link SourceMap#modifiedPosition modifiedPosition}
*/
sourceLineAndColumn ( line, column ) {
return SourceMap.positionToLineAndColumn(
this.sourcePosition( SourceMap.lineAndColumnToPosition(
line, column, this._modified ) ),
this._source
)
}
/**
* See the documentation for {@link SourceMap#modifiedPosition
* modifiedPosition()}; this function behaves similarly, except that it
* takes as input a line number and column number in the source text,
* and returns a line number and column number in the modified text.
*
* Unlike character indices, line and column numbers start counting at 1,
* so the first character in the text (at index 0) would be in line 1 and
* at column 1.
*
* @param {integer} line the line number in the source text whose position
* in the modified text should be looked up
* @param {integer} column the column number in the modified text whose
* position in the source should be looked up
* @returns {Array} a pair of integers, the line and column number in the
* modified text that correspond to the given line and column number in
* the source text
*
* @see {@link SourceMap#sourceLineAndColumn sourceLineAndColumn}
* @see {@link SourceMap#modifiedPosition modifiedPosition}
* @see {@link SourceMap#modifiedPosition modifiedPosition}
*/
modifiedLineAndColumn ( line, column ) {
return SourceMap.positionToLineAndColumn(
this.modifiedPosition( SourceMap.lineAndColumnToPosition(
line, column, this._source ) ),
this._modified
)
}
// For internal use by sourceLineAndColumn and modifiedLineAndColumn
static positionToLineAndColumn = ( position, text ) => {
if ( position > text.length || position < 0
|| typeof( position ) != 'number' ) return undefined
const start = text.substring( 0, position )
const lines = start.split( '\n' )
return [ lines.length, lines[lines.length-1].length + 1 ]
}
// For internal use by sourceLineAndColumn and modifiedLineAndColumn
static lineAndColumnToPosition = ( line, column, text ) => {
const lines = text.split( '\n' )
for ( let i = 0, totalSoFar = 0 ; i < lines.length ; i++ ) {
const countNewline = i < lines.length - 1 ? 1 : 0
if ( i + 1 == line )
return column >= 1
&& column <= lines[i].length + countNewline ?
totalSoFar + column - 1 : undefined
totalSoFar += lines[i].length + countNewline
}
}
/**
* A debugging routine useful when working with code in which you care
* about the specific character indices and/or line and column numbers.
* Call this routine to print the code to the console, with a line and
* character number added at the start of each line, using the format
* `L_C_:` with the blanks replaced by line and character numbers,
* respectively.
*
* For example, `"function ( x ) {\n\treturn 3;\n}"` would be printed
* as follows. Note that these are *not* line and *column* numbers,
* but rather line numbers and *character indices.*
*
* ```
* L1C0: function ( x ) {
* L2C17: return 3;
* L3C27: }
* ```
*
* @param {string} code arbitrary text to be displayed on the console
* (typically some kind of code, such as putdown notation or JSON)
*/
static debugCode ( code ) {
const lines = code.split( '\n' )
for ( let i = 0, totalSoFar = 0 ; i < lines.length ; i++ ) {
const line = lines[i]
console.log( `L${i+1}C${totalSoFar}: ${line}` )
totalSoFar += line.length + 1
}
}
}
source