define tokens for lexing set [concat], [dissoc] operations

This commit is contained in:
Sorrel Bri 2020-03-15 21:40:31 -07:00
parent a7dad0d3e5
commit 7c75be543f
3 changed files with 322 additions and 5 deletions

View file

@ -73,6 +73,7 @@ export const lexer = moo.states({
}, },
setDefinition: { setDefinition: {
comment: /;.*$/,
setIdentifier: { match: /[A-Z]+[A-Z_]*/ }, setIdentifier: { match: /[A-Z]+[A-Z_]*/ },
openCurlyBracket: { match: /\{/, push: 'setOperation' }, openCurlyBracket: { match: /\{/, push: 'setOperation' },
equal: /=/, equal: /=/,
@ -87,13 +88,15 @@ export const lexer = moo.states({
setOperation: { setOperation: {
closeCurlyBracket: { match: /\}/, pop: true }, closeCurlyBracket: { match: /\}/, pop: true },
// ! restrict identifiers // ! restrict identifiers
keyword: { match: ['not', 'and', 'or', 'nor', 'in', 'yield'], type: moo.keywords({ keyword: { match: ['not', 'and', 'or', 'nor', 'in', 'yield', 'concat', 'dissoc'], type: moo.keywords({
'kw-set-not': 'not' , 'kw-set-not': 'not' ,
'kw-set-and': 'and' , 'kw-set-and': 'and' ,
'kw-set-or': 'or' , 'kw-set-or': 'or' ,
'kw-set-nor': 'nor' , 'kw-set-nor': 'nor' ,
'kw-set-in': 'in' , 'kw-set-in': 'in' ,
'kw-set-yield': 'yield' , 'kw-set-yield': 'yield' ,
'kw-set-concat': 'concat',
'kw-set-dissoc': 'dissoc'
}) })
}, },
identifier: /[A-Z]+[A-Z_]+/, identifier: /[A-Z]+[A-Z_]+/,
@ -108,6 +111,7 @@ export const lexer = moo.states({
commaOperation: { commaOperation: {
// if comma is detected during a definition, the commaOperation consumes all white space and pops back to definition // if comma is detected during a definition, the commaOperation consumes all white space and pops back to definition
// this prevents popping back to main // this prevents popping back to main
comment: /\s*;.*$/,
whiteSpace: { match: /\s+/, lineBreaks: true, pop: true }, whiteSpace: { match: /\s+/, lineBreaks: true, pop: true },
newLine: { match: /\n/, lineBreaks: true, pop: true } newLine: { match: /\n/, lineBreaks: true, pop: true }
} }

View file

@ -293,5 +293,306 @@ set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] },
{ type: 'whiteSpace', value: ' ', }, { type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', }, { type: 'closeCurlyBracket', value: '}', },
] ]
} },
// Fixture for the set join keywords (not / and / or / nor): `latl` is the
// source text and `tokens` is the expected token list, in source order.
setOperationsJoin: {
latl: `
; ---- set join operations non-mutable!
set SET_C = { SET_A not SET_B }, ; left anti join
SET_D = { SET_A and SET_B }, ; inner join
SET_E = { SET_A or SET_B }, ; full outer join
SET_F = { not SET_A }, ; = { GLOBAL not SET_A }
SET_G = { not SET_A nor SET_B } ; = { GLOBAL not { SET_A or SET_B } }`,
// Expected tokens, grouped below by the set definition they come from.
// A comment that directly follows a comma keeps its leading space in the
// token value; the final comment (no comma before it) is preceded by a
// separate whiteSpace token instead.
tokens: [
// leading newline and header comment
{ type: 'whiteSpace', value: '\n', },
{ type: 'comment', value: '; ---- set join operations non-mutable! ', },
{ type: 'whiteSpace', value: '\n', },
// set SET_C = { SET_A not SET_B }, ; left anti join
{ type: 'kw-set', value: 'set', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'setIdentifier', value: 'SET_C', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-not', value: 'not', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_B', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'comment', value: ' ; left anti join', },
{ type: 'whiteSpace', value: '\n ', },
// SET_D = { SET_A and SET_B }, ; inner join
{ type: 'setIdentifier', value: 'SET_D', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-and', value: 'and', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_B', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'comment', value: ' ; inner join', },
{ type: 'whiteSpace', value: '\n ', },
// SET_E = { SET_A or SET_B }, ; full outer join
{ type: 'setIdentifier', value: 'SET_E', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-or', value: 'or', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_B', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'comment', value: ' ; full outer join', },
{ type: 'whiteSpace', value: '\n ', },
// SET_F = { not SET_A }, ; = { GLOBAL not SET_A }
{ type: 'setIdentifier', value: 'SET_F', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-not', value: 'not', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'comment', value: ' ; = { GLOBAL not SET_A }', },
{ type: 'whiteSpace', value: '\n ', },
// SET_G = { not SET_A nor SET_B } ; = { GLOBAL not { SET_A or SET_B } }
{ type: 'setIdentifier', value: 'SET_G', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-not', value: 'not', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-nor', value: 'nor', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_B', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'comment', value: '; = { GLOBAL not { SET_A or SET_B } }', }
]
},
// Fixture for the character-level set operations: filtering with `in`,
// transforming with `yield`, and the explicit `concat` / `dissoc` keywords.
// `latl` is the source text and `tokens` is the expected token list.
setOperations: {
latl: `
; ---- set character operations - non-mutable!
set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and y is a filtering character
SET_C = { SET_A yield [ Xy ] }, ; CONCATENATE: performs transformation with (prepended or) appended character
SET_D = { SET_A yield [ X concat y ] },
SET_E = { SET_A yield [ y concat X ] },
SET_F = { SET_A yield y[ X ] }, ; DISSOCIATE: performs transformation removing prepended (or appended) character
SET_G = { SET_A yield y dissoc [ X ] },
SET_H = { SET_A yield [ X ] dissoc y },
SET_I = { [ Xy ] in SET_A yield [ X ]y } ; combined FILTER and DISSOCIATE`,
// Expected tokens, grouped below by the set definition they come from.
// NOTE(review): throughout this list '[' is typed 'closeSquareBracket' and
// ']' is typed 'openSquareBracket'. The names look inverted; presumably this
// mirrors the lexer's rule names, so confirm against the lexer before renaming.
tokens: [
// leading newline and header comment
{ type: 'whiteSpace', value: '\n', },
{ type: 'comment', value: '; ---- set character operations - non-mutable!', },
{ type: 'whiteSpace', value: '\n', },
// set SET_B = { [ Xy ] in SET_A }, ; FILTER
{ type: 'kw-set', value: 'set', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'setIdentifier', value: 'SET_B', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-in', value: 'in', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'comment', value: ' ; FILTER: where X is any character and y is a filtering character', },
{ type: 'whiteSpace', value: '\n ', },
// SET_C = { SET_A yield [ Xy ] }, ; CONCATENATE (implicit form)
{ type: 'setIdentifier', value: 'SET_C', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-yield', value: 'yield', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'comment', value: ' ; CONCATENATE: performs transformation with (prepended or) appended character', },
{ type: 'whiteSpace', value: '\n ', },
// SET_D = { SET_A yield [ X concat y ] },
{ type: 'setIdentifier', value: 'SET_D', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-yield', value: 'yield', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-concat', value: 'concat', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'whiteSpace', value: '\n ', },
// SET_E = { SET_A yield [ y concat X ] },
{ type: 'setIdentifier', value: 'SET_E', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-yield', value: 'yield', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-concat', value: 'concat', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'whiteSpace', value: '\n ', },
// SET_F = { SET_A yield y[ X ] }, ; DISSOCIATE (implicit form)
{ type: 'setIdentifier', value: 'SET_F', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-yield', value: 'yield', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'phone', value: 'y', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'comment', value: ' ; DISSOCIATE: performs transformation removing prepended (or appended) character', },
{ type: 'whiteSpace', value: '\n ', },
// SET_G = { SET_A yield y dissoc [ X ] },
{ type: 'setIdentifier', value: 'SET_G', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-yield', value: 'yield', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-dissoc', value: 'dissoc', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'whiteSpace', value: '\n ', },
// SET_H = { SET_A yield [ X ] dissoc y },
{ type: 'setIdentifier', value: 'SET_H', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-yield', value: 'yield', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-dissoc', value: 'dissoc', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'comma', value: ',', },
{ type: 'whiteSpace', value: '\n ', },
// SET_I = { [ Xy ] in SET_A yield [ X ]y } ; combined FILTER and DISSOCIATE
{ type: 'setIdentifier', value: 'SET_I', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'equal', value: '=', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openCurlyBracket', value: '{', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-in', value: 'in', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'SET_A', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'kw-set-yield', value: 'yield', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeSquareBracket', value: '[', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'identifier', value: 'X', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'openSquareBracket', value: ']', },
{ type: 'phone', value: 'y', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'closeCurlyBracket', value: '}', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'comment', value: '; combined FILTER and DISSOCIATE', }
]
},
} }

View file

@ -35,19 +35,19 @@ describe('lexer', () => {
it('lexes multiple set definitions with comma operator', () => { it('lexes multiple set definitions with comma operator', () => {
const { latl, tokens } = assertionData.setDefinition; const { latl, tokens } = assertionData.setDefinition;
const stream = getStream(latl); const stream = getStream(latl);
expect(stream).toStrictEqual(tokens); expect(stream).toStrictEqual(tokens);
}); });
it('lexes set definition with alias', () => { it('lexes set definition with alias', () => {
const { latl, tokens } = assertionData.setAliasDefinition; const { latl, tokens } = assertionData.setAliasDefinition;
const stream = getStream(latl); const stream = getStream(latl);
expect(stream).toStrictEqual(tokens); expect(stream).toStrictEqual(tokens);
}); });
it('lexes set definition with set join', () => { it('lexes set definition with set join', () => {
const { latl, tokens } = assertionData.setDefinitionJoin; const { latl, tokens } = assertionData.setDefinitionJoin;
const stream = getStream(latl); const stream = getStream(latl);
expect(stream).toStrictEqual(tokens); expect(stream).toStrictEqual(tokens);
}); });
@ -55,5 +55,17 @@ describe('lexer', () => {
const { latl, tokens } = assertionData.setDefinitionYield; const { latl, tokens } = assertionData.setDefinitionYield;
const stream = getStream(latl); const stream = getStream(latl);
expect(stream).toStrictEqual(tokens); expect(stream).toStrictEqual(tokens);
});
// Feeds the setOperationsJoin fixture's LATL text to getStream and requires
// the result to strictly equal the fixture's expected token list.
it('lexes all set join operations', () => {
  const fixture = assertionData.setOperationsJoin;
  const actualTokens = getStream(fixture.latl);
  expect(actualTokens).toStrictEqual(fixture.tokens);
});
// Passes the setOperations fixture's LATL text to getStream (presumably the
// lexer's token stream; confirm against its definition) and requires the
// result to strictly equal the fixture's expected token list.
it('lexes set filter, concat, and dissoc operations', () => {
const { latl, tokens } = assertionData.setOperations;
const stream = getStream(latl);
expect(stream).toStrictEqual(tokens);
}) })
}) })