From 7c75be543f98a14361f6d077798e892deaa7b10a Mon Sep 17 00:00:00 2001 From: Sorrel Bri Date: Sun, 15 Mar 2020 21:40:31 -0700 Subject: [PATCH] define tokens for lexing set [concat], [dissoc] operations --- src/utils/latl/lexer.js | 6 +- src/utils/latl/test/assertionData.js | 303 ++++++++++++++++++++++++++- src/utils/latl/test/lexer.test.js | 18 +- 3 files changed, 322 insertions(+), 5 deletions(-) diff --git a/src/utils/latl/lexer.js b/src/utils/latl/lexer.js index d48d87b..995aac7 100644 --- a/src/utils/latl/lexer.js +++ b/src/utils/latl/lexer.js @@ -73,6 +73,7 @@ export const lexer = moo.states({ }, setDefinition: { + comment: /;.*$/, setIdentifier: { match: /[A-Z]+[A-Z_]*/ }, openCurlyBracket: { match: /\{/, push: 'setOperation' }, equal: /=/, @@ -87,13 +88,15 @@ export const lexer = moo.states({ setOperation: { closeCurlyBracket: { match: /\}/, pop: true }, // ! restrict identifiers - keyword: { match: ['not', 'and', 'or', 'nor', 'in', 'yield'], type: moo.keywords({ + keyword: { match: ['not', 'and', 'or', 'nor', 'in', 'yield', 'concat', 'dissoc'], type: moo.keywords({ 'kw-set-not': 'not' , 'kw-set-and': 'and' , 'kw-set-or': 'or' , 'kw-set-nor': 'nor' , 'kw-set-in': 'in' , 'kw-set-yield': 'yield' , + 'kw-set-concat': 'concat', + 'kw-set-dissoc': 'dissoc' }) }, identifier: /[A-Z]+[A-Z_]+/, @@ -108,6 +111,7 @@ export const lexer = moo.states({ commaOperation: { // if comma is detected during a definition, the commaOperation consumes all white space and pops back to definition // this prevents popping back to main + comment: /\s*;.*$/, whiteSpace: { match: /\s+/, lineBreaks: true, pop: true }, newLine: { match: /\n/, lineBreaks: true, pop: true } } diff --git a/src/utils/latl/test/assertionData.js b/src/utils/latl/test/assertionData.js index 9e2badc..c224ef6 100644 --- a/src/utils/latl/test/assertionData.js +++ b/src/utils/latl/test/assertionData.js @@ -293,5 +293,306 @@ set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] }, { type: 'whiteSpace', value: ' ', }, { type: 'closeCurlyBracket', value: '}', }, ] - } + }, + + setOperationsJoin: { + latl: ` +; ---- set join operations non-mutable! +set SET_C = { SET_A not SET_B }, ; left anti join + SET_D = { SET_A and SET_B }, ; inner join + SET_E = { SET_A or SET_B }, ; full outer join + SET_F = { not SET_A }, ; = { GLOBAL not SET_A } + SET_G = { not SET_A nor SET_B } ; = { GLOBAL not { SET_A or SET_B } }`, + tokens: [ + { type: 'whiteSpace', value: '\n', }, + { type: 'comment', value: '; ---- set join operations non-mutable! ', }, + { type: 'whiteSpace', value: '\n', }, + { type: 'kw-set', value: 'set', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'setIdentifier', value: 'SET_C', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-not', value: 'not', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_B', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'comment', value: ' ; left anti join', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_D', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-and', value: 'and', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_B', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'comment', value: ' ; inner join', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_E', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-or', value: 'or', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_B', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'comment', value: ' ; full outer join', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_F', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-not', value: 'not', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'comment', value: ' ; = { GLOBAL not SET_A }', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_G', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-not', value: 'not', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-nor', value: 'nor', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_B', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'comment', value: '; = { GLOBAL not { SET_A or SET_B } }', } + ] + }, + + setOperations: { + latl: ` +; ---- set character operations - non-mutable! +set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and y is a filtering character + SET_C = { SET_A yield [ Xy ] }, ; CONCATENATE: performs transformation with (prepended or) appended character + SET_D = { SET_A yield [ X concat y ] }, + SET_E = { SET_A yield [ y concat X ] }, + SET_F = { SET_A yield y[ X ] }, ; DISSOCIATE: performs transformation removing prepended (or appended) character + SET_G = { SET_A yield y dissoc [ X ] }, + SET_H = { SET_A yield [ X ] dissoc y }, + SET_I = { [ Xy ] in SET_A yield [ X ]y } ; combined FILTER and DISSOCIATE`, + tokens: [ + { type: 'whiteSpace', value: '\n', }, + { type: 'comment', value: '; ---- set character operations - non-mutable!', }, + { type: 'whiteSpace', value: '\n', }, + { type: 'kw-set', value: 'set', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'setIdentifier', value: 'SET_B', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-in', value: 'in', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'comment', value: ' ; FILTER: where X is any character and y is a filtering character', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_C', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-yield', value: 'yield', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'comment', value: ' ; CONCATENATE: performs transformation with (prepended or) appended character', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_D', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-yield', value: 'yield', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-concat', value: 'concat', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_E', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-yield', value: 'yield', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-concat', value: 'concat', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_F', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-yield', value: 'yield', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'phone', value: 'y', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'comment', value: ' ; DISSOCIATE: performs transformation removing prepended (or appended) character', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_G', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-yield', value: 'yield', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-dissoc', value: 'dissoc', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_H', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-yield', value: 'yield', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-dissoc', value: 'dissoc', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'comma', value: ',', }, + { type: 'whiteSpace', value: '\n ', }, + { type: 'setIdentifier', value: 'SET_I', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'equal', value: '=', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openCurlyBracket', value: '{', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-in', value: 'in', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'SET_A', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'kw-set-yield', value: 'yield', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeSquareBracket', value: '[', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'identifier', value: 'X', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'openSquareBracket', value: ']', }, + { type: 'phone', value: 'y', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'closeCurlyBracket', value: '}', }, + { type: 'whiteSpace', value: ' ', }, + { type: 'comment', value: '; combined FILTER and DISSOCIATE', } + ] + }, + } \ No newline at end of file diff --git a/src/utils/latl/test/lexer.test.js b/src/utils/latl/test/lexer.test.js index 7f37d53..d0cb9ea 100644 --- a/src/utils/latl/test/lexer.test.js +++ b/src/utils/latl/test/lexer.test.js @@ -35,19 +35,19 @@ describe('lexer', () => { it('lexes multiple set definitions with comma operator', () => { const { latl, tokens } = assertionData.setDefinition; - const stream = getStream(latl); + const stream = getStream(latl); expect(stream).toStrictEqual(tokens); }); it('lexes set definition with alias', () => { const { latl, tokens } = assertionData.setAliasDefinition; - const stream = getStream(latl); + const stream = getStream(latl); expect(stream).toStrictEqual(tokens); }); it('lexes set definition with set join', () => { const { latl, tokens } = assertionData.setDefinitionJoin; - const stream = getStream(latl); + const stream = getStream(latl); expect(stream).toStrictEqual(tokens); }); @@ -55,5 +55,17 @@ describe('lexer', () => { const { latl, tokens } = assertionData.setDefinitionYield; const stream = getStream(latl); expect(stream).toStrictEqual(tokens); + }); + + it('lexes all set join operations', () => { + const { latl, tokens } = assertionData.setOperationsJoin; + const stream = getStream(latl); + expect(stream).toStrictEqual(tokens); + }); + + it('lexes set filter, concat, and dissoc operations', () => { + const { latl, tokens } = assertionData.setOperations; + const stream = getStream(latl); + expect(stream).toStrictEqual(tokens); }) }) \ No newline at end of file