diff --git a/public/latl/waffle.latl b/public/latl/waffle.latl index cd1d930..3b906e3 100644 --- a/public/latl/waffle.latl +++ b/public/latl/waffle.latl @@ -48,6 +48,8 @@ set PLOSIVES = [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ] ; { SET_A and SET_B } inner join ; { SET_A or SET_B } full outer join ; { not SET_A } = { GLOBAL not SET_A } + +; ---- unnecessary sugar ; { not SET_A nor SET_B } = { GLOBAL not { SET_A or SET_B } } ; ---- set character operations - non-mutable! diff --git a/src/utils/latl/grammar.js b/src/utils/latl/grammar.js index 378577f..dfdbb2c 100644 --- a/src/utils/latl/grammar.js +++ b/src/utils/latl/grammar.js @@ -69,7 +69,7 @@ var grammar = { d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t), d => d.map(t => t.length ? // pretty ugly ([ { type: 'aias', alias: [ string ] }] ) => { setAlias: str } - { setAlias: t.reduce((aliases, token) => token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] } + { setAlias: t.reduce((aliases, token) => token && token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] } : t), ) }, @@ -79,7 +79,11 @@ var grammar = { d => d.filter(t => !!t), d => ({type: 'alias', alias: d }), ) }, - {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)], "postprocess": + {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)]}, + {"name": "setExpression$ebnf$1$subexpression$1", "symbols": ["setOperation"]}, + {"name": "setExpression$ebnf$1", "symbols": ["setExpression$ebnf$1$subexpression$1"], "postprocess": id}, + {"name": "setExpression$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}}, + {"name": "setExpression", "symbols": [(lexer.has("openCurlyBracket") ? {type: "openCurlyBracket"} : openCurlyBracket), "_", "setExpression$ebnf$1", "_", (lexer.has("closeCurlyBracket") ? {type: "closeCurlyBracket"} : closeCurlyBracket)], "postprocess": pipe( // filters commas and whitespace d => d.filter(t => t && t.length), @@ -96,7 +100,15 @@ var grammar = { pipe( d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d ) - } + }, + {"name": "setOperation", "symbols": ["orOperation"]}, + {"name": "setOperation", "symbols": [(lexer.has("identifier") ? {type: "identifier"} : identifier)], "postprocess": pipe( + d => d.type ? d : ({ identifier: d.toString(), type: 'identifier' }) + )}, + {"name": "orOperation", "symbols": ["_", "setOperation", "__", (lexer.has("kwSetOr") ? {type: "kwSetOr"} : kwSetOr), "__", "setOperation", "_"], "postprocess": pipe( + d => d.filter(d => !!d), + d => ({ type: 'operator', operator: 'or', operands: [ d[0], d[2] ] }), + ) } ] , ParserStart: "main" } diff --git a/src/utils/latl/grammar.ne b/src/utils/latl/grammar.ne index 40bc172..d58e15f 100644 --- a/src/utils/latl/grammar.ne +++ b/src/utils/latl/grammar.ne @@ -68,7 +68,7 @@ setDefinition -> %setIdentifier (setAlias):? __ equal __ setExpression d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t), d => d.map(t => t.length ? // pretty ugly ([ { type: 'aias', alias: [ string ] }] ) => { setAlias: str } - { setAlias: t.reduce((aliases, token) => token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] } + { setAlias: t.reduce((aliases, token) => token && token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] } : t), ) %} @@ -80,6 +80,7 @@ setAlias -> %comma _ %setIdentifier d => ({type: 'alias', alias: d }), ) %} setExpression -> %openSquareBracket _ phoneList _ %closeSquareBracket + | %openCurlyBracket _ (setOperation):? _ %closeCurlyBracket {% pipe( // filters commas and whitespace @@ -93,3 +94,14 @@ phoneList -> (%phone (%comma _):* ):* d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d ) %} +setOperation -> orOperation + | %identifier + {% pipe( + d => d.type ? d : ({ identifier: d.toString(), type: 'identifier' }) + )%} + +orOperation -> _ setOperation __ %kwSetOr __ setOperation _ + {% pipe( + d => d.filter(d => !!d), + d => ({ type: 'operator', operator: 'or', operands: [ d[0], d[2] ] }), + ) %} \ No newline at end of file diff --git a/src/utils/latl/lexer.js b/src/utils/latl/lexer.js index bae2c3f..30f85e4 100644 --- a/src/utils/latl/lexer.js +++ b/src/utils/latl/lexer.js @@ -85,23 +85,22 @@ const lexer = moo.states({ closeCurlyBracket: { match: /\}/, pop: true }, // ! restrict identifiers keyword: { match: ['not', 'and', 'or', 'nor', 'in', 'yield', 'concat', 'dissoc'], type: moo.keywords({ - 'kw-set-not': 'not' , - 'kw-set-and': 'and' , - 'kw-set-or': 'or' , - 'kw-set-nor': 'nor' , - 'kw-set-in': 'in' , - 'kw-set-yield': 'yield' , - 'kw-set-concat': 'concat', - 'kw-set-dissoc': 'dissoc' + 'kwSetNot': 'not' , + 'kwSetAnd': 'and' , + 'kwSetOr': 'or' , + 'kwSetNor': 'nor' , + 'kwSetIn': 'in' , + 'kwSetYield': 'yield' , + 'kwSetConcat': 'concat', + 'kwSetDissoc': 'dissoc' }) }, identifier: /[A-Z]+[A-Z_]+/, - whiteSpace: /[\t ]+/, + whiteSpace: { match: /\s+/, lineBreaks: true }, openSquareBracket: /\]/, closeSquareBracket: /\[/, identifier: /[A-Z]+[A-Z_]*/, phone: /[\u00c0-\u03FFa-z]+/, - newLine: { match: /\n/, lineBreaks: true } }, commaOperation: { diff --git a/src/utils/latl/test/assertionData.js b/src/utils/latl/test/assertionData.js index cde270e..174dde8 100644 --- a/src/utils/latl/test/assertionData.js +++ b/src/utils/latl/test/assertionData.js @@ -242,6 +242,52 @@ set NASAL_PULMONIC_CONSONANTS, N = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, }, setDefinitionJoin: { latl: ` +set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS }`, + tokens: [ + { type: 'whiteSpace', value: '\n'}, + { type: 'kwSet', value: 'set'}, + { type: 'whiteSpace', value: ' '}, + { type: 'setIdentifier', value: 'CLICK_CONSONANTS'}, + { type: 'whiteSpace', value: ' '}, + { type: 'equal', value: '='}, + { type: 'whiteSpace', value: ' '}, + { type: 'openCurlyBracket', value: '{'}, + { type: 'whiteSpace', value: ' '}, + { type: 'identifier', value: 'TENUIS_CLICK_CONSONANTS'}, + { type: 'whiteSpace', value: ' '}, + { type: 'kwSetOr', value: 'or'}, + { type: 'whiteSpace', value: ' '}, + { type: 'identifier', value: 'VOICED_CLICK_CONSONANTS'}, + { type: 'whiteSpace', value: ' '}, + { type: 'closeCurlyBracket', value: '}'} + ], + AST: { + main: [ + { + type: 'set', + setIdentifier: 'CLICK_CONSONANTS', + setExpression: [ + { + type: 'operator', + operator: 'or', + operands: [ + { + type: 'identifier', + identifier: 'TENUIS_CLICK_CONSONANTS' + }, + { + type: 'identifier', + identifier: 'VOICED_CLICK_CONSONANTS' + }, + ] + } + ] + } + ] + } + }, + setDefinitionMultiJoin: { + latl: ` set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS or NASAL_CLICK_CONSONANTS or L_CLICK_CONSONANTS }`, @@ -257,21 +303,18 @@ set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS { type: 'whiteSpace', value: ' '}, { type: 'identifier', value: 'TENUIS_CLICK_CONSONANTS'}, { type: 'whiteSpace', value: ' '}, - { type: 'kw-set-or', value: 'or'}, + { type: 'kwSetOr', value: 'or'}, { type: 'whiteSpace', value: ' '}, { type: 'identifier', value: 'VOICED_CLICK_CONSONANTS'}, - { type: 'newLine', value: '\n'}, - { type: 'whiteSpace', value: ' '}, - { type: 'kw-set-or', value: 'or'}, + { type: 'whiteSpace', value: '\n '}, + { type: 'kwSetOr', value: 'or'}, { type: 'whiteSpace', value: ' '}, { type: 'identifier', value: 'NASAL_CLICK_CONSONANTS'}, { type: 'whiteSpace', value: ' '}, - { type: 'kw-set-or', value: 'or'}, + { type: 'kwSetOr', value: 'or'}, { type: 'whiteSpace', value: ' '}, { type: 'identifier', value: 'L_CLICK_CONSONANTS'}, - { type: 'whiteSpace', value: ' '}, - { type: 'newLine', value: '\n'}, - { type: 'whiteSpace', value: ' '}, + { type: 'whiteSpace', value: ' \n '}, { type: 'closeCurlyBracket', value: '}'} ], AST: { @@ -340,11 +383,11 @@ set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] }, { type: 'whiteSpace', value: ' ', }, { type: 'openSquareBracket', value: ']', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-in', value: 'in', }, + { type: 'kwSetIn', value: 'in', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'ORAL_VOWELS', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, @@ -369,11 +412,11 @@ set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] }, { type: 'whiteSpace', value: ' ', }, { type: 'openSquareBracket', value: ']', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-in', value: 'in', }, + { type: 'kwSetIn', value: 'in', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'NASAL_VOWELS', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, @@ -398,7 +441,7 @@ set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] }, { type: 'whiteSpace', value: ' ', }, { type: 'openSquareBracket', value: ']', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-in', value: 'in', }, + { type: 'kwSetIn', value: 'in', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'NASAL_VOWELS', }, { type: 'whiteSpace', value: ' ', }, @@ -428,7 +471,7 @@ set SET_C = { SET_A not SET_B }, ; left anti join { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-not', value: 'not', }, + { type: 'kwSetNot', value: 'not', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_B', }, { type: 'whiteSpace', value: ' ', }, @@ -444,7 +487,7 @@ set SET_C = { SET_A not SET_B }, ; left anti join { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-and', value: 'and', }, + { type: 'kwSetAnd', value: 'and', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_B', }, { type: 'whiteSpace', value: ' ', }, @@ -460,7 +503,7 @@ set SET_C = { SET_A not SET_B }, ; left anti join { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-or', value: 'or', }, + { type: 'kwSetOr', value: 'or', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_B', }, { type: 'whiteSpace', value: ' ', }, @@ -474,7 +517,7 @@ set SET_C = { SET_A not SET_B }, ; left anti join { type: 'whiteSpace', value: ' ', }, { type: 'openCurlyBracket', value: '{', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-not', value: 'not', }, + { type: 'kwSetNot', value: 'not', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, @@ -488,11 +531,11 @@ set SET_C = { SET_A not SET_B }, ; left anti join { type: 'whiteSpace', value: ' ', }, { type: 'openCurlyBracket', value: '{', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-not', value: 'not', }, + { type: 'kwSetNot', value: 'not', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-nor', value: 'nor', }, + { type: 'kwSetNor', value: 'nor', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_B', }, { type: 'whiteSpace', value: ' ', }, @@ -532,7 +575,7 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'openSquareBracket', value: ']', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-in', value: 'in', }, + { type: 'kwSetIn', value: 'in', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, @@ -548,7 +591,7 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, @@ -569,13 +612,13 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'X', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-concat', value: 'concat', }, + { type: 'kwSetConcat', value: 'concat', }, { type: 'whiteSpace', value: ' ', }, { type: 'phone', value: 'y', }, { type: 'whiteSpace', value: ' ', }, @@ -592,13 +635,13 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, { type: 'phone', value: 'y', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-concat', value: 'concat', }, + { type: 'kwSetConcat', value: 'concat', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'X', }, { type: 'whiteSpace', value: ' ', }, @@ -615,7 +658,7 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'phone', value: 'y', }, { type: 'closeSquareBracket', value: '[', }, @@ -636,11 +679,11 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'phone', value: 'y', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-dissoc', value: 'dissoc', }, + { type: 'kwSetDissoc', value: 'dissoc', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, @@ -659,7 +702,7 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, @@ -667,7 +710,7 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'openSquareBracket', value: ']', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-dissoc', value: 'dissoc', }, + { type: 'kwSetDissoc', value: 'dissoc', }, { type: 'whiteSpace', value: ' ', }, { type: 'phone', value: 'y', }, { type: 'whiteSpace', value: ' ', }, @@ -687,11 +730,11 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and { type: 'whiteSpace', value: ' ', }, { type: 'openSquareBracket', value: ']', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-in', value: 'in', }, + { type: 'kwSetIn', value: 'in', }, { type: 'whiteSpace', value: ' ', }, { type: 'identifier', value: 'SET_A', }, { type: 'whiteSpace', value: ' ', }, - { type: 'kw-set-yield', value: 'yield', }, + { type: 'kwSetYield', value: 'yield', }, { type: 'whiteSpace', value: ' ', }, { type: 'closeSquareBracket', value: '[', }, { type: 'whiteSpace', value: ' ', }, diff --git a/src/utils/latl/test/parser.test.js b/src/utils/latl/test/parser.test.js index ba7638e..93337dc 100644 --- a/src/utils/latl/test/parser.test.js +++ b/src/utils/latl/test/parser.test.js @@ -36,8 +36,7 @@ describe('parser', () => { const { latl, AST } = assertionData.setDefinitionJoin; const feedResults = parser().feed(latl).results; expect(feedResults[0]).toStrictEqual(AST); - } - ); + }); it.todo('lexes set definition with yield operation' // , () => {