From 40aec305370eb7715ab576d8be015978c5e7ea0e Mon Sep 17 00:00:00 2001
From: Sorrel Bri
Date: Fri, 8 May 2020 23:32:49 -0700
Subject: [PATCH] parse AST for single set definition

---
 src/utils/latl/grammar.js            | 49 ++++++++++++++++++++--------
 src/utils/latl/grammar.ne            | 48 ++++++++++++++++++---------
 src/utils/latl/test/assertionData.js | 47 ++++++++++++++++++++------
 src/utils/latl/test/parser.test.js   | 37 +++++++++++++++------
 4 files changed, 131 insertions(+), 50 deletions(-)

diff --git a/src/utils/latl/grammar.js b/src/utils/latl/grammar.js
index 9f6ba68..cceab65 100644
--- a/src/utils/latl/grammar.js
+++ b/src/utils/latl/grammar.js
@@ -17,6 +17,7 @@ function id(x) { return x[0]; }
     return acc;
   }, []);
   const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+  const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 var grammar = {
     Lexer: lexer,
     ParserRules: [
@@ -24,32 +25,52 @@ var grammar = {
     {"name": "main$ebnf$1$subexpression$1", "symbols": ["_", "statement"]},
     {"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
     {"name": "main", "symbols": ["main$ebnf$1", "_"], "postprocess": pipe(
-        getTerminal,
-        clearNull,
-        flag('main'),
-        getTerminal,
+          getTerminal,
+          clearNull,
+          flag('main'),
+          getTerminal,
        ) },
     {"name": "_$ebnf$1$subexpression$1", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)]},
     {"name": "_$ebnf$1", "symbols": ["_$ebnf$1$subexpression$1"], "postprocess": id},
     {"name": "_$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
     {"name": "_", "symbols": ["_$ebnf$1"], "postprocess": remove},
     {"name": "__", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)], "postprocess": remove},
+    {"name": "equal", "symbols": [(lexer.has("equal") ? {type: "equal"} : equal)], "postprocess": remove},
     {"name": "statement", "symbols": ["comment"]},
-    {"name": "statement", "symbols": ["definition"], "postprocess": pipe(getTerminal)},
+    {"name": "statement", "symbols": ["definition"], "postprocess": pipe(
+        objFromArr
+      ) },
     {"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": pipe(getTerminal, remove)},
-    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": d => ({[d[0].value]: d[2]})},
+    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": pipe(
+        d => ({[d[0].value]: objFromArr(d[2]) }),
+      ) },
     {"name": "setDefinition$ebnf$1", "symbols": []},
-    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
+    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", "equal", "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
     {"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1", "setDefinition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression"], "postprocess": d => {
-      if (d.type === 'setIdentifier') return { setIdentifier: d.value }
-      return d
-    } },
-    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)]},
+    {"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", "equal", "__", "setExpression"], "postprocess":
+      pipe(
+        d => d.filter(t => !!t && t.length !== 0),
+        d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+        d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t)
+      )
+    },
+    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)], "postprocess":
+      pipe(
+        d => d.filter(t => t && t.length),
+        d => d.map(t => t.map(u => u[0])),
+        flag('setExpression')
+      ) },
     {"name": "phoneList$ebnf$1", "symbols": []},
-    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), (lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": []},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1", "symbols": [(lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": ["phoneList$ebnf$1$subexpression$1$ebnf$1", "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
+    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), "phoneList$ebnf$1$subexpression$1$ebnf$1"]},
     {"name": "phoneList$ebnf$1", "symbols": ["phoneList$ebnf$1", "phoneList$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "phoneList", "symbols": ["phoneList$ebnf$1", (lexer.has("phone") ? {type: "phone"} : phone)], "postprocess": pipe(d => d ? d.toString() : d)}
+    {"name": "phoneList", "symbols": ["phoneList$ebnf$1"], "postprocess":
+      pipe(
+        d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+      )
+    }
 ]
   , ParserStart: "main"
 }
diff --git a/src/utils/latl/grammar.ne b/src/utils/latl/grammar.ne
index fbb5cc8..1fd3f1b 100644
--- a/src/utils/latl/grammar.ne
+++ b/src/utils/latl/grammar.ne
@@ -13,6 +13,7 @@
     return acc;
   }, []);
   const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+  const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 %}
 
 @lexer lexer
@@ -23,7 +24,7 @@ main -> (_ statement):* _
       clearNull,
       flag('main'),
       getTerminal,
-  ) %}
+    ) %}
 
 _ -> (%whiteSpace):?
     {% remove %}
@@ -31,26 +32,41 @@
 __ -> %whiteSpace
     {% remove %}
 
+equal -> %equal
+    {% remove %}
+
 statement ->
   comment | definition
-    {% pipe(getTerminal) %}
+    {% pipe(
+      objFromArr
+    ) %}
 
 comment -> %comment
     {% pipe(getTerminal, remove) %}
 
 # SETS
 definition -> %kwSet __ setDefinition
-    {% d => ({[d[0].value]: d[2]}) %}
-setDefinition -> (%setIdentifier __ %equal __ setExpression %comma __):* %setIdentifier __ %equal __ setExpression
-    {% d => {
-      if (d.type === 'setIdentifier') return { setIdentifier: d.value }
-      return d
-    } %}
+    {% pipe(
+      d => ({[d[0].value]: objFromArr(d[2]) }),
+    ) %}
+    # {% flag('definition') %}
+setDefinition -> (%setIdentifier __ equal __ setExpression %comma __):* %setIdentifier __ equal __ setExpression
+    {%
+      pipe(
+        d => d.filter(t => !!t && t.length !== 0),
+        d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+        d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t)
+      )
+    %}
 setExpression -> %openSquareBracket _ phoneList _ %closeSquareBracket
-    # {% pipe(d => d.filter(t => t && t.length)) %}
-phoneList -> (%phone %comma _):* %phone
-    {% pipe(d => d ? d.toString() : d) %}
-    # {% d => d.filter(t => t && (t.type === 'phone' || t[0]) )
-    #         .flatMap(t => {
-    #           if (!t.length) return t;
-    #           return t[0].filter(st => st && st.type === 'phone')
-    #         }) %}
+    {%
+      pipe(
+        d => d.filter(t => t && t.length),
+        d => d.map(t => t.map(u => u[0])),
+        flag('setExpression')
+      ) %}
+phoneList -> (%phone (%comma _):* ):*
+    {%
+      pipe(
+        d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+      )
+    %}
diff --git a/src/utils/latl/test/assertionData.js b/src/utils/latl/test/assertionData.js
index 174efff..fedfa8c 100644
--- a/src/utils/latl/test/assertionData.js
+++ b/src/utils/latl/test/assertionData.js
@@ -10,12 +10,37 @@ export const assertionData = {
     code: ''
   },
   simpleSetDefinition: {
-    latl: `set PLOSIVES`,
+    latl: `set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ ]`,
     tokens: [
       { type: 'kwSet', value: 'set' },
       { type: 'whiteSpace', value: ' ' },
-      { type: 'setIdentifier', value: 'PLOSIVES' }
+      { type: 'setIdentifier', value: "NASAL_PULMONIC_CONSONANTS" },
+      { type: "whiteSpace", value: " ", },
+      { type: "equal", value: "=", },
+      { type: "whiteSpace", value: " ", },
+      { type: "openSquareBracket", value: "[", },
+      { type: "whiteSpace", value: " ", },
+      { type: "phone", value: "m̥", },
+      { type: "comma", value: ",", },
+      { type: "whiteSpace", value: " ", },
+      { type: "phone", value: "m", },
+      { type: "comma", value: ",", },
+      { type: "whiteSpace", value: " ", },
+      { type: "phone", value: "ɱ", },
+      { type: "whiteSpace", value: " ", },
+      { type: "closeSquareBracket", value: "]"}
     ],
+    AST: {
+      main: [
+        {
+          set: {
+            setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
+            setExpression: [ 'm̥', 'm', 'ɱ' ]
+          }
+        }
+      ]
+    }
+    ,
     code: ''
   },
   commaSetDefinition: {
@@ -137,14 +162,16 @@ set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊,
   AST: {
     main: [
       {
-        set: {
-          setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
-          items: [ 'm̥', 'm', 'ɱ', 'n̼', 'n̥', 'n', 'ɳ̊', 'ɳ', 'ɲ̊', 'ɲ', `ŋ`, ' ̊ŋ', 'ɴ' ]
-        },
-        set: {
-          setIdentifier: 'STOP_PULMONIC_CONSONANTS',
-          items: [ 'p', 'b', 'p̪', 'b̪', 't̼', 'd̼', 't', 'd', 'ʈ', 'ɖ', 'c', 'ɟ', 'k', 'ɡ', 'q', 'ɢ', 'ʡ', 'ʔ' ]
-        }
+        set: [
+          {
+            setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
+            items: [ 'm̥', 'm', 'ɱ', 'n̼', 'n̥', 'n', 'ɳ̊', 'ɳ', 'ɲ̊', 'ɲ', `ŋ`, ' ̊ŋ', 'ɴ' ]
+          },
+          {
+            setIdentifier: 'STOP_PULMONIC_CONSONANTS',
+            items: [ 'p', 'b', 'p̪', 'b̪', 't̼', 'd̼', 't', 'd', 'ʈ', 'ɖ', 'c', 'ɟ', 'k', 'ɡ', 'q', 'ɢ', 'ʡ', 'ʔ' ]
+          }
+        ]
       }
     ]
   }
diff --git a/src/utils/latl/test/parser.test.js b/src/utils/latl/test/parser.test.js
index 89bb518..ce36f87 100644
--- a/src/utils/latl/test/parser.test.js
+++ b/src/utils/latl/test/parser.test.js
@@ -10,6 +10,13 @@ describe('parser', () => {
     expect(feedResults[0]).toStrictEqual(AST)
   })
 
+  it('parses simple set definition', () => {
+    const { latl, AST } = assertionData.simpleSetDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  })
+
   it('parses multiple set definitions with comma operator', () => {
     const { latl, AST } = assertionData.commaSetDefinition;
     const feedResults = parser().feed(latl).results;
@@ -17,35 +24,45 @@ describe('parser', () => {
     expect(feedResults[0]).toStrictEqual(AST);
   });
 
-  // it('lexes set definition with alias', () => {
+  it.todo('lexes set definition with alias'
+  // , () => {
   //   const { latl, tokens } = assertionData.setAliasDefinition;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes set definition with set join', () => {
+  it.todo('lexes set definition with set join'
+  // , () => {
   //   const { latl, tokens } = assertionData.setDefinitionJoin;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes set definition with yield operation', () => {
+  it.todo('lexes set definition with yield operation'
+  // , () => {
   //   const { latl, tokens } = assertionData.setDefinitionYield;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes all set join operations', () => {
+  it.todo('lexes all set join operations'
+  // , () => {
   //   const { latl, tokens } = assertionData.setOperationsJoin;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes set filter, concat, and dissoc operations', () => {
+  it.todo('lexes set filter, concat, and dissoc operations'
+  // , () => {
   //   const { latl, tokens } = assertionData.setOperations;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // })
+  // }
+  )
 })
 
 // {
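
-- 
Reviewer notes, appended after the signature delimiter so `git am` ignores them.

grammar.js is nearley's generated parser for grammar.ne, so the rule changes
above were compiled rather than written by hand. Assuming the stock nearley
CLI is used (the project's exact npm script is not shown in this patch), the
generated file is refreshed with:

    npx nearleyc src/utils/latl/grammar.ne -o src/utils/latl/grammar.js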
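A minimal usage sketch of the new set-definition parse, mirroring the
'parses simple set definition' test above. The '../parser' import path and
the `parser` factory are assumptions, since the test file's import lines fall
outside these hunks; `.feed(latl).results` is used exactly as in the tests.

    import { parser } from '../parser';

    const latl = 'set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ ]';
    const feedResults = parser().feed(latl).results;

    // An unambiguous grammar yields a single parse, so feedResults[0]
    // deep-equals the simpleSetDefinition fixture AST:
    // { main: [ { set: { setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
    //                    setExpression: [ 'm̥', 'm', 'ɱ' ] } } ] }
    console.log(JSON.stringify(feedResults[0], null, 2));

Note that the simpleSetDefinition fixture keeps the `setExpression` key while
the commaSetDefinition fixture expects `items`, so the two tests currently
assert different AST shapes for the same construct.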