From 40aec305370eb7715ab576d8be015978c5e7ea0e Mon Sep 17 00:00:00 2001
From: Sorrel Bri
Date: Fri, 8 May 2020 23:32:49 -0700
Subject: [PATCH] parse AST for single set definition

---
 src/utils/latl/grammar.js            | 49 ++++++++++++++++++++--------
 src/utils/latl/grammar.ne            | 48 ++++++++++++++++++---------
 src/utils/latl/test/assertionData.js | 47 ++++++++++++++++++++------
 src/utils/latl/test/parser.test.js   | 37 +++++++++++++++------
 4 files changed, 131 insertions(+), 50 deletions(-)

diff --git a/src/utils/latl/grammar.js b/src/utils/latl/grammar.js
index 9f6ba68..cceab65 100644
--- a/src/utils/latl/grammar.js
+++ b/src/utils/latl/grammar.js
@@ -17,6 +17,7 @@ function id(x) { return x[0]; }
     return acc;
   }, []);
   const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+  const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 var grammar = {
     Lexer: lexer,
     ParserRules: [
@@ -24,32 +25,52 @@ var grammar = {
     {"name": "main$ebnf$1$subexpression$1", "symbols": ["_", "statement"]},
     {"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
     {"name": "main", "symbols": ["main$ebnf$1", "_"], "postprocess": pipe(
-        getTerminal,
-        clearNull,
-        flag('main'),
-        getTerminal,
+          getTerminal,
+          clearNull,
+          flag('main'),
+          getTerminal,
        ) },
     {"name": "_$ebnf$1$subexpression$1", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)]},
     {"name": "_$ebnf$1", "symbols": ["_$ebnf$1$subexpression$1"], "postprocess": id},
     {"name": "_$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
     {"name": "_", "symbols": ["_$ebnf$1"], "postprocess": remove},
     {"name": "__", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)], "postprocess": remove},
+    {"name": "equal", "symbols": [(lexer.has("equal") ? {type: "equal"} : equal)], "postprocess": remove},
     {"name": "statement", "symbols": ["comment"]},
-    {"name": "statement", "symbols": ["definition"], "postprocess": pipe(getTerminal)},
+    {"name": "statement", "symbols": ["definition"], "postprocess": pipe(
+        objFromArr
+      ) },
     {"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": pipe(getTerminal, remove)},
-    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": d => ({[d[0].value]: d[2]})},
+    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": pipe(
+        d => ({[d[0].value]: objFromArr(d[2]) }),
+      ) },
     {"name": "setDefinition$ebnf$1", "symbols": []},
-    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
+    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", "equal", "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
     {"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1", "setDefinition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression"], "postprocess": d => {
-      if (d.type === 'setIdentifier') return { setIdentifier: d.value }
-      return d
-    } },
-    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)]},
+    {"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", "equal", "__", "setExpression"], "postprocess":
+      pipe(
+        d => d.filter(t => !!t && t.length !== 0),
+        d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+        d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t)
+      )
+    },
+    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)], "postprocess":
+      pipe(
+        d => d.filter(t => t && t.length),
+        d => d.map(t => t.map(u => u[0])),
+        flag('setExpression')
+      ) },
     {"name": "phoneList$ebnf$1", "symbols": []},
-    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), (lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": []},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1", "symbols": [(lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": ["phoneList$ebnf$1$subexpression$1$ebnf$1", "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
+    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), "phoneList$ebnf$1$subexpression$1$ebnf$1"]},
     {"name": "phoneList$ebnf$1", "symbols": ["phoneList$ebnf$1", "phoneList$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "phoneList", "symbols": ["phoneList$ebnf$1", (lexer.has("phone") ? {type: "phone"} : phone)], "postprocess": pipe(d => d ? d.toString() : d)}
+    {"name": "phoneList", "symbols": ["phoneList$ebnf$1"], "postprocess":
+      pipe(
+        d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+      )
+    }
 ]
   , ParserStart: "main"
 }
diff --git a/src/utils/latl/grammar.ne b/src/utils/latl/grammar.ne
index fbb5cc8..1fd3f1b 100644
--- a/src/utils/latl/grammar.ne
+++ b/src/utils/latl/grammar.ne
@@ -13,6 +13,7 @@
     return acc;
   }, []);
   const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+  const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 %}
 
 @lexer lexer
@@ -23,7 +24,7 @@ main -> (_ statement):* _
       clearNull,
       flag('main'),
       getTerminal,
-  ) %}
+    ) %}
 
 _ -> (%whiteSpace):?
     {% remove %}
@@ -31,26 +32,41 @@
 __ -> %whiteSpace
     {% remove %}
 
+equal -> %equal
+    {% remove %}
+
 statement ->
   comment | definition
-    {% pipe(getTerminal) %}
+    {% pipe(
+      objFromArr
+    ) %}
 
 comment -> %comment
     {% pipe(getTerminal, remove) %}
 
 # SETS
 definition -> %kwSet __ setDefinition
-    {% d => ({[d[0].value]: d[2]}) %}
-setDefinition -> (%setIdentifier __ %equal __ setExpression %comma __):* %setIdentifier __ %equal __ setExpression
-    {% d => {
-      if (d.type === 'setIdentifier') return { setIdentifier: d.value }
-      return d
-    } %}
+    {% pipe(
+      d => ({[d[0].value]: objFromArr(d[2]) }),
+    ) %}
+    # {% flag('definition') %}
+setDefinition -> (%setIdentifier __ equal __ setExpression %comma __):* %setIdentifier __ equal __ setExpression
+    {%
+      pipe(
+        d => d.filter(t => !!t && t.length !== 0),
+        d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+        d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t)
+      )
+    %}
 setExpression -> %openSquareBracket _ phoneList _ %closeSquareBracket
-    # {% pipe(d => d.filter(t => t && t.length)) %}
-phoneList -> (%phone %comma _):* %phone
-    {% pipe(d => d ? d.toString() : d) %}
-    # {% d => d.filter(t => t && (t.type === 'phone' || t[0]) )
-    #         .flatMap(t => {
-    #           if (!t.length) return t;
-    #           return t[0].filter(st => st && st.type === 'phone')
-    #         }) %}
+    {%
+      pipe(
+        d => d.filter(t => t && t.length),
+        d => d.map(t => t.map(u => u[0])),
+        flag('setExpression')
+      ) %}
+phoneList -> (%phone (%comma _):* ):*
+    {%
+      pipe(
+        d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+      )
+    %}
diff --git a/src/utils/latl/test/assertionData.js b/src/utils/latl/test/assertionData.js
index 174efff..fedfa8c 100644
--- a/src/utils/latl/test/assertionData.js
+++ b/src/utils/latl/test/assertionData.js
@@ -10,12 +10,37 @@ export const assertionData = {
     code: ''
   },
   simpleSetDefinition: {
-    latl: `set PLOSIVES`,
+    latl: `set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ ]`,
     tokens: [
       { type: 'kwSet', value: 'set' },
       { type: 'whiteSpace', value: ' ' },
-      { type: 'setIdentifier', value: 'PLOSIVES' }
+      { type: 'setIdentifier', value: "NASAL_PULMONIC_CONSONANTS" },
+      { type: "whiteSpace", value: " ", },
+      { type: "equal", value: "=", },
+      { type: "whiteSpace", value: " ", },
+      { type: "openSquareBracket", value: "[", },
+      { type: "whiteSpace", value: " ", },
+      { type: "phone", value: "m̥", },
+      { type: "comma", value: ",", },
+      { type: "whiteSpace", value: " ", },
+      { type: "phone", value: "m", },
+      { type: "comma", value: ",", },
+      { type: "whiteSpace", value: " ", },
+      { type: "phone", value: "ɱ", },
+      { type: "whiteSpace", value: " ", },
+      { type: "closeSquareBracket", value: "]"}
     ],
+    AST: {
+      main: [
+        {
+          set: {
+            setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
+            setExpression: [ 'm̥', 'm', 'ɱ' ]
+          }
+        }
+      ]
+    }
+    ,
     code: ''
   },
   commaSetDefinition: {
@@ -137,14 +162,16 @@ set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊,
   AST: {
     main: [
       {
-        set: {
-          setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
-          items: [ 'm̥', 'm', 'ɱ', 'n̼', 'n̥', 'n', 'ɳ̊', 'ɳ', 'ɲ̊', 'ɲ', `ŋ`, ' ̊ŋ', 'ɴ' ]
-        },
-        set: {
-          setIdentifier: 'STOP_PULMONIC_CONSONANTS',
-          items: [ 'p', 'b', 'p̪', 'b̪', 't̼', 'd̼', 't', 'd', 'ʈ', 'ɖ', 'c', 'ɟ', 'k', 'ɡ', 'q', 'ɢ', 'ʡ', 'ʔ' ]
-        }
+        set: [
+          {
+            setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
+            items: [ 'm̥', 'm', 'ɱ', 'n̼', 'n̥', 'n', 'ɳ̊', 'ɳ', 'ɲ̊', 'ɲ', `ŋ`, ' ̊ŋ', 'ɴ' ]
+          },
+          {
+            setIdentifier: 'STOP_PULMONIC_CONSONANTS',
+            items: [ 'p', 'b', 'p̪', 'b̪', 't̼', 'd̼', 't', 'd', 'ʈ', 'ɖ', 'c', 'ɟ', 'k', 'ɡ', 'q', 'ɢ', 'ʡ', 'ʔ' ]
+          }
+        ]
       }
     ]
   }
diff --git a/src/utils/latl/test/parser.test.js b/src/utils/latl/test/parser.test.js
index 89bb518..ce36f87 100644
--- a/src/utils/latl/test/parser.test.js
+++ b/src/utils/latl/test/parser.test.js
@@ -10,6 +10,13 @@ describe('parser', () => {
     expect(feedResults[0]).toStrictEqual(AST)
   })
 
+  it('parses simple set definition', () => {
+    const { latl, AST } = assertionData.simpleSetDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  })
+
   it('parses multiple set definitions with comma operator', () => {
     const { latl, AST } = assertionData.commaSetDefinition;
     const feedResults = parser().feed(latl).results;
@@ -17,35 +24,45 @@ describe('parser', () => {
     expect(feedResults[0]).toStrictEqual(AST);
   });
 
-  // it('lexes set definition with alias', () => {
+  it.todo('lexes set definition with alias'
+  // , () => {
   //   const { latl, tokens } = assertionData.setAliasDefinition;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes set definition with set join', () => {
+  it.todo('lexes set definition with set join'
+  // , () => {
   //   const { latl, tokens } = assertionData.setDefinitionJoin;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes set definition with yield operation', () => {
+  it.todo('lexes set definition with yield operation'
+  // , () => {
   //   const { latl, tokens } = assertionData.setDefinitionYield;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes all set join operations', () => {
+  it.todo('lexes all set join operations'
+  // , () => {
   //   const { latl, tokens } = assertionData.setOperationsJoin;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // });
+  // }
+  );
 
-  // it('lexes set filter, concat, and dissoc operations', () => {
+  it.todo('lexes set filter, concat, and dissoc operations'
+  // , () => {
   //   const { latl, tokens } = assertionData.setOperations;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
-  // })
+  // }
+  )
 })
 
 // {
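
-- 
Reviewer notes, appended after the signature delimiter so `git am` ignores them.

grammar.js is nearley's generated parser for grammar.ne, so the rule changes
above were compiled rather than written by hand. Assuming the stock nearley
CLI is used (the project's exact npm script is not shown in this patch), the
generated file is refreshed with:

    npx nearleyc src/utils/latl/grammar.ne -o src/utils/latl/grammar.js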
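A minimal usage sketch of the new set-definition parse, mirroring the
'parses simple set definition' test above. The '../parser' import path and
the `parser` factory are assumptions, since the test file's import lines fall
outside these hunks; `.feed(latl).results` is used exactly as in the tests.

    import { parser } from '../parser';

    const latl = 'set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ ]';
    const feedResults = parser().feed(latl).results;

    // An unambiguous grammar yields a single parse, so feedResults[0]
    // deep-equals the simpleSetDefinition fixture AST:
    // { main: [ { set: { setIdentifier: 'NASAL_PULMONIC_CONSONANTS',
    //                    setExpression: [ 'm̥', 'm', 'ɱ' ] } } ] }
    console.log(JSON.stringify(feedResults[0], null, 2));

Note that the simpleSetDefinition fixture keeps the `setExpression` key while
the commaSetDefinition fixture expects `items`, so the two tests currently
assert different AST shapes for the same construct.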