Create LICENSE

patch test bug incorrectly oriented square brackets
stub working parse of or Operation on sets; phoneList being read as setAlias
2021-04-14 20:34:07 -04:00 · 2020-05-21 19:36:43 -07:00 · 2020-05-21 19:02:18 -07:00 · 2020-05-18 22:37:19 -07:00 · 2020-05-18 22:02:00 -07:00 · 2020-05-09 22:18:07 -07:00
12 changed files with 1275 additions and 766 deletions
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Sorrel
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@ -14,6 +14,12 @@ Features:
 - multi-character phone support
 - comparative runs for multiple rule sets

+## What is LATL?
+
+[Read the specification](/src/utils/latl/README.md)
+
+LATL is a compiled language that targets JavaScript, designed for linguistic analysis and transformations.
+
 ## How do I use FCA?

 An FCA run requires the user to define three parameters:
--- a/public/latl/waffle.latl
+++ b/public/latl/waffle.latl
@ -29,16 +29,16 @@

 ; -------- distinctive groups

-set PLOSIVES      [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
-    AFFRICATES    [ tʃʰ, dʒ ]
-    FRICATIVES    [ f, v, θ, ð, s, z, ʃ, ʒ, ç, x ]
-    NASALS        [ m, ɱ, n, ŋ ]
-    LIQUIDS       [ l, ɹ, ɹʲ, ɹˤ ]
-    SYLLABICS     [ m̩, n̩, l̩, ɹ̩ ]
-    VOWELS        [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]
-    GLIDES        [ j, w ]
-    LARYNGEALS    [ h, ɦ, ʔ ]
-    VOWELS        [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]
+set PLOSIVES    = [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
+    AFFRICATES  = [ tʃʰ, dʒ ]
+    FRICATIVES  = [ f, v, θ, ð, s, z, ʃ, ʒ, ç, x ]
+    NASALS      = [ m, ɱ, n, ŋ ]
+    LIQUIDS     = [ l, ɹ, ɹʲ, ɹˤ ]
+    SYLLABICS   = [ m̩, n̩, l̩, ɹ̩ ]
+    VOWELS      = [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]
+    GLIDES      = [ j, w ]
+    LARYNGEALS  = [ h, ɦ, ʔ ]
+    VOWELS      = [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]

 ; ---- implicit
 ; GLOBAL          { all sets }
@ -48,6 +48,8 @@ set PLOSIVES      [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
 ; { SET_A and SET_B }   inner join
 ; { SET_A or SET_B }    full outer join
 ; { not SET_A }             = { GLOBAL not SET_A }
+
+; ---- unnecessary sugar
 ; { not SET_A nor SET_B }   = { GLOBAL not { SET_A or SET_B } }

 ; ---- set character operations - non-mutable!
@ -62,11 +64,17 @@ set PLOSIVES      [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]

 ; ---- TENTATIVE!
 ; ---- set feature operations - non-mutable!
-; { [ X + feature1 - feature2 ] in SET_A }    FILTER:       where X is any character and feature1 and feature2 are filtering features
+; { [ + feature1 - feature2 ] in SET_A }    FILTER:       where feature1 and feature2 are filtering features
 ; { SET_A yield [ X + feature1 ] }            TRANSFORMATION:  performs transformation with (prepended or) appended character
 ; { SET_A yield [ X - feature1 ] }
 ; { SET_A yield [ X - feature1 + feature2 ] }
-; { [ X + feature1 - feature2 ] in SET_A yield [ - feature1 + feature2 ] }  combined FILTER and TRANSFROMATION
+; { [ X + feature1 - feature2 ] in SET_A yield [ - feature1 + feature2 ] }  combined FILTER and TRANSFORMATION
+
+; ---- MAPPING
+set PLOSIVES = [ p, t, k ],
+    FRICATIVES = [ f, s, x ],
+    ; pairs PLOSIVES with FRICATIVES that have matching features = [ pf, ts, kx ]
+    AFFRICATES = { PLOSIVES yield [ X concat { [ [ X ] - fricative  ] in FRICATIVES } ] }

 ; ---- example with join, character, and feature operations
 ; set SET_C = { [ PHONE +feature1 ] in { SET_A or SET_B } yield [ PHONE concat y ] }
--- a/src/utils/grammar.js
+++ b/src/utils/grammar.js
--- a/src/utils/latl/README.md
+++ b/src/utils/latl/README.md
@ -34,6 +34,23 @@ Sets are collections of pointers to phones. The GLOBAL set contains all phones,
 #### Global Set
 [ GLOBAL ] is a shorthand for [ GLOBAL.SETS ]
 #### Set Definition
+Sets are defined with the `set` keyword followed by a set identifier, an equal sign, and a set expression:
+```
+set SHORT_VOWELS = [ a, i, u ]
+```
+
+A single alias can be provided to the set during definition:
+```
+; the alias N can be used to refer to this set
+set NASAL_PULMONIC_CONSONANTS, N            = [ m, ɱ, n̼, n, ɳ, ɲ, ŋ, ɴ ]
+```
+
+Lists of sets can be defined in a single statement by separating the definitions with a comma followed by whitespace:
+```
+set PLOSIVES = [ p, t, k ],
+    FRICATIVES = [ f, s, x ],
+    LABIALIZED_PLOSIVES = { PLOSIVES yield [ X concat ʷ ] }
+```
 #### Set Usage
 #### Set Operations
 ##### 'and' Operation
--- a/src/utils/latl/codeGenerator.js
+++ b/src/utils/latl/codeGenerator.js
@ -0,0 +1,19 @@
+import { parser } from './parser';
+
+export const codeGenerator = (latl) => {
+  const results = parser().feed(latl).results;
+  
+  const nodeReader = (code, node) => {
+    if (node.length) {
+      return results.reduce(nodeReader, code)
+    }
+    if (!node) return code;
+    if (node.main) {
+      return nodeReader(code, node.main)
+    }
+    return code + node;
+  }
+
+  return nodeReader('', results)
+
+}
--- a/src/utils/latl/grammar.js
+++ b/src/utils/latl/grammar.js
@ -7,46 +7,108 @@ function id(x) { return x[0]; }
  const getTerminal = d => d ? d[0] : null;
  const getAll = d => d.map((item, i) => ({ [i]: item }));
  const flag = token => d => d.map(item => ({ [token]: item }))
-  const clearNull = d => d.filter(t => !!t);
+  const clearNull = d => d.filter(t => !!t && (t.length !== 1 || t[0])).map(t => t.length ? clearNull(t) : t);
  const flagIndex = d => d.map((item, i) => ({[i]: item}))
  const remove = _ => null;
  const append = d => d.join('');
  const constructSet =  d => d.reduce((acc, t) => { 
-    if (t && t.type === 'setIdentifier')  acc.push({set: t})
+    if (t && t.type === 'setIdentifier') acc.push({set: t});
    if (t && t.length) acc[acc.length - 1].phones = t;
    return acc;
  }, []);
-  const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d)
+  const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+  const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 var grammar = {
    Lexer: lexer,
    ParserRules: [
    {"name": "main$ebnf$1", "symbols": []},
-    {"name": "main$ebnf$1$subexpression$1", "symbols": ["statement"]},
+    {"name": "main$ebnf$1$subexpression$1", "symbols": ["_", "statement"]},
    {"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "main", "symbols": ["main$ebnf$1"], "postprocess": compose(flag('main'), getTerminal)},
+    {"name": "main", "symbols": ["main$ebnf$1", "_"], "postprocess":  pipe(
+          clearNull,
+          // recursive call to fix repeat?
+          d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+          d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+          flag('main'), 
+          getTerminal,
+        ) },
    {"name": "_$ebnf$1$subexpression$1", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)]},
    {"name": "_$ebnf$1", "symbols": ["_$ebnf$1$subexpression$1"], "postprocess": id},
    {"name": "_$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
    {"name": "_", "symbols": ["_$ebnf$1"], "postprocess": remove},
    {"name": "__", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)], "postprocess": remove},
+    {"name": "equal", "symbols": [(lexer.has("equal") ? {type: "equal"} : equal)], "postprocess": remove},
    {"name": "statement", "symbols": ["comment"]},
-    {"name": "statement", "symbols": ["definition"], "postprocess": compose(clearNull, getTerminal)},
-    {"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": compose(remove, getTerminal)},
-    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": d => ({token: 'setDefinition', sets: d[2]})},
-    {"name": "setDefinition$ebnf$1", "symbols": []},
-    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
-    {"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1", "setDefinition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression"], "postprocess": constructSet},
-    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)], "postprocess": d => d.filter(t => t && t.length)},
+    {"name": "statement", "symbols": ["definition"], "postprocess":  pipe(
+          d => d.flatMap(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+          // recursive call to fit repeat?
+          d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+          d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+          // may split from other definition statements
+          d => d.map(t => t && t.length > 1 ? ({ type: 'set', ...objFromArr(t) }) :  null)
+        ) },
+    {"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": pipe(getTerminal, remove)},
+    {"name": "definition$ebnf$1", "symbols": []},
+    {"name": "definition$ebnf$1$subexpression$1", "symbols": ["setDefinition", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
+    {"name": "definition$ebnf$1", "symbols": ["definition$ebnf$1", "definition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
+    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "definition$ebnf$1", "setDefinition"], "postprocess":  pipe(
+          // not yet sure why this call is required twice
+          d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+          d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+          d => d.map(u => u && u.length ? u.map(v => v.length ? v.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet')[0] : v) : u),
+          clearNull,
+        ) },
+    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": ["setAlias"]},
+    {"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1$subexpression$1"], "postprocess": id},
+    {"name": "setDefinition$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
+    {"name": "setDefinition", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "setDefinition$ebnf$1", "__", "equal", "__", "setExpression"], "postprocess":  
+        pipe(
+          d => d.filter(t => !!t && t.length !== 0),
+          d => d.map(u => u && u.length ? u.map(t => t && t.length ? t.filter(v => v && v.type !== 'comma') : t) : u),
+          d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+          d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t),
+          d => d.map(t => t.length ?
+            // pretty ugly ([ { type: 'aias', alias: [ string ] }] ) => { setAlias: str }
+            { setAlias: t.reduce((aliases, token) => token && token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] }
+          : t),
+        )    
+                        },
+    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)]},
+    {"name": "setExpression$ebnf$1$subexpression$1", "symbols": ["setOperation"]},
+    {"name": "setExpression$ebnf$1", "symbols": ["setExpression$ebnf$1$subexpression$1"], "postprocess": id},
+    {"name": "setExpression$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
+    {"name": "setExpression", "symbols": [(lexer.has("openCurlyBracket") ? {type: "openCurlyBracket"} : openCurlyBracket), "_", "setExpression$ebnf$1", "_", (lexer.has("closeCurlyBracket") ? {type: "closeCurlyBracket"} : closeCurlyBracket)], "postprocess":  
+        pipe(
+          // filters commas and whitespace
+          d => d.filter(t => t && t.length),
+          d => d.map(t => t.map(u => u[0])),
+          flag('setExpression') 
+        ) },
+    {"name": "setAlias", "symbols": [(lexer.has("comma") ? {type: "comma"} : comma), "_", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier)], "postprocess":  pipe(
+          d => d && d.length ? d.filter(t => !!t) : d,
+          d => d.map(t => t.type === 'setIdentifier' ? t.toString() : null),
+          d => d.filter(t => !!t),
+          d => ({type: 'alias', alias: d }),
+        ) },
    {"name": "phoneList$ebnf$1", "symbols": []},
-    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), (lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": []},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1", "symbols": [(lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": ["phoneList$ebnf$1$subexpression$1$ebnf$1", "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
+    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), "phoneList$ebnf$1$subexpression$1$ebnf$1"]},
    {"name": "phoneList$ebnf$1", "symbols": ["phoneList$ebnf$1", "phoneList$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "phoneList", "symbols": ["phoneList$ebnf$1", (lexer.has("phone") ? {type: "phone"} : phone)], "postprocess":  d => d.filter(t => t && (t.type === 'phone' || t.length) )
-        .map(t => {
-          if (!t.length) return t;
-          t.filter(st => st && st.type === 'phone')
-          return t;
-        }) }
+    {"name": "phoneList", "symbols": ["phoneList$ebnf$1"], "postprocess":  
+        pipe(
+          d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+        )
+                        },
+    {"name": "setOperation", "symbols": ["orOperation"]},
+    {"name": "setOperation", "symbols": [(lexer.has("identifier") ? {type: "identifier"} : identifier)], "postprocess":  pipe(
+          d => d.type ? d : ({ identifier: d.toString(), type: 'identifier' })
+        )},
+    {"name": "orOperation", "symbols": ["_", "setOperation", "__", (lexer.has("kwSetOr") ? {type: "kwSetOr"} : kwSetOr), "__", "setOperation", "_"], "postprocess":  pipe(
+          d => d.filter(d => !!d),
+          d => ({ type: 'operator', operator: 'or', operands: [ d[0], d[2] ] }),
+        ) }
 ]
  , ParserStart: "main"
 }
--- a/src/utils/latl/grammar.ne
+++ b/src/utils/latl/grammar.ne
@ -3,22 +3,30 @@
  const getTerminal = d => d ? d[0] : null;
  const getAll = d => d.map((item, i) => ({ [i]: item }));
  const flag = token => d => d.map(item => ({ [token]: item }))
-  const clearNull = d => d.filter(t => !!t);
+  const clearNull = d => d.filter(t => !!t && (t.length !== 1 || t[0])).map(t => t.length ? clearNull(t) : t);
  const flagIndex = d => d.map((item, i) => ({[i]: item}))
  const remove = _ => null;
  const append = d => d.join('');
  const constructSet =  d => d.reduce((acc, t) => { 
-    if (t && t.type === 'setIdentifier')  acc.push({set: t})
+    if (t && t.type === 'setIdentifier') acc.push({set: t});
    if (t && t.length) acc[acc.length - 1].phones = t;
    return acc;
  }, []);
-  const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d)
+  const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+  const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 %}

@lexer lexer

-main            -> (statement):* 
-  {% compose(flag('main'), getTerminal) %}
+main            -> (_ statement):* _
+  {% pipe(
+    // clearNull (prologue helper) recursively drops falsy entries and single-element arrays whose only item is falsy
+    clearNull,
+    // recursive call to fix repeat?
+    // each of the two identical maps unwraps one level of single-element array whose element is truthy
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    // flag('main') wraps every item as { main: item }; getTerminal then keeps only the first one
+    flag('main'), 
+    getTerminal,
+  ) %}

 _               -> (%whiteSpace):? 
  {% remove %}
@ -26,54 +34,76 @@ _               -> (%whiteSpace):?
 __              -> %whiteSpace 
  {% remove %}

+equal           -> %equal
+  {% remove %}
+
 statement       -> comment | definition
-  {% compose(clearNull, getTerminal) %}
+  {% pipe(
+    // this postprocessor is attached to the `definition` alternative only
+    // flattens one level, dropping falsy items and comma / kwSet tokens from nested arrays
+    d => d.flatMap(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+    // recursive call to fit repeat?
+    // each of the two identical maps unwraps one level of single-element array whose element is truthy
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    // may split from other definition statements
+    // entries with length > 1 are merged into one { type: 'set', ... } object; every other entry becomes null
+    d => d.map(t => t && t.length > 1 ? ({ type: 'set', ...objFromArr(t) }) :  null)
+  ) %}

 comment         -> %comment 
-  {% compose(remove, getTerminal) %}
+  {% pipe(getTerminal, remove) %}

 # SETS
-definition      -> %kwSet __ setDefinition {% d => ({token: 'setDefinition', sets: d[2]}) %}
-setDefinition   -> (%setIdentifier __ %equal __ setExpression %comma __):* %setIdentifier __ %equal __ setExpression
-  {% constructSet %}
+definition      -> %kwSet __ (setDefinition %comma __):* setDefinition
+                {% pipe(
+                  // matches the set keyword, zero or more comma-terminated setDefinitions, then a final setDefinition
+                  // not yet sure why this call is required twice
+                  // NOTE(review): both calls filter the same nesting level, so the second looks redundant — confirm before removing
+                  d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+                  d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+                  // one level deeper: for items that have a length, keep only the first entry surviving the same filter
+                  d => d.map(u => u && u.length ? u.map(v => v.length ? v.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet')[0] : v) : u),
+                  // finally drop falsy entries and [ falsy ] wrappers recursively
+                  clearNull,
+                ) %}
+setDefinition   -> %setIdentifier (setAlias):? __ equal __ setExpression
+                {% 
+                  pipe(
+                    // drop nulls (whitespace, the equal rule, an absent alias) and empty arrays
+                    d => d.filter(t => !!t && t.length !== 0),
+                    // inside each array entry, strip comma tokens from nested arrays
+                    d => d.map(u => u && u.length ? u.map(t => t && t.length ? t.filter(v => v && v.type !== 'comma') : t) : u),
+                    // replace the setIdentifier token with { setIdentifier: <its string form> }
+                    d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+                    // unwrap an array whose first element owns a setExpression key to that first element
+                    d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t),
+                    d => d.map(t => t.length ?
+                      // pretty ugly ([ { type: 'alias', alias: [ string ] }] ) => { setAlias: str }
+                      // NOTE(review): every entry that still has a length becomes { setAlias }, not only alias arrays — confirm intended
+                      { setAlias: t.reduce((aliases, token) => token && token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] }
+                    : t),
+                  )    
+                %}
 setExpression   -> %openSquareBracket _ phoneList _ %closeSquareBracket
-  {% d => d.filter(t => t && t.length) %}
-phoneList       -> (%phone %comma _):* %phone
-  {% d => d.filter(t => t && (t.type === 'phone' || t.length) )
-  .map(t => {
-    if (!t.length) return t;
-    t.filter(st => st && st.type === 'phone')
-    return t;
-  }) %}
+                | %openCurlyBracket _ (setOperation):? _ %closeCurlyBracket
+                {% 
+                  pipe(
+                    // this postprocessor is attached to the curly-bracket alternative only
+                    // keeps only entries with a truthy length; nulls produced by whitespace are dropped
+                    d => d.filter(t => t && t.length),
+                    // replace each item inside the surviving arrays with its first element
+                    d => d.map(t => t.map(u => u[0])),
+                    // wrap every remaining entry as { setExpression: entry }
+                    flag('setExpression') 
+                  ) %}

+setAlias        -> %comma _ %setIdentifier
+                {% pipe(
+                  // drop nulls, map the setIdentifier token to its string form (anything else to null), drop nulls again
+                  d => d && d.length ? d.filter(t => !!t) : d,
+                  d => d.map(t => t.type === 'setIdentifier' ? t.toString() : null),
+                  d => d.filter(t => !!t),
+                  // result shape: { type: 'alias', alias: [ <identifier string> ] }
+                  d => ({type: 'alias', alias: d }),
+                ) %}

-# assignmentExpression:
-# 	/*
-# 	 * SPEC:
-# 	 * conditionalExpression
-# 	 * | leftHandSideExpression assignmentOperator assignmentExpression
-# 	 */
-# 	(leftHandSideExpression assignmentOperator) =>
-# 	leftHandSideExpression assignmentOperator assignmentExpression
-# 	| conditionalExpression
-# 	;
+phoneList       -> (%phone (%comma _):* ):*
+                {% 
+                  pipe(
+                    // for each repetition, keep only the entries whose type is 'phone' and convert them to strings
+                    d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+                  )
+                %}
+setOperation    -> orOperation 
+                |  %identifier
+                {% pipe(
+                  // attached to the %identifier alternative only
+                  // d is the matched-symbols array, so d.type is presumably always undefined and the object branch is taken — TODO confirm
+                  d => d.type ? d : ({ identifier: d.toString(), type: 'identifier' })
+                )%}

-# assignmentExpressionNoln:
-# 	conditionalExpressionNoln
-# 	| leftHandSideExpression assignmentOperator assignmentExpressionNoln
-# 	;
-
-# assignmentOperator:
-# 	/* note that in the grammar these are listed out explicitely */
-# 	EQ | TIMESEQ | DIVIDEEQ | PERCENTEQ | PLUSEQ | MINUSEQ | LSHIFTEQ | RSHIFTEQ
-# 	| GT3EQ | AMPEREQ | CAROTEQ | PIPEEQ
-# 	;
-
-# expression:
-# 	/* 
-# 	 * SPEC:
-# 	 * assignmentExpression
-# 	 * | expression COMMA assignmentExpression
-# 	 */
-# 	assignmentExpression (expressionTail)*
-# 	;
+orOperation     -> _ setOperation __ %kwSetOr __ setOperation _
+                {% pipe(
+                  // drop the nulls produced by the whitespace rules
+                  d => d.filter(d => !!d),
+                  // of what remains, indexes 0 and 2 are used as the operands (index 1 is presumably the kwSetOr token)
+                  d => ({ type: 'operator', operator: 'or', operands: [ d[0], d[2] ] }),
+                ) %}
--- a/src/utils/latl/lexer.js
+++ b/src/utils/latl/lexer.js
@ -1,21 +1,28 @@
-const moo = require('moo');
+const moo = require("moo");

 const lexer = moo.states({
  main: {
    comment: /;.*$/,
-    star:          { match: /\*/, push: 'epoch' },
-    slash:                { match: /\//, push: 'lexicon' },
+    star: { match: /\*/, push: "epoch" },
+    slash: { match: /\//, push: "lexicon" },
    // change so that identifiers are always upper, keywords are always lower, phones are always lower
-    'kwSet':             { match: 'set', type: moo.keywords({ 'kwSet': 'set '}), push: 'setDefinition'},
-    identifier:           { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, },
-    openBracket:          { match: /\[/, push: 'feature' },
+    kwSet: {
+      match: "set",
+      type: moo.keywords({ kwSet: "set " }),
+      push: "setDefinition",
+    },
+    identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/ },
+    openBracket: { match: /\[/, push: "feature" },
    whiteSpace: { match: /\s+/, lineBreaks: true },
-    newLine:              { match: /\n+/, lineBreaks: true }
+    newLine: { match: /\n+/, lineBreaks: true },
  },

  epoch: {
-    identifier:           { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, push: 'rule' },
-    openParen:            { match: /\(/, push: 'ruleDefinition' },
+    identifier: {
+      match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/,
+      push: "rule",
+    },
+    openParen: { match: /\(/, push: "ruleDefinition" },
    pipe: { match: /\|/, pop: true },
    greaterThan: /\>/,
    arrow: /\-\>/,
@ -23,60 +30,60 @@ const lexer = moo.states({
    slash: /\//,
    dot: /\./,
    underscore: /\_/,
-    newLine:              { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
  },

  ruleDefinition: {
-    doubleTick:           { match: /``/, push: 'ruleName' },
-    singleTick:           { match: /`/, push: 'ruleDescription' },
+    doubleTick: { match: /``/, push: "ruleName" },
+    singleTick: { match: /`/, push: "ruleDescription" },
    // push rule
    closeParen: { match: /\)/, pop: true },
-    newLine:              { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
  },

  ruleName: {
    ruleName: { match: /.+(?=``)/ },
-    doubleTick:           { match: /``/, pop: true }
+    doubleTick: { match: /``/, pop: true },
  },

  ruleDescription: {
    ruleDescription: { match: /.+(?=`)/ },
-    singleTick:           { match: /`/, pop: true }
+    singleTick: { match: /`/, pop: true },
  },

  rule: {
-    openSquareBracket:    { match: /\[/, push: 'ruleFeature' },
+    openSquareBracket: { match: /\[/, push: "ruleFeature" },
    // whiteSpace:           { match: /\s/ },
-    newLine:              { match: /\n/, pop: true, lineBreaks: true }
+    newLine: { match: /\n/, pop: true, lineBreaks: true },
  },

  ruleFeature: {
    ruleFeature: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/ },
    closeBracket: { match: /\]/, pop: true },
-    newLine:              { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
  },

  lexicon: {
    slash: { match: /\//, pop: true },
-    newLine:              { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
  },

  feature: {
    closeBracket: { match: /\]/, pop: true },
    positiveAssignment: /\+=/,
    negativeAssignment: /\-=/,
-    newLine:              { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
  },

  setDefinition: {
    comment: /;.*$/,
    setIdentifier: { match: /[A-Z]+[A-Z_]*/ },
-    openCurlyBracket:     { match: /\{/, push: 'setOperation' },
+    openCurlyBracket: { match: /\{/, push: "setOperation" },
    equal: /=/,
    openSquareBracket: /\[/,
    phone: /[\u00c0-\u03FFa-z]+/,
    closeSquareBracket: { match: /\]/ },
-    comma:                { match: /,/, push: 'commaOperation' },
+    comma: { match: /,/, push: "commaOperation" },
    whiteSpace: { match: /[\t ]+/ },
    newLine: { match: /\n/, pop: true, lineBreaks: true },
  },
@ -84,24 +91,25 @@ const lexer = moo.states({
  setOperation: {
    closeCurlyBracket: { match: /\}/, pop: true },
    // ! restrict identifiers
-    keyword:              { match: ['not', 'and', 'or', 'nor', 'in', 'yield', 'concat', 'dissoc'], type: moo.keywords({
-      'kw-set-not':       'not' ,
-      'kw-set-and':       'and' ,
-      'kw-set-or':        'or' ,
-      'kw-set-nor':       'nor' ,
-      'kw-set-in':        'in' ,
-      'kw-set-yield':     'yield' ,
-      'kw-set-concat':    'concat',
-      'kw-set-dissoc':    'dissoc'
-      })
+    keyword: {
+      match: ["not", "and", "or", "nor", "in", "yield", "concat", "dissoc"],
+      type: moo.keywords({
+        kwSetNot: "not",
+        kwSetAnd: "and",
+        kwSetOr: "or",
+        kwSetNor: "nor",
+        kwSetIn: "in",
+        kwSetYield: "yield",
+        kwSetConcat: "concat",
+        kwSetDissoc: "dissoc",
+      }),
    },
    identifier: /[A-Z]+[A-Z_]+/,
-    whiteSpace:           /[\t ]+/,
-    openSquareBracket:    /\]/,
-    closeSquareBracket:   /\[/,
+    whiteSpace: { match: /\s+/, lineBreaks: true },
+    openSquareBracket: /\[/,
+    closeSquareBracket: /\]/,
    identifier: /[A-Z]+[A-Z_]*/,
    phone: /[\u00c0-\u03FFa-z]+/,
-    newLine:              { match: /\n/, lineBreaks: true }
  },

  commaOperation: {
@ -109,9 +117,8 @@ const lexer = moo.states({
    // this prevents popping back to main
    comment: /\s*;.*$/,
    whiteSpace: { match: /\s+/, lineBreaks: true, pop: true },
-    newLine:              { match: /\n/, lineBreaks: true, pop: true }
-  }
-  
+    newLine: { match: /\n/, lineBreaks: true, pop: true },
+  },
 });

 module.exports = { lexer };
--- a/src/utils/latl/test/assertionData.js
+++ b/src/utils/latl/test/assertionData.js
--- a/src/utils/latl/test/codeGenerator.test.js
+++ b/src/utils/latl/test/codeGenerator.test.js
@ -0,0 +1,10 @@
+import { assertionData } from './assertionData';
+import { codeGenerator } from '../codeGenerator';
+
+// Suite for codeGenerator, driven by fixtures from assertionData.
+describe('codeGenerator', () => {
+  it('parses simple comment', () => {
+    // the fixture supplies the LATL source and the code string codeGenerator is expected to return for it
+    const { latl, code } = assertionData.simpleComment;
+    const generatedCode = codeGenerator(latl);
+    expect(generatedCode).toEqual(code);
+  });
+})
--- a/src/utils/latl/test/parser.test.js
+++ b/src/utils/latl/test/parser.test.js
@ -1,49 +1,180 @@
-import { lexer } from '../lexer';
-import { parser } from '../parser';
-import { assertionData } from './assertionData';
+import { lexer } from "../lexer";
+import { parser } from "../parser";
+import { assertionData } from "./assertionData";

-describe('parser', () => {
-  it('parses simple comment', () => {
-    const { latl } = assertionData.simpleComment;
-    const AST = parser().feed(latl).results;
-    expect(AST.length).toBe(1);
-    console.log(AST[0])
-    // expect(AST[0]).toStrictEqual()
-  })
+describe("parser", () => {
+  it("parses simple comment", () => {
+    const { latl, AST } = assertionData.simpleComment;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  });

-  // it('parses multiple set definitions with comma operator', () => {
-  //   const { latl } = assertionData.commaSetDefinition;
-  //   const AST = parser().feed(latl)
-  //   console.log(AST)
-  // });
+  it("parses simple set definition", () => {
+    const { latl, AST } = assertionData.simpleSetDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  });

-  // it('lexes set definition with alias', () => {
-  //   const { latl, tokens } = assertionData.setAliasDefinition;
-  //   const stream           = getStream(latl);
-  //   expect(stream).toStrictEqual(tokens);
-  // });
+  it("parses multiple set definitions with comma operator", () => {
+    const { latl, AST } = assertionData.commaSetDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  });

-  // it('lexes set definition with set join', () => {
-  //   const { latl, tokens } = assertionData.setDefinitionJoin;
-  //   const stream           = getStream(latl);
-  //   expect(stream).toStrictEqual(tokens);
-  // });
+  it("lexes set definition with alias", () => {
+    const { latl, AST } = assertionData.setAliasDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults[0]).toStrictEqual(AST);
+  });

-  // it('lexes set definition with yield operation', () => {
+  it.skip("lexes set definition with set join", () => {
+    const { latl, AST } = assertionData.setDefinitionJoin;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults[0]).toStrictEqual(AST);
+  });
+
+  it.todo(
+    "lexes set definition with yield operation"
+    // , () => {
    //   const { latl, tokens } = assertionData.setDefinitionYield;
    //   const stream           = getStream(latl);
    //   expect(stream).toStrictEqual(tokens);
-  // });
+    // }
+  );

-  // it('lexes all set join operations', () => {
+  it.todo(
+    "lexes all set join operations"
+    // , () => {
    //   const { latl, tokens } = assertionData.setOperationsJoin;
    //   const stream           = getStream(latl);
    //   expect(stream).toStrictEqual(tokens);
-  // });
+    // }
+  );

-  // it('lexes set filter, concat, and dissoc operations', () => {
+  it.todo(
+    "lexes set filter, concat, and dissoc operations"
+    // , () => {
    //   const { latl, tokens } = assertionData.setOperations;
    //   const stream           = getStream(latl);
    //   expect(stream).toStrictEqual(tokens);
-  // })
-})
+    // }
+  );
+});
+
+// {
+//   "set":
+//     [
+//       [
+//         [
+//           {
+//             "col": 5,
+//             "line": 2,
+//             "lineBreaks": 0,
+//             "offset": 5,
+//             "text": "NASAL_PULMONIC_CONSONANTS",
+//             "toString": [tokenToString],
+//             "type": "setIdentifier",
+//             "value": "NASAL_PULMONIC_CONSONANTS",
+//           },
+//           null,
+//           {
+//             "col": 45,
+//             "line": 2,
+//             "lineBreaks": 0,
+//             "offset": 45,
+//             "text": "=",
+//             "toString": [tokenToString],
+//             "type": "equal",
+//             "value": "=",
+//           },
+//           null,
+//           [
+//             [
+//               {
+//                 "col": 49,
+//                 "line": 2,
+//                 "lineBreaks": 0,
+//                 "offset": 49,
+//                 "text": "m̥",
+//                 "toString": [tokenToString],
+//                 "type": "phone",
+//                 "value": "m̥",
+//               },
+//               {
+//                 "col": 91,
+//                 "line": 2,
+//                 "lineBreaks": 0,
+//                 "offset": 91,
+//                 "text": "ɴ",
+//                 "toString": [tokenToString],
+//                 "type": "phone",
+//                 "value": "ɴ",
+//               },
+//             ],
+//           ],
+//           {
+//             "col": 94,
+//             "line": 2,
+//             "lineBreaks": 0,
+//             "offset": 94,
+//             "text": ",",
+//             "toString": [tokenToString],
+//             "type": "comma",
+//             "value": ",",
+//           },
+//           null,
+//         ],
+//               ],
+//     -         "setIdentifier": "STOP_PULMONIC_CONSONANTS",
+//       {
+//         "col": 5,
+//         "line": 3,
+//         "lineBreaks": 0,
+//         "offset": 100,
+//         "text": "STOP_PULMONIC_CONSONANTS",
+//         "toString": [tokenToString],
+//         "type": "setIdentifier",
+//         "value": "STOP_PULMONIC_CONSONANTS",
+//       },
+//       null,
+//       {
+//         "col": 45,
+//         "line": 3,
+//         "lineBreaks": 0,
+//         "offset": 140,
+//         "text": "=",
+//         "toString": [tokenToString],
+//         "type": "equal",
+//         "value": "=",
+//               },
+//       null,
+//       [
+//         [
+//           {
+//             "col": 49,
+//             "line": 3,
+//             "lineBreaks": 0,
+//             "offset": 144,
+//             "text": "p",
+//             "toString": [tokenToString],
+//             "type": "phone",
+//             "value": "p",
+//           },
+//           {
+//             "col": 104,
+//             "line": 3,
+//             "lineBreaks": 0,
+//             "offset": 199,
+//             "text": "ʔ",
+//             "toString": [tokenToString],
+//             "type": "phone",
+//             "value": "ʔ",
+//           },
+//         ],
+//       ],
+//     ],
+//     "token": "kwSet",
+//   }
Author	SHA1	Message	Date
Sorrel	96bbb274df	Create LICENSE	2021-04-14 20:34:07 -04:00
Sorrel Bri	bb69a9ffa4	patch test bug incorrectly oriented square brackets	2020-05-21 19:36:43 -07:00
Sorrel Bri	dfae87e408	stub working parse of or Operation on sets; phoneList being read as setAlias	2020-05-21 19:02:18 -07:00
Sorrel Bri	c264b56c2e	stub AST results for set definition with join	2020-05-18 22:37:19 -07:00
Sorrel Bri	73761e6f60	fix syntax errors in example latl file	2020-05-18 22:02:00 -07:00
Sorrel Bri	bb8c05a579	add support for set aliases	2020-05-09 22:18:07 -07:00
Sorrel Bri	9619b4a07c	update latl README with set definition	2020-05-09 16:24:00 -07:00
Sorrel Bri	abfe14b410	construct AST properly for multi set definitions	2020-05-09 15:22:01 -07:00
Sorrel Bri	40aec30537	parse AST for single set definition	2020-05-08 23:32:49 -07:00
Sorrel Bri	3d4d1cd66e	hack set definition postprocessors	2020-05-07 23:24:19 -07:00
Sorrel Bri	dee27b0d30	init codeGenerator in latl	2020-05-06 22:31:15 -07:00
Sorrel Bri	432630e600	add postprocessors to grammar.ne for cleaning tree of empty nodes	2020-04-14 22:03:56 -07:00