Compare commits
12 commits
| SHA1 |
|---|
| 96bbb274df |
| bb69a9ffa4 |
| dfae87e408 |
| c264b56c2e |
| 73761e6f60 |
| bb8c05a579 |
| 9619b4a07c |
| abfe14b410 |
| 40aec30537 |
| 3d4d1cd66e |
| dee27b0d30 |
| 432630e600 |
12 changed files with 1275 additions and 766 deletions
LICENSE (new file, 21 lines)
```diff
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Sorrel
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
```
```diff
@@ -14,6 +14,12 @@ Features:
 - multi-character phone support
 - comparative runs for multiple rule sets
+
+## What is LATL?
+
+[Read the specification](/src/utils/latl/README.md)
+
+LATL is a JavaScript targeting compiled language for doing linguistic analysis and transformations.
 
 ## How do I use FCA?
 
 An FCA run requires the user to define three parameters:
```
```diff
@@ -29,16 +29,16 @@
 
 ; -------- distinctive groups
 
-set PLOSIVES [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
-    AFFRICATES [ tʃʰ, dʒ ]
-    FRICATIVES [ f, v, θ, ð, s, z, ʃ, ʒ, ç, x ]
-    NASALS [ m, ɱ, n, ŋ ]
-    LIQUIDS [ l, ɹ, ɹʲ, ɹˤ ]
-    SYLLABICS [ m̩, n̩, l̩, ɹ̩ ]
-    VOWELS [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]
-    GLIDES [ j, w ]
-    LARYNGEALS [ h, ɦ, ʔ ]
-    VOWELS [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]
+set PLOSIVES = [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
+    AFFRICATES = [ tʃʰ, dʒ ]
+    FRICATIVES = [ f, v, θ, ð, s, z, ʃ, ʒ, ç, x ]
+    NASALS = [ m, ɱ, n, ŋ ]
+    LIQUIDS = [ l, ɹ, ɹʲ, ɹˤ ]
+    SYLLABICS = [ m̩, n̩, l̩, ɹ̩ ]
+    VOWELS = [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]
+    GLIDES = [ j, w ]
+    LARYNGEALS = [ h, ɦ, ʔ ]
+    VOWELS = [ æ, e, ə, ɑ, ɔ, ɪ̞, ɛ, ʌ, ʊ̞, i, u̟ ]
 
 ; ---- implicit
 ; GLOBAL { all sets }
```
```diff
@@ -48,6 +48,8 @@ set PLOSIVES [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
 ; { SET_A and SET_B } inner join
 ; { SET_A or SET_B } full outer join
 ; { not SET_A } = { GLOBAL not SET_A }
+
+; ---- unnecessary sugar
 ; { not SET_A nor SET_B } = { GLOBAL not { SET_A or SET_B } }
 
 ; ---- set character operations - non-mutable!
```
```diff
@@ -62,11 +64,17 @@ set PLOSIVES [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
 
 ; ---- TENTATIVE!
 ; ---- set feature operations - non-mutable!
 ; { [ X + feature1 - feature2 ] in SET_A } FILTER: where X is any character and feature1 and feature2 are filtering features
 ; { [ + feature1 - feature2 ] in SET_A } FILTER: where feature1 and feature2 are filtering features
 ; { SET_A yield [ X + feature1 ] } TRANSFORMATION: performs transformation with (prepended or) appended character
 ; { SET_A yield [ X - feature1 ] }
 ; { SET_A yield [ X - feature1 + feature2 ] }
-; { [ X + feature1 - feature2 ] in SET_A yield [ - feature1 + feature2 ] } combined FILTER and TRANSFROMATION
+; { [ X + feature1 - feature2 ] in SET_A yield [ - feature1 + feature2 ] } combined FILTER and TRANSFORMATION
+
+; ---- MAPPING
+set PLOSIVES = [ p, t, k ],
+    FRICATIVES = [ f, s, x ],
+    ; pairs PLOSIVES with FRICATIVES that have matching features = [ pf, ts, kx ]
+    AFFRICATES = { PLOSIVES yield [ X concat { [ [ X ] - fricative ] in FRICATIVES } ] }
 
 ; ---- example with join, character, and feature operations
 ; set SET_C = { [ PHONE +feature1 ] in { SET_A or SET_B } yield [ PHONE concat y ] }
```
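The MAPPING comment above derives affricates by pairing each plosive with a feature-matched fricative. As a hedged illustration only (not part of the diff, and assuming the two sets are listed in matching feature order, so index-wise pairing stands in for the feature matching LATL's yield/concat would perform):

```js
// Hypothetical sketch of the MAPPING example's intended result.
const PLOSIVES = ['p', 't', 'k'];
const FRICATIVES = ['f', 's', 'x'];
const AFFRICATES = PLOSIVES.map((plosive, i) => plosive + FRICATIVES[i]);
console.log(AFFRICATES); // [ 'pf', 'ts', 'kx' ]
```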
````diff
@@ -34,6 +34,23 @@ Sets are collections of pointers to phones. The GLOBAL set contains all phones,
 #### Global Set
 [ GLOBAL ] is a shorthand for [ GLOBAL.SETS ]
 #### Set Definition
+Sets are defined with the set keyword followed by an equal sign and a set expression:
+```
+set SHORT_VOWELS = [ a, i, u ]
+```
+
+A single alias can be provided to the set during definition:
+```
+; the alias N can be used to refer to this set
+set NASAL_PULMONIC_CONSONANTS, N = [ m, ɱ, n̼, n, ɳ, ɲ, ŋ, ɴ ]
+```
+
+Lists of sets can be defined using a comma followed by whitespace syntax
+```
+set PLOSIVES = [ p, t, k ],
+    FRICATIVES = [ f, s, x ],
+    LABIALIZED_PLOSIVES = { PLOSIVES yield [ X concat ʷ ] }
+```
 #### Set Usage
 #### Set Operations
 ##### 'and' Operation
````
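The syntax documented above is what the parser added in this compare consumes. A minimal sketch of exercising it, modeled on the test files later in this diff (the sample LATL string is an assumption, and the relative import path depends on where the snippet lives):

```js
import { parser } from './parser';

// LATL source using the documented list-of-sets syntax.
const latl = `
set PLOSIVES = [ p, t, k ],
    FRICATIVES = [ f, s, x ]
`;

// feed() consumes the source; results holds the parses
// (a single result when the grammar is unambiguous).
const [ast] = parser().feed(latl).results;
console.log(ast);
```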
src/utils/latl/codeGenerator.js (new file, 19 lines)
```diff
@@ -0,0 +1,19 @@
+import { parser } from './parser';
+
+export const codeGenerator = (latl) => {
+  const results = parser().feed(latl).results;
+
+  const nodeReader = (code, node) => {
+    if (node.length) {
+      return results.reduce(nodeReader, code)
+    }
+    if (!node) return code;
+    if (node.main) {
+      return nodeReader(code, node.main)
+    }
+    return code + node;
+  }
+
+  return nodeReader('', results)
+
+}
```
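A minimal usage sketch for the new module, mirroring the codeGenerator test added later in this compare (the sample input is an assumption; any LATL string the parser accepts works):

```js
import { codeGenerator } from './codeGenerator';

// The generator parses the LATL source, then walks the resulting AST,
// concatenating leaf nodes back into a single code string.
const code = codeGenerator('; a lone comment');
console.log(code);
```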
```diff
@@ -5,48 +5,110 @@ function id(x) { return x[0]; }
 
 const { lexer } = require('./lexer.js');
 const getTerminal = d => d ? d[0] : null;
-const getAll = d => d.map((item, i) => ({[i]: item}));
-const flag = token => d => d.map(item => ({[token]: item}))
-const clearNull = d => d.filter(t => !!t);
+const getAll = d => d.map((item, i) => ({ [i]: item }));
+const flag = token => d => d.map(item => ({ [token]: item }))
+const clearNull = d => d.filter(t => !!t && (t.length !== 1 || t[0])).map(t => t.length ? clearNull(t) : t);
 const flagIndex = d => d.map((item, i) => ({[i]: item}))
 const remove = _ => null;
 const append = d => d.join('');
 const constructSet = d => d.reduce((acc, t) => {
-  if (t && t.type === 'setIdentifier') acc.push({set: t})
+  if (t && t.type === 'setIdentifier') acc.push({set: t});
   if (t && t.length) acc[acc.length - 1].phones = t;
   return acc;
 }, []);
-const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d)
+const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 var grammar = {
     Lexer: lexer,
     ParserRules: [
     {"name": "main$ebnf$1", "symbols": []},
-    {"name": "main$ebnf$1$subexpression$1", "symbols": ["statement"]},
+    {"name": "main$ebnf$1$subexpression$1", "symbols": ["_", "statement"]},
     {"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "main", "symbols": ["main$ebnf$1"], "postprocess": compose(flag('main'), getTerminal)},
+    {"name": "main", "symbols": ["main$ebnf$1", "_"], "postprocess": pipe(
+        clearNull,
+        // recursive call to fix repeat?
+        d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+        d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+        flag('main'),
+        getTerminal,
+      ) },
     {"name": "_$ebnf$1$subexpression$1", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)]},
     {"name": "_$ebnf$1", "symbols": ["_$ebnf$1$subexpression$1"], "postprocess": id},
     {"name": "_$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
     {"name": "_", "symbols": ["_$ebnf$1"], "postprocess": remove},
     {"name": "__", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)], "postprocess": remove},
+    {"name": "equal", "symbols": [(lexer.has("equal") ? {type: "equal"} : equal)], "postprocess": remove},
     {"name": "statement", "symbols": ["comment"]},
-    {"name": "statement", "symbols": ["definition"], "postprocess": compose(clearNull, getTerminal)},
-    {"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": compose(remove, getTerminal)},
-    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": d => ({token: 'setDefinition', sets: d[2]})},
-    {"name": "setDefinition$ebnf$1", "symbols": []},
-    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
-    {"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1", "setDefinition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression"], "postprocess": constructSet},
-    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)], "postprocess": d => d.filter(t => t && t.length)},
+    {"name": "statement", "symbols": ["definition"], "postprocess": pipe(
+        d => d.flatMap(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+        // recursive call to fit repeat?
+        d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+        d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+        // may split from other definition statements
+        d => d.map(t => t && t.length > 1 ? ({ type: 'set', ...objFromArr(t) }) : null)
+      ) },
+    {"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": pipe(getTerminal, remove)},
+    {"name": "definition$ebnf$1", "symbols": []},
+    {"name": "definition$ebnf$1$subexpression$1", "symbols": ["setDefinition", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
+    {"name": "definition$ebnf$1", "symbols": ["definition$ebnf$1", "definition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
+    {"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "definition$ebnf$1", "setDefinition"], "postprocess": pipe(
+        // not yet sure why this call is required twice
+        d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+        d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+        d => d.map(u => u && u.length ? u.map(v => v.length ? v.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet')[0] : v) : u),
+        clearNull,
+      ) },
+    {"name": "setDefinition$ebnf$1$subexpression$1", "symbols": ["setAlias"]},
+    {"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1$subexpression$1"], "postprocess": id},
+    {"name": "setDefinition$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
+    {"name": "setDefinition", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "setDefinition$ebnf$1", "__", "equal", "__", "setExpression"], "postprocess":
+        pipe(
+          d => d.filter(t => !!t && t.length !== 0),
+          d => d.map(u => u && u.length ? u.map(t => t && t.length ? t.filter(v => v && v.type !== 'comma') : t) : u),
+          d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+          d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t),
+          d => d.map(t => t.length ?
+            // pretty ugly ([ { type: 'aias', alias: [ string ] }] ) => { setAlias: str }
+            { setAlias: t.reduce((aliases, token) => token && token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] }
+            : t),
+        )
+      },
+    {"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)]},
+    {"name": "setExpression$ebnf$1$subexpression$1", "symbols": ["setOperation"]},
+    {"name": "setExpression$ebnf$1", "symbols": ["setExpression$ebnf$1$subexpression$1"], "postprocess": id},
+    {"name": "setExpression$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
+    {"name": "setExpression", "symbols": [(lexer.has("openCurlyBracket") ? {type: "openCurlyBracket"} : openCurlyBracket), "_", "setExpression$ebnf$1", "_", (lexer.has("closeCurlyBracket") ? {type: "closeCurlyBracket"} : closeCurlyBracket)], "postprocess":
+        pipe(
+          // filters commas and whitespace
+          d => d.filter(t => t && t.length),
+          d => d.map(t => t.map(u => u[0])),
+          flag('setExpression')
+        ) },
+    {"name": "setAlias", "symbols": [(lexer.has("comma") ? {type: "comma"} : comma), "_", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier)], "postprocess": pipe(
+        d => d && d.length ? d.filter(t => !!t) : d,
+        d => d.map(t => t.type === 'setIdentifier' ? t.toString() : null),
+        d => d.filter(t => !!t),
+        d => ({type: 'alias', alias: d }),
+      ) },
     {"name": "phoneList$ebnf$1", "symbols": []},
-    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), (lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": []},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1", "symbols": [(lexer.has("comma") ? {type: "comma"} : comma), "_"]},
+    {"name": "phoneList$ebnf$1$subexpression$1$ebnf$1", "symbols": ["phoneList$ebnf$1$subexpression$1$ebnf$1", "phoneList$ebnf$1$subexpression$1$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
+    {"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), "phoneList$ebnf$1$subexpression$1$ebnf$1"]},
     {"name": "phoneList$ebnf$1", "symbols": ["phoneList$ebnf$1", "phoneList$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
-    {"name": "phoneList", "symbols": ["phoneList$ebnf$1", (lexer.has("phone") ? {type: "phone"} : phone)], "postprocess": d => d.filter(t => t && (t.type === 'phone' || t.length) )
-      .map(t => {
-        if (!t.length) return t;
-        t.filter(st => st && st.type === 'phone')
-        return t;
-      }) }
+    {"name": "phoneList", "symbols": ["phoneList$ebnf$1"], "postprocess":
+        pipe(
+          d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+        )
+      },
+    {"name": "setOperation", "symbols": ["orOperation"]},
+    {"name": "setOperation", "symbols": [(lexer.has("identifier") ? {type: "identifier"} : identifier)], "postprocess": pipe(
+        d => d.type ? d : ({ identifier: d.toString(), type: 'identifier' })
+      )},
+    {"name": "orOperation", "symbols": ["_", "setOperation", "__", (lexer.has("kwSetOr") ? {type: "kwSetOr"} : kwSetOr), "__", "setOperation", "_"], "postprocess": pipe(
+        d => d.filter(d => !!d),
+        d => ({ type: 'operator', operator: 'or', operands: [ d[0], d[2] ] }),
+      ) }
 ]
   , ParserStart: "main"
 }
```
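This file has the shape nearley's `nearleyc` compiler emits (a grammar object with `ParserRules` and `ParserStart`, compiled from the `.ne` source shown next). A hedged sketch of how such a compiled grammar is normally consumed with nearley's runtime API; the repo's own `parser()` wrapper presumably does the equivalent:

```js
const nearley = require('nearley');
// Assumption: the compiled grammar object is what this module exports.
const grammar = require('./parser.js');

// Standard nearley usage: wrap the compiled grammar, feed source text,
// then read the (possibly ambiguous) list of parses.
const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
parser.feed('; a comment\n');
console.log(parser.results);
```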
```diff
@@ -1,24 +1,32 @@
 @{%
 const { lexer } = require('./lexer.js');
 const getTerminal = d => d ? d[0] : null;
-const getAll = d => d.map((item, i) => ({[i]: item}));
-const flag = token => d => d.map(item => ({[token]: item}))
-const clearNull = d => d.filter(t => !!t);
+const getAll = d => d.map((item, i) => ({ [i]: item }));
+const flag = token => d => d.map(item => ({ [token]: item }))
+const clearNull = d => d.filter(t => !!t && (t.length !== 1 || t[0])).map(t => t.length ? clearNull(t) : t);
 const flagIndex = d => d.map((item, i) => ({[i]: item}))
 const remove = _ => null;
 const append = d => d.join('');
 const constructSet = d => d.reduce((acc, t) => {
-  if (t && t.type === 'setIdentifier') acc.push({set: t})
+  if (t && t.type === 'setIdentifier') acc.push({set: t});
   if (t && t.length) acc[acc.length - 1].phones = t;
   return acc;
 }, []);
-const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d)
+const pipe = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);
+const objFromArr = d => d.reduce((obj, item) => ({ ...obj, ...item }), {});
 %}
 
 @lexer lexer
 
-main -> (statement):*
-  {% compose(flag('main'), getTerminal) %}
+main -> (_ statement):* _
+  {% pipe(
+    clearNull,
+    // recursive call to fix repeat?
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    flag('main'),
+    getTerminal,
+  ) %}
 
 _ -> (%whiteSpace):?
   {% remove %}

@@ -26,54 +34,76 @@ _ -> (%whiteSpace):?
 __ -> %whiteSpace
   {% remove %}
 
+equal -> %equal
+  {% remove %}
+
 statement -> comment | definition
-  {% compose(clearNull, getTerminal) %}
+  {% pipe(
+    d => d.flatMap(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+    // recursive call to fit repeat?
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    d => d.map(t => t && t.length === 1 && t[0] ? t[0] : t),
+    // may split from other definition statements
+    d => d.map(t => t && t.length > 1 ? ({ type: 'set', ...objFromArr(t) }) : null)
+  ) %}
 
 comment -> %comment
-  {% compose(remove, getTerminal) %}
+  {% pipe(getTerminal, remove) %}
 
 # SETS
-definition -> %kwSet __ setDefinition {% d => ({token: 'setDefinition', sets: d[2]}) %}
-setDefinition -> (%setIdentifier __ %equal __ setExpression %comma __):* %setIdentifier __ %equal __ setExpression
-  {% constructSet %}
+definition -> %kwSet __ (setDefinition %comma __):* setDefinition
+  {% pipe(
+    // not yet sure why this call is required twice
+    d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+    d => d.map(u => u && u.length ? u.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet') : u),
+    d => d.map(u => u && u.length ? u.map(v => v.length ? v.filter(t => t && t.type !== 'comma' && t.type !== 'kwSet')[0] : v) : u),
+    clearNull,
+  ) %}
+setDefinition -> %setIdentifier (setAlias):? __ equal __ setExpression
+  {%
+    pipe(
+      d => d.filter(t => !!t && t.length !== 0),
+      d => d.map(u => u && u.length ? u.map(t => t && t.length ? t.filter(v => v && v.type !== 'comma') : t) : u),
+      d => d.map(t => t.type === 'setIdentifier' ? { setIdentifier: t.toString() } : t),
+      d => d.map(t => t && t.length && t[0].hasOwnProperty('setExpression') ? t[0] : t),
+      d => d.map(t => t.length ?
+        // pretty ugly ([ { type: 'aias', alias: [ string ] }] ) => { setAlias: str }
+        { setAlias: t.reduce((aliases, token) => token && token.type === 'alias' ? [...aliases, ...token.alias] : aliases, [])[0] }
+        : t),
+    )
+  %}
 setExpression -> %openSquareBracket _ phoneList _ %closeSquareBracket
-  {% d => d.filter(t => t && t.length) %}
-phoneList -> (%phone %comma _):* %phone
-  {% d => d.filter(t => t && (t.type === 'phone' || t.length) )
-    .map(t => {
-      if (!t.length) return t;
-      t.filter(st => st && st.type === 'phone')
-      return t;
-    }) %}
+  | %openCurlyBracket _ (setOperation):? _ %closeCurlyBracket
+  {%
+    pipe(
+      // filters commas and whitespace
+      d => d.filter(t => t && t.length),
+      d => d.map(t => t.map(u => u[0])),
+      flag('setExpression')
+    ) %}
+
+setAlias -> %comma _ %setIdentifier
+  {% pipe(
+    d => d && d.length ? d.filter(t => !!t) : d,
+    d => d.map(t => t.type === 'setIdentifier' ? t.toString() : null),
+    d => d.filter(t => !!t),
+    d => ({type: 'alias', alias: d }),
+  ) %}
 
 # assignmentExpression:
 #   /*
 #    * SPEC:
 #    * conditionalExpression
 #    * | leftHandSideExpression assignmentOperator assignmentExpression
 #    */
 #   (leftHandSideExpression assignmentOperator) =>
 #     leftHandSideExpression assignmentOperator assignmentExpression
 #   | conditionalExpression
 #   ;
+phoneList -> (%phone (%comma _):* ):*
+  {%
+    pipe(
+      d => d ? d[0].map(t => t.filter(u => u.type === 'phone').map(u => u.toString())) : d
+    )
+  %}
+setOperation -> orOperation
+  | %identifier
+      {% pipe(
+        d => d.type ? d : ({ identifier: d.toString(), type: 'identifier' })
+      )%}
 
 # assignmentExpressionNoln:
 #   conditionalExpressionNoln
 #   | leftHandSideExpression assignmentOperator assignmentExpressionNoln
 #   ;
 
 # assignmentOperator:
 #   /* note that in the grammar these are listed out explicitely */
 #   EQ | TIMESEQ | DIVIDEEQ | PERCENTEQ | PLUSEQ | MINUSEQ | LSHIFTEQ | RSHIFTEQ
 #   | GT3EQ | AMPEREQ | CAROTEQ | PIPEEQ
 #   ;
 
 # expression:
 #   /*
 #    * SPEC:
 #    * assignmentExpression
 #    * | expression COMMA assignmentExpression
 #    */
 #   assignmentExpression (expressionTail)*
 #   ;
+orOperation -> _ setOperation __ %kwSetOr __ setOperation _
+  {% pipe(
+    d => d.filter(d => !!d),
+    d => ({ type: 'operator', operator: 'or', operands: [ d[0], d[2] ] }),
+  ) %}
```
```diff
@@ -1,21 +1,28 @@
-const moo = require('moo');
+const moo = require("moo");
 
 const lexer = moo.states({
   main: {
     comment: /;.*$/,
-    star: { match: /\*/, push: 'epoch' },
-    slash: { match: /\//, push: 'lexicon' },
+    star: { match: /\*/, push: "epoch" },
+    slash: { match: /\//, push: "lexicon" },
     // change so that identifiers are always upper, keywords are always lower, phones are always lower
-    'kwSet': { match: 'set', type: moo.keywords({ 'kwSet': 'set '}), push: 'setDefinition'},
-    identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, },
-    openBracket: { match: /\[/, push: 'feature' },
+    kwSet: {
+      match: "set",
+      type: moo.keywords({ kwSet: "set " }),
+      push: "setDefinition",
+    },
+    identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/ },
+    openBracket: { match: /\[/, push: "feature" },
     whiteSpace: { match: /\s+/, lineBreaks: true },
-    newLine: { match: /\n+/, lineBreaks: true }
+    newLine: { match: /\n+/, lineBreaks: true },
   },
 
   epoch: {
-    identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, push: 'rule' },
-    openParen: { match: /\(/, push: 'ruleDefinition' },
+    identifier: {
+      match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/,
+      push: "rule",
+    },
+    openParen: { match: /\(/, push: "ruleDefinition" },
     pipe: { match: /\|/, pop: true },
     greaterThan: /\>/,
     arrow: /\-\>/,

@@ -23,60 +30,60 @@ const lexer = moo.states({
     slash: /\//,
     dot: /\./,
     underscore: /\_/,
-    newLine: { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
   },
 
   ruleDefinition: {
-    doubleTick: { match: /``/, push: 'ruleName' },
-    singleTick: { match: /`/, push: 'ruleDescription' },
+    doubleTick: { match: /``/, push: "ruleName" },
+    singleTick: { match: /`/, push: "ruleDescription" },
     // push rule
     closeParen: { match: /\)/, pop: true },
-    newLine: { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
   },
 
   ruleName: {
     ruleName: { match: /.+(?=``)/ },
-    doubleTick: { match: /``/, pop: true }
+    doubleTick: { match: /``/, pop: true },
   },
 
   ruleDescription: {
     ruleDescription: { match: /.+(?=`)/ },
-    singleTick: { match: /`/, pop: true }
+    singleTick: { match: /`/, pop: true },
   },
 
   rule: {
-    openSquareBracket: { match: /\[/, push: 'ruleFeature' },
+    openSquareBracket: { match: /\[/, push: "ruleFeature" },
     // whiteSpace: { match: /\s/ },
-    newLine: { match: /\n/, pop: true, lineBreaks: true }
+    newLine: { match: /\n/, pop: true, lineBreaks: true },
   },
 
   ruleFeature: {
     ruleFeature: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/ },
     closeBracket: { match: /\]/, pop: true },
-    newLine: { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
   },
 
   lexicon: {
     slash: { match: /\//, pop: true },
-    newLine: { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
  },
 
   feature: {
     closeBracket: { match: /\]/, pop: true },
     positiveAssignment: /\+=/,
     negativeAssignment: /\-=/,
-    newLine: { match: /\n/, lineBreaks: true }
+    newLine: { match: /\n/, lineBreaks: true },
   },
 
   setDefinition: {
     comment: /;.*$/,
     setIdentifier: { match: /[A-Z]+[A-Z_]*/ },
-    openCurlyBracket: { match: /\{/, push: 'setOperation' },
+    openCurlyBracket: { match: /\{/, push: "setOperation" },
     equal: /=/,
     openSquareBracket: /\[/,
     phone: /[\u00c0-\u03FFa-z]+/,
     closeSquareBracket: { match: /\]/ },
-    comma: { match: /,/, push: 'commaOperation' },
+    comma: { match: /,/, push: "commaOperation" },
     whiteSpace: { match: /[\t ]+/ },
     newLine: { match: /\n/, pop: true, lineBreaks: true },
   },

@@ -84,24 +91,25 @@ const lexer = moo.states({
   setOperation: {
     closeCurlyBracket: { match: /\}/, pop: true },
     // ! restrict identifiers
-    keyword: { match: ['not', 'and', 'or', 'nor', 'in', 'yield', 'concat', 'dissoc'], type: moo.keywords({
-      'kw-set-not': 'not' ,
-      'kw-set-and': 'and' ,
-      'kw-set-or': 'or' ,
-      'kw-set-nor': 'nor' ,
-      'kw-set-in': 'in' ,
-      'kw-set-yield': 'yield' ,
-      'kw-set-concat': 'concat',
-      'kw-set-dissoc': 'dissoc'
-    })
+    keyword: {
+      match: ["not", "and", "or", "nor", "in", "yield", "concat", "dissoc"],
+      type: moo.keywords({
+        kwSetNot: "not",
+        kwSetAnd: "and",
+        kwSetOr: "or",
+        kwSetNor: "nor",
+        kwSetIn: "in",
+        kwSetYield: "yield",
+        kwSetConcat: "concat",
+        kwSetDissoc: "dissoc",
+      }),
+    },
-    identifier: /[A-Z]+[A-Z_]+/,
-    whiteSpace: /[\t ]+/,
-    openSquareBracket: /\]/,
-    closeSquareBracket: /\[/,
+    whiteSpace: { match: /\s+/, lineBreaks: true },
+    openSquareBracket: /\[/,
+    closeSquareBracket: /\]/,
+    identifier: /[A-Z]+[A-Z_]*/,
     phone: /[\u00c0-\u03FFa-z]+/,
     newLine: { match: /\n/, lineBreaks: true }
   },
 
   commaOperation: {

@@ -109,9 +117,8 @@ const lexer = moo.states({
     // this prevents popping back to main
     comment: /\s*;.*$/,
     whiteSpace: { match: /\s+/, lineBreaks: true, pop: true },
-    newLine: { match: /\n/, lineBreaks: true, pop: true }
-  }
-
+    newLine: { match: /\n/, lineBreaks: true, pop: true },
+  },
 });
 
-module.exports = {lexer};
+module.exports = { lexer };
```
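Because this is a standard moo states lexer, it can be exercised on its own with moo's documented API: `reset()` loads a source string, and the lexer is iterable, yielding `{ type, value }` tokens. A small sketch (the sample input is an assumption):

```js
const { lexer } = require('./lexer.js');

// Tokenize one LATL set definition and print the token stream.
lexer.reset('set VOWELS = [ a, i, u ]\n');
for (const token of lexer) {
  console.log(token.type, JSON.stringify(token.value));
}
```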
File diff suppressed because it is too large
src/utils/latl/test/codeGenerator.test.js (new file, 10 lines)
```diff
@@ -0,0 +1,10 @@
+import { assertionData } from './assertionData';
+import { codeGenerator } from '../codeGenerator';
+
+describe('codeGenerator', () => {
+  it('parses simple comment', () => {
+    const { latl, code } = assertionData.simpleComment;
+    const generatedCode = codeGenerator(latl);
+    expect(generatedCode).toEqual(code);
+  });
+})
```
```diff
@@ -1,49 +1,180 @@
-import { lexer } from '../lexer';
-import { parser } from '../parser';
-import { assertionData } from './assertionData';
+import { lexer } from "../lexer";
+import { parser } from "../parser";
+import { assertionData } from "./assertionData";
 
-describe('parser', () => {
-  it('parses simple comment', () => {
-    const { latl } = assertionData.simpleComment;
-    const AST = parser().feed(latl).results;
-    expect(AST.length).toBe(1);
-    console.log(AST[0])
-    // expect(AST[0]).toStrictEqual()
-  })
+describe("parser", () => {
+  it("parses simple comment", () => {
+    const { latl, AST } = assertionData.simpleComment;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  });
 
-  // it('parses multiple set definitions with comma operator', () => {
-  //   const { latl } = assertionData.commaSetDefinition;
-  //   const AST = parser().feed(latl)
-  //   console.log(AST)
-  // });
+  it("parses simple set definition", () => {
+    const { latl, AST } = assertionData.simpleSetDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  });
 
-  // it('lexes set definition with alias', () => {
-  //   const { latl, tokens } = assertionData.setAliasDefinition;
-  //   const stream = getStream(latl);
-  //   expect(stream).toStrictEqual(tokens);
-  // });
+  it("parses multiple set definitions with comma operator", () => {
+    const { latl, AST } = assertionData.commaSetDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults.length).toBe(1);
+    expect(feedResults[0]).toStrictEqual(AST);
+  });
 
-  // it('lexes set definition with set join', () => {
-  //   const { latl, tokens } = assertionData.setDefinitionJoin;
-  //   const stream = getStream(latl);
-  //   expect(stream).toStrictEqual(tokens);
-  // });
+  it("lexes set definition with alias", () => {
+    const { latl, AST } = assertionData.setAliasDefinition;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults[0]).toStrictEqual(AST);
+  });
 
-  // it('lexes set definition with yield operation', () => {
+  it.skip("lexes set definition with set join", () => {
+    const { latl, AST } = assertionData.setDefinitionJoin;
+    const feedResults = parser().feed(latl).results;
+    expect(feedResults[0]).toStrictEqual(AST);
+  });
 
+  it.todo(
+    "lexes set definition with yield operation"
+    // , () => {
   //   const { latl, tokens } = assertionData.setDefinitionYield;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
   // });
+    // }
+  );
 
-  // it('lexes all set join operations', () => {
+  it.todo(
+    "lexes all set join operations"
+    // , () => {
   //   const { latl, tokens } = assertionData.setOperationsJoin;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
   // });
+    // }
+  );
 
-  // it('lexes set filter, concat, and dissoc operations', () => {
+  it.todo(
+    "lexes set filter, concat, and dissoc operations"
+    // , () => {
   //   const { latl, tokens } = assertionData.setOperations;
   //   const stream = getStream(latl);
   //   expect(stream).toStrictEqual(tokens);
   //   })
-  // })
-})
+    // }
+  );
+});
+
+// {
+//   "set":
+//     [
+//       [
+//         [
+//           {
+//             "col": 5,
+//             "line": 2,
+//             "lineBreaks": 0,
+//             "offset": 5,
+//             "text": "NASAL_PULMONIC_CONSONANTS",
+//             "toString": [tokenToString],
+//             "type": "setIdentifier",
+//             "value": "NASAL_PULMONIC_CONSONANTS",
+//           },
+//           null,
+//           {
+//             "col": 45,
+//             "line": 2,
+//             "lineBreaks": 0,
+//             "offset": 45,
+//             "text": "=",
+//             "toString": [tokenToString],
+//             "type": "equal",
+//             "value": "=",
+//           },
+//           null,
+//           [
+//             [
+//               {
+//                 "col": 49,
+//                 "line": 2,
+//                 "lineBreaks": 0,
+//                 "offset": 49,
+//                 "text": "m̥",
+//                 "toString": [tokenToString],
+//                 "type": "phone",
+//                 "value": "m̥",
+//               },
+//               {
+//                 "col": 91,
+//                 "line": 2,
+//                 "lineBreaks": 0,
+//                 "offset": 91,
+//                 "text": "ɴ",
+//                 "toString": [tokenToString],
+//                 "type": "phone",
+//                 "value": "ɴ",
+//               },
+//             ],
+//           ],
+//           {
+//             "col": 94,
+//             "line": 2,
+//             "lineBreaks": 0,
+//             "offset": 94,
+//             "text": ",",
+//             "toString": [tokenToString],
+//             "type": "comma",
+//             "value": ",",
+//           },
+//           null,
+//         ],
+//       ],
+// - "setIdentifier": "STOP_PULMONIC_CONSONANTS",
+//       {
+//         "col": 5,
+//         "line": 3,
+//         "lineBreaks": 0,
+//         "offset": 100,
+//         "text": "STOP_PULMONIC_CONSONANTS",
+//         "toString": [tokenToString],
+//         "type": "setIdentifier",
+//         "value": "STOP_PULMONIC_CONSONANTS",
+//       },
+//       null,
+//       {
+//         "col": 45,
+//         "line": 3,
+//         "lineBreaks": 0,
+//         "offset": 140,
+//         "text": "=",
+//         "toString": [tokenToString],
+//         "type": "equal",
+//         "value": "=",
+//       },
+//       null,
+//       [
+//         [
+//           {
+//             "col": 49,
+//             "line": 3,
+//             "lineBreaks": 0,
+//             "offset": 144,
+//             "text": "p",
+//             "toString": [tokenToString],
+//             "type": "phone",
+//             "value": "p",
+//           },
+//           {
+//             "col": 104,
+//             "line": 3,
+//             "lineBreaks": 0,
+//             "offset": 199,
+//             "text": "ʔ",
+//             "toString": [tokenToString],
+//             "type": "phone",
+//             "value": "ʔ",
+//           },
+//         ],
+//       ],
+//     ],
+//   "token": "kwSet",
+// }
```