define tokens for lexing set definitions, set aliases, and the set 'or' operation

This commit is contained in:
Sorrel Bri 2020-03-14 22:14:31 -07:00
parent 6e230de7f0
commit 2634e35a01
7 changed files with 405 additions and 98 deletions

View file

@ -1,19 +1,19 @@
set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ] set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ],
STOP_PULMONIC_CONSONANTS = [ p, b, p̪, b̪, t̼, d̼, t, d, ʈ, ɖ, c, ɟ, k, ɡ, q, ɢ, ʡ, ʔ ] STOP_PULMONIC_CONSONANTS = [ p, b, p̪, b̪, t̼, d̼, t, d, ʈ, ɖ, c, ɟ, k, ɡ, q, ɢ, ʡ, ʔ ],
S_FRICATIVE_PULMONIC_CONSONANTS = [ s, z, ʃ, ʒ, ʂ, ʐ, ɕ, ʑ ] S_FRICATIVE_PULMONIC_CONSONANTS = [ s, z, ʃ, ʒ, ʂ, ʐ, ɕ, ʑ ],
FRICATIVE_PULMONIC_CONSONANTS = [ ɸ, β, f, v, θ̼, ð̼, θ, ð, θ̠, ð̠, ɹ̠̊˔, ɹ̠˔, ɻ˔, ç, ʝ, x, ɣ, χ, ʁ, ħ, ʕ, h, ɦ ] FRICATIVE_PULMONIC_CONSONANTS = [ ɸ, β, f, v, θ̼, ð̼, θ, ð, θ̠, ð̠, ɹ̠̊˔, ɹ̠˔, ɻ˔, ç, ʝ, x, ɣ, χ, ʁ, ħ, ʕ, h, ɦ ],
APPROXIMANT_PULMONIC_CONSONANTS = [ ʋ̥, ʋ, ɹ̥, ɹ, ɻ̊, ɻ, j̊, j, ɰ̊, ɰ, ʔ̞ ] APPROXIMANT_PULMONIC_CONSONANTS = [ ʋ̥, ʋ, ɹ̥, ɹ, ɻ̊, ɻ, j̊, j, ɰ̊, ɰ, ʔ̞ ],
TAP_PULMONIC_CONSONANTS = [ ⱱ̟, ⱱ, ɾ̼, ɾ̥, ɾ, ɽ̊, ɽ, ɢ̆, ʡ̆ ] TAP_PULMONIC_CONSONANTS = [ ⱱ̟, ⱱ, ɾ̼, ɾ̥, ɾ, ɽ̊, ɽ, ɢ̆, ʡ̆ ],
TRILL_PULMONIC_CONSONANTS = [ ʙ̥, ʙ, r̥, r, ɽ̊r̥, ɽr, ʀ̥, ʀ, ʜ, ʢ ] TRILL_PULMONIC_CONSONANTS = [ ʙ̥, ʙ, r̥, r, ɽ̊r̥, ɽr, ʀ̥, ʀ, ʜ, ʢ ],
L_FRICATIVE_PULMONIC_CONSONANTS = [ ɬ, ɮ, ɭ̊˔, ɭ˔, ʎ̝̊, ʎ̝, ʟ̝̊, ʟ̝ ] L_FRICATIVE_PULMONIC_CONSONANTS = [ ɬ, ɮ, ɭ̊˔, ɭ˔, ʎ̝̊, ʎ̝, ʟ̝̊, ʟ̝ ],
L_APPROXIMANT_PULMONIC_CONSONANTS = [ l̥, l, ɭ̊, ɭ, ʎ̥, ʎ, ʟ̥, ʟ, ʟ̠ ] L_APPROXIMANT_PULMONIC_CONSONANTS = [ l̥, l, ɭ̊, ɭ, ʎ̥, ʎ, ʟ̥, ʟ, ʟ̠ ],
L_TAP_PULMONIC_CONSONANTS = [ ɺ, ɭ̆, ʎ̆, ʟ̆ ] L_TAP_PULMONIC_CONSONANTS = [ ɺ, ɭ̆, ʎ̆, ʟ̆ ],
AFFRICATE_PULMONIC_CONSONANTS = [ pɸ, bβ, p̪f, b̪v, t̪θ, d̪ð, tɹ̝̊, dɹ̝, t̠ɹ̠̊˔, d̠ɹ̠˔, cç, ɟʝ, kx, ɡɣ, qχ, ʡʢ, ʔh ] AFFRICATE_PULMONIC_CONSONANTS = [ pɸ, bβ, p̪f, b̪v, t̪θ, d̪ð, tɹ̝̊, dɹ̝, t̠ɹ̠̊˔, d̠ɹ̠˔, cç, ɟʝ, kx, ɡɣ, qχ, ʡʢ, ʔh ],
S_AFFRICATE_PULMONIC_CONSONANTS = [ ts, dz, t̠ʃ, d̠ʒ, ʈʂ, ɖʐ, tɕ, dʑ ] S_AFFRICATE_PULMONIC_CONSONANTS = [ ts, dz, t̠ʃ, d̠ʒ, ʈʂ, ɖʐ, tɕ, dʑ ],
L_AFFRICATE_PULMONIC_CONSONANTS = [ tɬ, dɮ, ʈɭ̊˔, cʎ̝̊, kʟ̝̊, ɡʟ̝ ] L_AFFRICATE_PULMONIC_CONSONANTS = [ tɬ, dɮ, ʈɭ̊˔, cʎ̝̊, kʟ̝̊, ɡʟ̝ ],
DOUBLE_STOP_PULMONIC_CONSONANTS = [ t͡p, d͡b, k͡p, ɡ͡b, q͡ʡ ] DOUBLE_STOP_PULMONIC_CONSONANTS = [ t͡p, d͡b, k͡p, ɡ͡b, q͡ʡ ],
DOUBLE_NASAL_PULMONIC_CONSONANTS = [ n͡m, ŋ͡m ] DOUBLE_NASAL_PULMONIC_CONSONANTS = [ n͡m, ŋ͡m ],
DOUBLE_FRICATIVE_PULMONIC_CONSONANTS = [ ɧ ] DOUBLE_FRICATIVE_PULMONIC_CONSONANTS = [ ɧ ],
DOUBLE_APPROXIMANT_PULMONIC_CONSONANTS = [ ʍ, w, ɥ̊, ɥ, ɫ ] DOUBLE_APPROXIMANT_PULMONIC_CONSONANTS = [ ʍ, w, ɥ̊, ɥ, ɫ ]
set PULMONIC_CONSONANTS, C = { NASAL_PULMONIC_CONSONANTS or STOP_PULMONIC_CONSONANTS set PULMONIC_CONSONANTS, C = { NASAL_PULMONIC_CONSONANTS or STOP_PULMONIC_CONSONANTS
@ -28,10 +28,10 @@ set PULMONIC_CONSONANTS, C = { NASAL_PULMONIC_CONSONANTS or STOP_PULM
} }
set STOP_EJECTIVE_CONSONANTS = [ pʼ, tʼ, ʈʼ, cʼ, kʼ, qʼ, ʡʼ ] set STOP_EJECTIVE_CONSONANTS = [ pʼ, tʼ, ʈʼ, cʼ, kʼ, qʼ, ʡʼ ],
FRICATIVE_EJECTIVE_CONSONANTS = [ ɸʼ, fʼ, θʼ, sʼ, ʃʼ, ʂʼ, ɕʼ, xʼ, χʼ ] FRICATIVE_EJECTIVE_CONSONANTS = [ ɸʼ, fʼ, θʼ, sʼ, ʃʼ, ʂʼ, ɕʼ, xʼ, χʼ ],
L_FRICATIVE_EJECTIVE_CONSONANTS = [ ɬʼ ] L_FRICATIVE_EJECTIVE_CONSONANTS = [ ɬʼ ],
AFFRICATE_EJECTIVE_CONSONANTS = [ tsʼ, t̠ʃʼ, ʈʂʼ, kxʼ, qχʼ ] AFFRICATE_EJECTIVE_CONSONANTS = [ tsʼ, t̠ʃʼ, ʈʂʼ, kxʼ, qχʼ ],
L_AFFRICATE_EJECTIVE_CONSONANTS = [ tɬʼ, cʎ̝̊ʼ, kʟ̝̊ʼ ] L_AFFRICATE_EJECTIVE_CONSONANTS = [ tɬʼ, cʎ̝̊ʼ, kʟ̝̊ʼ ]
set EJECTIVE_CONSONANTS = { STOP_EJECTIVE_CONSONANTS or FRICATIVE_EJECTIVE_CONSONANTS set EJECTIVE_CONSONANTS = { STOP_EJECTIVE_CONSONANTS or FRICATIVE_EJECTIVE_CONSONANTS
@ -39,9 +39,9 @@ set EJECTIVE_CONSONANTS = { STOP_EJECTIVE_CONSONANTS or FRICATIVE_
or L_AFFRICATE_EJECTIVE_CONSONANTS or L_AFFRICATE_EJECTIVE_CONSONANTS
} }
set TENUIS_CLICK_CONSONANTS = [ ʘ, ǀ, ǃ, ǂ ] set TENUIS_CLICK_CONSONANTS = [ ʘ, ǀ, ǃ, ǂ ],
VOICED_CLICK_CONSONANTS = [ ʘ̬, ǀ̬, ǃ̬, ǂ̬ ] VOICED_CLICK_CONSONANTS = [ ʘ̬, ǀ̬, ǃ̬, ǂ̬ ],
NASAL_CLICK_CONSONANTS = [ ʘ̃, ǀ̃, ǃ̃, ǂ̃ ] NASAL_CLICK_CONSONANTS = [ ʘ̃, ǀ̃, ǃ̃, ǂ̃ ],
L_CLICK_CONSONANTS = [ ǁ, ǁ̬ ] L_CLICK_CONSONANTS = [ ǁ, ǁ̬ ]
set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS
@ -52,25 +52,36 @@ set IMPLOSIVE_CONSONANTS = [ ɓ, ɗ, ᶑ, ʄ, ɠ, ʛ, ɓ̥, ɗ̥,
set NON_PULMONIC_CONSONANTS = { EJECTIVE_CONSONANTS or CLICK_CONSONANTS or IMPLOSIVE_CONSONANTS } set NON_PULMONIC_CONSONANTS = { EJECTIVE_CONSONANTS or CLICK_CONSONANTS or IMPLOSIVE_CONSONANTS }
set IMPLOSIVE_CONSONANTS = { PULMONIC_CONSONANTS or NON_PULMONIC_CONSONANTS } set CONSONANTS = { PULMONIC_CONSONANTS or NON_PULMONIC_CONSONANTS }
set MODAL_VOWELS = [ i, y, ɨ, ʉ, ɯ, u, ɪ, ʏ, ʊ, e, ø ɘ, ɵ ɤ, o, ø̞ ə, o̞, ɛ, œ ɜ, ɞ ʌ, ɔ, æ, ɐ, a, ɶ, ä, ɑ, ɒ ] set MODAL_VOWELS = [ i, y, ɨ, ʉ, ɯ, u, ɪ, ʏ, ʊ, e, ø ɘ, ɵ ɤ, o, ø̞ ə, o̞, ɛ, œ ɜ, ɞ ʌ, ɔ, æ, ɐ, a, ɶ, ä, ɑ, ɒ ],
BREATHY_VOWELS = { [ V ] in MODAL_VOWELS yield [ V̤ ] } BREATHY_VOWELS = { [ V ] in MODAL_VOWELS yield [ V̤ ] },
VOICELESS_VOWELS = { [ V ] in MODAL_VOWELS yield [ V̥ ] } VOICELESS_VOWELS = { [ V ] in MODAL_VOWELS yield [ V̥ ] },
CREAKY_VOWELS = { [ V ] in MODAL_VOWELS yield [ V̰ ] } CREAKY_VOWELS = { [ V ] in MODAL_VOWELS yield [ V̰ ] }
set SHORT_ORAL_VOWELS = { MODAL_VOWELS or BREATHY_VOWELS or CREAKY_VOWELS or VOICELESS_VOWELS } set SHORT_ORAL_VOWELS = { MODAL_VOWELS or BREATHY_VOWELS or CREAKY_VOWELS or VOICELESS_VOWELS },
LONG_ORAL_VOWELS = { [ V ] in SHORT_ORAL_VOWELS [ Vː ] } LONG_ORAL_VOWELS = { [ V ] in SHORT_ORAL_VOWELS [ Vː ] },
ORAL_VOWELS = { SHORT_ORAL_VOWELS or LONG_ORAL_VOWELS } ORAL_VOWELS = { SHORT_ORAL_VOWELS or LONG_ORAL_VOWELS }
set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] } set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] },
SHORT_NASAL_VOWELS = { [ Vː ] in NASAL_VOWELS yield [ V ]ː } SHORT_NASAL_VOWELS = { [ Vː ] in NASAL_VOWELS yield [ V ]ː },
LONG_NASAL_VOWELS = { [ Vː ] in NASAL_VOWELS } LONG_NASAL_VOWELS = { [ Vː ] in NASAL_VOWELS }
set VOWELS = { ORAL_VOWELS or NASAL_VOWELS } set VOWELS = { ORAL_VOWELS or NASAL_VOWELS }
print { GLOBAL } set PHONES = { VOWELS or CONSONANTS }
print [ GLOBAL ]
[lateral
+=
L_AFFRICATE_EJECTIVE_CONSONANTS, L_AFFRICATE_PULMONIC_CONSONANTS, L_APPROXIMANT_PULMONIC_CONSONANTS,
L_CLICK_CONSONANTS, L_FRICATIVE_EJECTIVE_CONSONANTS, L_FRICATIVE_PULMONIC_CONSONANTS, L_TAP_PULMONIC_CONSONANTS
-=
{ not { [+ lateral ] in CONSONANTS } }, VOWELS
; alternative
; { not { [+ lateral ] in PHONES } }
]
*proto-lang *proto-lang

View file

@ -15,7 +15,8 @@
; -- -TENSE = æ / ə / ɪ̞ / ɛ / ʌ / ʊ̞ / ɔ ; -- -TENSE = æ / ə / ɪ̞ / ɛ / ʌ / ʊ̞ / ɔ
; ---- DIPHTHONGS = eə / eɪ̯ / ju̟ / äɪ̞ / ɔɪ̞ / oʊ̞ / aʊ̞ / ɑɹ / iɹ / ɛɹ / ɔɹ / ʊɹ ; ---- DIPHTHONGS = eə / eɪ̯ / ju̟ / äɪ̞ / ɔɪ̞ / oʊ̞ / aʊ̞ / ɑɹ / iɹ / ɛɹ / ɔɹ / ʊɹ
; ---- CONSONANTS = p (pʰ) / b (b̥) / t (tʰ)(ɾ)(ʔ) / d (d̥)(ɾ) / tʃ / dʒ (d̥ʒ̊) / k (kʰ) / g (g̊) / f / v (v̥) / θ / ð (ð̥) / s / z (z̥) / ʃ / ʒ (ʒ̊) / h (ɦ)(ç) / m (ɱ)(m̩) / n(n̩) / ŋ / l (l̩)/ ɹ (ɹʲ ~ ɹˤ)(ɹ̩) / w (w̥) / j / x / ʔ ; ---- CONSONANTS = p (pʰ) / b (b̥) / t (tʰ)(ɾ)(ʔ) / d (d̥)(ɾ) / tʃ / dʒ (d̥ʒ̊) / k (kʰ) / g (g̊) / f / v (v̥) / θ / ð (ð̥) /
; s / z (z̥) / ʃ / ʒ (ʒ̊) / h (ɦ)(ç) / m (ɱ)(m̩) / n(n̩) / ŋ / l (l̩)/ ɹ (ɹʲ ~ ɹˤ)(ɹ̩) / w (w̥) / j / x / ʔ
; -- PLOSIVES = p / p' / pʰ / t / t' / tʰ ɾ / k / k' / kʰ ; -- PLOSIVES = p / p' / pʰ / t / t' / tʰ ɾ / k / k' / kʰ
; -- AFFRICATES = tʃ / dʒ ; -- AFFRICATES = tʃ / dʒ
; -- FRICATIVES = f / v / θ / ð / s / z / ʃ / ʒ / ç / x ; -- FRICATIVES = f / v / θ / ð / s / z / ʃ / ʒ / ç / x
@ -46,7 +47,8 @@ set PLOSIVES [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
; { SET_A not SET_B } left anti join ; { SET_A not SET_B } left anti join
; { SET_A and SET_B } inner join ; { SET_A and SET_B } inner join
; { SET_A or SET_B } full outer join ; { SET_A or SET_B } full outer join
; { SET_A nor SET_B } = { GLOBAL not { SET_A and SET_B } } ; { not SET_A } = { GLOBAL not SET_A }
; { not SET_A nor SET_B } = { GLOBAL not { SET_A or SET_B } }
; ---- set character operations - non-mutable! ; ---- set character operations - non-mutable!
; { [ Xy ] in SET_A } FILTER: where X is any character and y is a filtering character ; { [ Xy ] in SET_A } FILTER: where X is any character and y is a filtering character
@ -125,7 +127,7 @@ set PLOSIVES [ p, pʰ, t, tʼ, tʰ, ɾ, kʼ, k, kʰ ]
; ASPIRATED PLOSIVES ; ASPIRATED PLOSIVES
pʰ, tʰ, kʰ, pʰ, tʰ, kʰ,
; ASPIRATED AFFRICATES ; ASPIRATED AFFRICATES
,
; SPREAD LARYNGEALS ; SPREAD LARYNGEALS
h ɦ h ɦ
-= -=

View file

@ -30,11 +30,29 @@ A -> B / . _ . ; environment indicated with underscore and placeholder dots
## Language Primitives ## Language Primitives
## Data Structures ## Data Structures
### Sets ### Sets
Sets are collections of pointers to phones. The GLOBAL set contains all phones, making all other sets subsets of GLOBAL.
#### Global Set
[ GLOBAL ] is a shorthand for [ GLOBAL.SETS ]
#### Set Definition #### Set Definition
#### Set Usage #### Set Usage
#### Set Operation #### Set Operations
##### 'and' Operation
##### 'or' Operation
##### 'not' Operation
##### 'nor' Operation
##### 'in' Operation
##### 'yield' Operation
### Lexemes ### Lexemes
#### Lexeme Operations #### Lexeme Operations
### Phone ### Phone
For a set of phones 'a', 'b', and 'ab':
```
GLOBAL ┬▻ <Key: a> ┬▻ <Key: b> ┬▻ { feature: <Boolean>, ... }
│ │ └▻ grapheme: <String: 'ab'>
│ └┬▻ { feature: <Boolean>, ... }
│ └▻ grapheme: <String: 'a'>
└┬▻ { feature: <Boolean>, ... }
└▻ grapheme: <String: 'b'>
```
#### Phone Operations #### Phone Operations
### Epochs ### Epochs

View file

@ -2,19 +2,24 @@ const moo = require('moo');
export const lexer = moo.states({ export const lexer = moo.states({
main: { main: {
comment: /;.*/, comment: /;.*$/,
epochParent: { match: /\*/, push: 'epoch' }, star: { match: /\*/, push: 'epoch' },
slash: { match: /\//, push: 'lexicon' }, slash: { match: /\//, push: 'lexicon' },
// change so that identifiers are always upper, keywords are always lower, phones are always lower // change so that identifiers are always upper, keywords are always lower, phones are always lower
identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, type: moo.keywords({ 'kw-set': { match: 'set', type: moo.keywords({ 'kw-set': 'set '}), push: 'setDefinition'},
'kw-set': { match: 'set', push: 'setDefinition' } identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, },
})}, // type: moo.keywords({
// 'kw-set': 'set'
// // { match: 'set', push: 'setDefinition' },
// })},
openBracket: { match: /\[/, push: 'feature' }, openBracket: { match: /\[/, push: 'feature' },
space: { match: /\s+/, lineBreaks: true } whiteSpace: { match: /\s+/, lineBreaks: true },
newLine: { match: /\n+/, lineBreaks: true }
}, },
epoch: { epoch: {
identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, push: 'rule' }, identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, push: 'rule' },
openParen: { match: /\(/, push: 'ruleDefinition' },
pipe: { match: /\|/, pop: true }, pipe: { match: /\|/, pop: true },
greaterThan: /\>/, greaterThan: /\>/,
arrow: /\-\>/, arrow: /\-\>/,
@ -22,55 +27,85 @@ export const lexer = moo.states({
slash: /\//, slash: /\//,
dot: /\./, dot: /\./,
underscore: /\_/, underscore: /\_/,
newLine: { match: /\n/, lineBreaks: true }
},
ruleDefinition: {
doubleTick: { match: /``/, push: 'ruleName' },
singleTick: { match: /`/, push: 'ruleDescription' },
// push rule
closeParen: { match: /\)/, pop: true },
newLine: { match: /\n/, lineBreaks: true }
},
ruleName: {
ruleName: { match: /.+(?=``)/ },
doubleTick: { match: /``/, pop: true }
},
ruleDescription: {
ruleDescription: { match: /.+(?=`)/ },
singleTick: { match: /`/, pop: true }
}, },
rule: { rule: {
openSquareBracket: { match: /\[/, push: 'ruleFeature' }, openSquareBracket: { match: /\[/, push: 'ruleFeature' },
// whiteSpace: { match: /\s/ },
newLine: { match: /\n/, pop: true, lineBreaks: true }
}, },
ruleFeature: { ruleFeature: {
ruleFeature: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/ }, ruleFeature: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/ },
closeBracket: { match: /\]/, pop: true } closeBracket: { match: /\]/, pop: true },
newLine: { match: /\n/, lineBreaks: true }
}, },
lexicon: { lexicon: {
slash: { match: /\//, pop: true }, slash: { match: /\//, pop: true },
newLine: { match: /\n/, lineBreaks: true }
}, },
feature: { feature: {
closeBracket: { match: /\]/, pop: true }, closeBracket: { match: /\]/, pop: true },
positiveAssignment: /\+=/, positiveAssignment: /\+=/,
negativeAssignment: /\-=/, negativeAssignment: /\-=/,
newLine: { match: /\n/, lineBreaks: true }
}, },
setDefinition: { setDefinition: {
openCurlyBracket: /\{/, setIdentifier: { match: /[A-Z]+[A-Z_]*/ },
closeCurlyBracket: /\}/, openCurlyBracket: { match: /\{/, push: 'setOperation' },
equal: /=/,
openSquareBracket: /\[/, openSquareBracket: /\[/,
closeSquareBracket: /\]/ phone: /[\u00c0-\u03FFa-z]+/,
closeSquareBracket: { match: /\]/ },
comma: { match: /,/, push: 'commaOperation' },
whiteSpace: { match: /[\t ]+/ },
newLine: { match: /\n/, pop: true, lineBreaks: true },
},
setOperation: {
closeCurlyBracket: { match: /\}/, pop: true },
// ! restrict identifiers
keyword: { match: ['not', 'and', 'or', 'nor', 'in', 'yield'], type: moo.keywords({
'kw-set-not': 'not' ,
'kw-set-and': 'and' ,
'kw-set-or': 'or' ,
'kw-set-nor': 'nor' ,
'kw-set-in': 'in' ,
'kw-set-yield': 'yield' ,
})
},
identifier: /[A-Z]+[A-Z_]+/,
whiteSpace: /[\t ]+/,
newLine: { match: /\n/, lineBreaks: true }
},
commaOperation: {
// if comma is detected during a definition, the commaOperation consumes all white space and pops back to definition
// this prevents popping back to main
whiteSpace: { match: /\s+/, lineBreaks: true, pop: true },
newLine: { match: /\n/, lineBreaks: true, pop: true }
} }
}); });
// ['semicolon', ';.*\n'],
// [`star`, `\\*`],
// ['pipe', `\\|`],
// ['openBracket', `\\[`],
// ['closeBracket', `\\]`],
// ['positiveAssignment', `\\+=`],
// ['negativeAssignment', `\\-=`],
// ['plus', `\\+`],
// ['minus', `\\-`],
// ['greaterThan', `\\>`],
// ['hash', `#`],
// ['slash', `\/`],
// ['dot', `\\.`],
// ['underscore', `\\_`],
// [`identifier`, `[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*`],
// [`phone`, `[\u00c0-\u03FFA-Za-z0]+`],
// ['equal', `=`],
// [`lineBreak`, `\\n`],
// [`whiteSpace`, `\\s+`]

View file

@ -1,23 +0,0 @@
import { lexer } from './lexer';
describe('lexer', () => {
const extractToken = obj => ({ type: obj.type, value: obj.value });
it('lexes simple comment', () => {
lexer.reset('; comment');
const token = lexer.next();
expect(extractToken(token)).toStrictEqual({ type: 'comment', value: '; comment'});
});
it('lexes simple * and identifier', () => {
lexer.reset('*proto');
const stream = [ extractToken(lexer.next()), extractToken(lexer.next()) ];
expect(stream).toStrictEqual([ { type: 'star', value: '*' }, { type: 'identifier', value: 'proto' } ]);
})
it('lexes set and identifier', () => {
lexer.reset('set PLOSIVES');
const stream = [ extractToken(lexer.next()), extractToken(lexer.next()), extractToken(lexer.next()) ];
expect(stream).toStrictEqual([ { type: 'kw-set', value: 'set' }, { type: 'space', value: ' ' }, { type: 'identifier', value: 'PLOSIVES' } ]);
})
})

View file

@ -0,0 +1,211 @@
// Shared lexer test fixtures. Each entry pairs a raw .latl source snippet
// (`latl`) with the exact token stream (`tokens`, as { type, value } pairs)
// the moo-based lexer is expected to emit for it. The leading '\n' token in
// each stream comes from the template literal starting on the line after
// the opening backtick. Phone values contain Unicode combining diacritics;
// do not normalize or re-encode them.
export const assertionData = {
  // Two set definitions chained with a trailing comma after the first ']'.
  setDefinition: {
    latl: `
set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ],
STOP_PULMONIC_CONSONANTS = [ p, b, p̪, b̪, t̼, d̼, t, d, ʈ, ɖ, c, ɟ, k, ɡ, q, ɢ, ʡ, ʔ ]`,
    tokens: [
      { type: 'whiteSpace', value: '\n' },
      { type: 'kw-set', value: 'set' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'setIdentifier', value: 'NASAL_PULMONIC_CONSONANTS' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'equal', value: '=' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'openSquareBracket', value: '[' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'm̥' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'm' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɱ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'n̼' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'n̥' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'n' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɳ̊' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɳ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɲ̊' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɲ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ŋ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: '̊ŋ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɴ' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'closeSquareBracket', value: ']' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: '\n ' },
      { type: 'setIdentifier', value: 'STOP_PULMONIC_CONSONANTS' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'equal', value: '=' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'openSquareBracket', value: '[' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'p' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'b' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'p̪' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'b̪' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 't̼' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'd̼' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 't' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'd' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ʈ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɖ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'c' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɟ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'k' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɡ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'q' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɢ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ʡ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ʔ' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'closeSquareBracket', value: ']' }
    ]
  },
  // A single set definition that also declares the one-letter alias 'N'
  // via the comma between the two setIdentifier tokens.
  setAliasDefinition: {
    latl: `
set NASAL_PULMONIC_CONSONANTS, N = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ]`,
    tokens: [
      { type: 'whiteSpace', value: '\n' },
      { type: 'kw-set', value: 'set' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'setIdentifier', value: 'NASAL_PULMONIC_CONSONANTS' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'setIdentifier', value: 'N' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'equal', value: '=' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'openSquareBracket', value: '[' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'm̥' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'm' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɱ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'n̼' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'n̥' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'n' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɳ̊' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɳ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɲ̊' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɲ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ŋ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: '̊ŋ' },
      { type: 'comma', value: ',' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'phone', value: 'ɴ' },
      { type: 'whiteSpace', value: ' ' },
      { type: 'closeSquareBracket', value: ']' },
    ]
  },
  // A set defined as a brace-delimited join of other sets with the 'or'
  // operator; exercises the setOperation lexer state (identifier vs.
  // setIdentifier, kw-set-or, newLine inside braces).
  setDefinitionJoin: {
    latl: `
set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS
    or NASAL_CLICK_CONSONANTS or L_CLICK_CONSONANTS
}`,
    tokens: [
      { type: 'whiteSpace', value: '\n'},
      { type: 'kw-set', value: 'set'},
      { type: 'whiteSpace', value: ' '},
      { type: 'setIdentifier', value: 'CLICK_CONSONANTS'},
      { type: 'whiteSpace', value: ' '},
      { type: 'equal', value: '='},
      { type: 'whiteSpace', value: ' '},
      { type: 'openCurlyBracket', value: '{'},
      { type: 'whiteSpace', value: ' '},
      { type: 'identifier', value: 'TENUIS_CLICK_CONSONANTS'},
      { type: 'whiteSpace', value: ' '},
      { type: 'kw-set-or', value: 'or'},
      { type: 'whiteSpace', value: ' '},
      { type: 'identifier', value: 'VOICED_CLICK_CONSONANTS'},
      { type: 'newLine', value: '\n'},
      { type: 'whiteSpace', value: ' '},
      { type: 'kw-set-or', value: 'or'},
      { type: 'whiteSpace', value: ' '},
      { type: 'identifier', value: 'NASAL_CLICK_CONSONANTS'},
      { type: 'whiteSpace', value: ' '},
      { type: 'kw-set-or', value: 'or'},
      { type: 'whiteSpace', value: ' '},
      { type: 'identifier', value: 'L_CLICK_CONSONANTS'},
      { type: 'whiteSpace', value: ' '},
      { type: 'newLine', value: '\n'},
      { type: 'whiteSpace', value: ' '},
      { type: 'closeCurlyBracket', value: '}'}
    ]
  },
}

View file

@ -0,0 +1,53 @@
import { lexer } from '../lexer';
import { assertionData } from './assertionData';
// Unit tests for the latl lexer: two single-token smoke tests plus full
// token-stream comparisons against the shared fixtures in assertionData.
describe('lexer', () => {
  // Reduce a moo token to the { type, value } pair the fixtures assert on;
  // null marks lexer exhaustion (moo's next() returns undefined at EOF).
  const formatToken = obj => ({ type: obj.type, value: obj.value });
  const getToken = obj => obj ? formatToken(obj) : null;

  // Tokenize an entire latl string into an array of { type, value } pairs.
  // A while loop is used instead of the previous do/while, which pushed the
  // initial null onto the stream when the input produced no tokens (empty
  // input yielded [null] rather than []); push() also avoids the O(n^2)
  // re-spreading of the accumulator on every token.
  const getStream = latl => {
    lexer.reset(latl);
    const stream = [];
    let token = getToken(lexer.next());
    while (token) {
      stream.push(token);
      token = getToken(lexer.next());
    }
    return stream;
  };

  it('lexes simple comment', () => {
    lexer.reset('; comment');
    const token = lexer.next();
    expect(getToken(token)).toStrictEqual({ type: 'comment', value: '; comment'});
  });

  it('lexes simple * and identifier', () => {
    lexer.reset('*proto');
    const stream = [ getToken(lexer.next()), getToken(lexer.next()) ];
    expect(stream).toStrictEqual([ { type: 'star', value: '*' }, { type: 'identifier', value: 'proto' } ]);
  });

  it('lexes set and identifier', () => {
    lexer.reset('set PLOSIVES');
    const stream = [ getToken(lexer.next()), getToken(lexer.next()), getToken(lexer.next()) ];
    expect(stream).toStrictEqual([ { type: 'kw-set', value: 'set' }, { type: 'whiteSpace', value: ' ' }, { type: 'setIdentifier', value: 'PLOSIVES' } ]);
  });

  it('lexes multiple set definitions with comma operator', () => {
    const { latl, tokens } = assertionData.setDefinition;
    expect(getStream(latl)).toStrictEqual(tokens);
  });

  it('lexes set definition with alias', () => {
    const { latl, tokens } = assertionData.setAliasDefinition;
    expect(getStream(latl)).toStrictEqual(tokens);
  });

  it('lexes set definition with set join', () => {
    const { latl, tokens } = assertionData.setDefinitionJoin;
    expect(getStream(latl)).toStrictEqual(tokens);
  });
});