init lexer with moo

This commit is contained in:
Sorrel Bri 2020-03-11 21:09:41 -07:00
parent 3f2c822c55
commit e08500a047
11 changed files with 150 additions and 32 deletions

5
package-lock.json generated
View file

@ -10010,6 +10010,11 @@
}
}
},
"moo": {
"version": "0.5.1",
"resolved": "https://registry.npmjs.org/moo/-/moo-0.5.1.tgz",
"integrity": "sha512-I1mnb5xn4fO80BH9BLcF0yLypy2UKl+Cb01Fu0hJRkJjlCRtxZMWkTdAtDd5ZqCOxtCkhmRwyI57vWT+1iZ67w=="
},
"move-concurrently": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/move-concurrently/-/move-concurrently-1.0.1.tgz",

View file

@ -7,6 +7,7 @@
"flow-bin": "^0.113.0",
"gh-pages": "^2.2.0",
"local-storage": "^2.0.0",
"moo": "^0.5.1",
"node-sass": "^4.13.1",
"react": "^16.12.0",
"react-dom": "^16.12.0",

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

BIN
public/assets/fca-demo.mov Normal file

Binary file not shown.

Binary file not shown.

View file

@ -1,6 +1,7 @@
; -------- GA ENGLISH PHONETIC INVENTORY
; ---- VOWELS = æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟
; -- NASAL = æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; ɪ̞ / ʊ̞ = lowered
; u̟ = advanced
; -- LABIAL = u̟ / ʊ̞ / ɔ
@ -25,7 +26,17 @@
; -- GLIDES = j / w
; -- LARYNGEALS = h ɦ / ʔ [- consonantal sonorant +/- LARYNGEAL FEATURES] only
; -------- distinctive groups
set PLOSIVES := | p / pʰ / t / tʼ / tʰ / ɾ / kʼ / k / kʰ |
AFFRICATES := | tʃʰ / dʒ |
FRICATIVES := | f / v / θ / ð / s / z / ʃ / ʒ / ç / x |
NASALS := | m / ɱ / n / ŋ |
LIQUIDS := | l / ɹ |
SYLLABICS := | m̩ / n̩ / l̩ / ɹ̩ |
VOWELS := | æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ |
GLIDES := | j / w |
LARYNGEALS := | h / ɦ / ʔ |
; -------- main class features
@ -45,7 +56,7 @@
m̩ / n̩ / l̩ / ɹ̩
-=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; GLIDES
j / w /
; LARYNGEALS
@ -55,7 +66,7 @@
[sonorant
+=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; GLIDES
j / w w̥ /
; LIQUIDS + RHOTICS
@ -77,7 +88,7 @@
[approximant
+=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; LIQUIDS + RHOTICS
l / ɹ ɹʲ ɹˤ /
; GLIDES
@ -104,7 +115,7 @@
[voice
+=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; GLIDES
j / w /
; LIQUIDS + RHOTICS
@ -141,7 +152,7 @@
h ɦ
-=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; UNASPIRATED PLOSIVES
p / pʼ / t / tʼ / ɾ / k / kʼ /
; UNASPIRATED AFFRICATES
@ -169,7 +180,7 @@
pʼ / tʼ / kʼ
-=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; PLOSIVES
p / pʰ / t / tʰ ɾ / k / kʰ /
; AFFRICATES
@ -198,7 +209,7 @@
; FRICATIVES
f / v / θ / ð / s / z / ʃ / ʒ / ç / x /
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; LIQUIDS + RHOTICS
l / ɹ ɹʲ ɹˤ /
; GLIDES
@ -226,7 +237,7 @@
m̩ / n̩
-=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; FRICATIVES
f / v / θ / ð / s / z / ʃ / ʒ / ç / x /
; LIQUIDS + RHOTICS
@ -249,7 +260,7 @@
tʃ / dʒ
-=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; PLOSIVES
p / pʼ / pʰ / t / tʼ / tʰ ɾ / k / kʼ / kʰ /
; NON-STRIDENT FRICATIVES
@ -272,7 +283,7 @@
-=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; PLOSIVES
p / pʼ / pʰ / t / tʼ / tʰ ɾ / k / kʼ / kʰ
; AFFRICATES
@ -296,7 +307,7 @@
[labial
+=
; ROUNDED VOWELS
u̟ / ʊ̞ / ɔ /
u̟ / ʊ̞ / ɔ / ʊ̃ / ũ / ɔ̃
; LABIAL PLOSIVES
p / pʼ / pʰ /
; LABIAL FRICATIVES
@ -309,7 +320,7 @@
w
-=
; UNROUNDED VOWELS
æ / e / ə / ɑ / ɪ̞ / ɛ / ʌ / i /
æ / e / ə / ɑ / ɪ̞ / ɛ / ʌ / i / æ̃ / ẽ / ə̃ / ɑ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ĩ /
; NON-LABIAL PLOSIVES
t / tʼ / tʰ ɾ / k / kʼ / kʰ /
; NON-LABIAL AFFRICATES
@ -348,7 +359,7 @@
n̩ / l̩ / ɹ̩
-=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; NON-CORONAL PLOSIVES
p / pʼ / pʰ / k / kʼ / kʰ
; NON-CORONAL FRICATIVES
@ -388,7 +399,7 @@
ɹ̩ /
; -- NON-CORONALs
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; NON-CORONAL PLOSIVES
p / pʼ / pʰ / k / kʼ / kʰ
; NON-CORONAL FRICATIVES
@ -427,7 +438,7 @@
n̩ / l̩ /
; -- NON-CORONALS
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; NON-CORONAL PLOSIVES
p / pʼ / pʰ / k / kʼ / kʰ
; NON-CORONAL FRICATIVES
@ -447,7 +458,7 @@
[dorsal
+=
; VOWELS
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ /
æ / e / ə / ɑ / ɔ / ɪ̞ / ɛ / ʌ / ʊ̞ / i / u̟ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; DORSAL PLOSIVES
k / kʼ / kʰ /
; DORSAL FRICATIVES
@ -480,7 +491,7 @@
[high
+=
; HIGH VOWELS
i / u̟ / ʊ̞ / ɪ̞
i / u̟ / ʊ̞ / ɪ̞ / ĩ / ũ / ʊ̃ / ɪ̃
; HIGH DORSAL PLOSIVES
k / kʼ / kʰ /
; HIGH DORSAL FRICATIVES
@ -493,7 +504,7 @@
j / w
-= χ / e / o / a
; NON-HIGH VOWELS
ɑ / æ / e / ə / ɛ / ʌ
ɑ / æ / e / ə / ɛ / ʌ / æ̃ / ẽ / ə̃ / ɑ̃ / ɔ̃ / ɛ̃ / ʌ̃ /
; NON-HIGH RHOTIC LIQUIDS
ɹˤ
; -- NON-DORSALS
@ -518,12 +529,12 @@
[low
+=
; LOW VOWELS
ɑ / æ / ɛ /
ɑ / æ / ɛ / æ̃ / ɑ̃ / ɛ̃ /
; LOW DORSAL RHOTIC LIQUIDS
ɹˤ
-= a / ɛ / ɔ
; NON-LOW VOWELS
i / u̟ / ʊ̞ / ɪ̞ / e / ə / ʌ
i / u̟ / ʊ̞ / ɪ̞ / e / ə / ʌ / ẽ / ə̃ / ɔ̃ / ɪ̃ / ʌ̃ / ʊ̃ / ĩ / ũ
; NON-LOW DORSAL PLOSIVES
k / kʼ / kʰ /
; NON-LOW DORSAL FRICATIVES
@ -556,7 +567,7 @@
+=
; k / kʼ / ɣ / χ / u / ə / o / ʌ / ɑ
; BACK VOWELS
ɑ / ɔ / ʌ / ʊ̞ / u̟ /
ɑ / ɔ / ʌ / ʊ̞ / u̟ / ɑ̃ / ɔ̃ / ʌ̃ / ʊ̃ / ũ /
; BACK DORSAL PLOSIVES
k / kʼ / kʰ /
; BACK DORSAL FRICATIVES
@ -573,7 +584,7 @@
; NON-BACK DORSAL GLIDES
j
; NON-BACK VOWELS
æ / e / ə / ɪ̞ / ɛ / i
æ / e / ə / ɪ̞ / ɛ / i / æ̃ / ẽ / ə̃ / ɪ̃ / ɛ̃ / ĩ
; -- NON-DORSALS
; NON-DORSAL PLOSIVES
p / pʼ / pʰ / t / tʼ / tʰ ɾ /
@ -595,10 +606,10 @@
[tense ; compare to ATR or RTR
+=
; TENSE VOWELS
e / i / u̟ / ɑ
e / i / u̟ / ɑ / ĩ / ũ / ẽ / ɑ̃ /
-=
; NON-TENSE VOWELS
æ / ə / ɪ̞ / ɛ / ʌ / ʊ̞ / ɔ /
æ / ə / ɪ̞ / ɛ / ʌ / ʊ̞ / ɔ / æ̃ / ə̃ / ɔ̃ / ɪ̃ / ɛ̃ / ʌ̃ / ʊ̃ /
; DORSAL PLOSIVES
k / kʼ / kʰ /
; DORSAL FRICATIVES
@ -629,20 +640,40 @@
]
*PROTO
; -- Devoicing, all our z's become s's
[+ voice - continuant]>[- voice]/._.
; -- Reduction of schwa
ə>0/._.
|Gif Lang
*PROTO
; -- Ejectivization, all our pits become pit's
|Jif Lang
; -- Devoicing, all our z's become s's
[ + voice consonantal - nasal]>[- voice]/._.
; -- loss of schwa, the is th'
ə>0/._.
; -- Ejectivization, all our pits become pit's
[+ spreadGlottis - continuant]>[+ constrictedGlottis - spreadGlottis]/._[+ constrictedGlottis]
[+ spreadGlottis - continuant]>[+ constrictedGlottis - spreadGlottis]/[+ constrictedGlottis]_.
[+ constrictedGlottis]>0/[+ constrictedGlottis - continuant]_.
[+ constrictedGlottis]>0/._[+ constrictedGlottis - continuant]
|Jif Lang
/PROTO
; -- r color spreading, all our reports become rihpahts
[- consonantal tense]>[+ tense]/ɹ_.
[- consonantal tense]>[+ tense]/._ɹ
[- consonantal high]>[+ high]/ɹʲ_.
[- consonantal high]>[+ high]/._ɹʲ
[- consonantal back]>[+ back]/ɹˤ_.
[- consonantal back]>[+ back]/._ɹˤ
ɹ>0/._.
ɹʲ>0/._.
ɹˤ>0/._.
/
; -- Deaspiration, tiff is diff and diff is tiff
[+ spreadGlottis - continuant]>[- spreadGlottis]/._.
; "JavaScript"
; "gif or jif? I say zhaif"
; "This request returns an empty object"
; "I love going to waffle js!"
; "A donut a day makes living with the threat of pandemic easier"

58
src/utils/lexer.js Normal file
View file

@ -0,0 +1,58 @@
const moo = require('moo');
// Identifier pattern shared by the `main` and `epoch` states: one or more ASCII
// letters, followed by any mix of ASCII letters, digits, code points in the
// range U+00C0–U+03FF, hyphen, or underscore.
//
// The previous literal used `\\-\\_` inside the character class. In a regex
// literal that is the range backslash..backslash plus `_`, so identifiers
// accepted a literal backslash and rejected hyphens. `\-_` is the intended set.
//
// NOTE(review): because identifiers may now contain `-`, an identifier that is
// immediately followed by `->` in the `epoch` state absorbs the hyphen, leaving
// `>` to lex as `greaterThan`. Confirm this is acceptable for the epoch syntax.
const IDENTIFIER_PATTERN = /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\-_]*/;

/**
 * Stateful moo lexer.
 *
 * States:
 *  - `main`    — start state: comments, whitespace, identifiers (with the `set`
 *                keyword re-typed as `kw-set`), and the tokens that push the
 *                other states (`*` → epoch, `/` → lexicon, `[` → feature).
 *  - `epoch`   — entered on `*`; popped on `|`.
 *  - `lexicon` — entered on `/`; popped on the next `/`.
 *  - `feature` — entered on `[`; popped on `]`.
 */
export const lexer = moo.states({
  main: {
    comment: /;.*/,
    star: { match: /\*/, push: 'epoch' },
    slash: { match: /\//, push: 'lexicon' },
    identifier: {
      match: IDENTIFIER_PATTERN,
      // Re-type the reserved word `set` so later stages can tell it from names.
      type: moo.keywords({
        'kw-set': 'set'
      })
    },
    openBracket: { match: /\[/, push: 'feature' },
    space: { match: /\s+/, lineBreaks: true }
  },

  epoch: {
    identifier: IDENTIFIER_PATTERN,
    pipe: { match: /\|/, pop: true },
    greaterThan: />/,
    arrow: /->/,
    hash: /#/,
    slash: /\//,
    dot: /\./,
    underscore: /_/,
  },

  lexicon: {
    slash: { match: /\//, pop: true },
  },

  feature: {
    closeBracket: { match: /\]/, pop: true },
    positiveAssignment: /\+=/,
    negativeAssignment: /-=/,
  }
});
// ['semicolon', ';.*\n'],
// [`star`, `\\*`],
// ['pipe', `\\|`],
// ['openBracket', `\\[`],
// ['closeBracket', `\\]`],
// ['positiveAssignment', `\\+=`],
// ['negativeAssignment', `\\-=`],
// ['plus', `\\+`],
// ['minus', `\\-`],
// ['greaterThan', `\\>`],
// ['hash', `#`],
// ['slash', `\/`],
// ['dot', `\\.`],
// ['underscore', `\\_`],
// [`identifier`, `[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*`],
// [`phone`, `[\u00c0-\u03FFA-Za-z0]+`],
// ['equal', `=`],
// [`lineBreak`, `\\n`],
// [`whiteSpace`, `\\s+`]

23
src/utils/lexer.test.js Normal file
View file

@ -0,0 +1,23 @@
import { lexer } from './lexer';
describe('lexer', () => {
  // Reduce a moo token to the two fields these tests assert on.
  const extractToken = ({ type, value }) => ({ type, value });

  // Reset the shared lexer with `source` and pull the first `count` tokens.
  const lexFirst = (source, count) => {
    lexer.reset(source);
    const tokens = [];
    for (let i = 0; i < count; i += 1) {
      tokens.push(extractToken(lexer.next()));
    }
    return tokens;
  };

  it('lexes simple comment', () => {
    const [token] = lexFirst('; comment', 1);
    expect(token).toStrictEqual({ type: 'comment', value: '; comment' });
  });

  it('lexes simple * and identifier', () => {
    const stream = lexFirst('*proto', 2);
    expect(stream).toStrictEqual([
      { type: 'star', value: '*' },
      { type: 'identifier', value: 'proto' },
    ]);
  });

  it('lexes set and identifier', () => {
    const stream = lexFirst('set PLOSIVES', 3);
    expect(stream).toStrictEqual([
      { type: 'kw-set', value: 'set' },
      { type: 'space', value: ' ' },
      { type: 'identifier', value: 'PLOSIVES' },
    ]);
  });
});