stub parser
This commit is contained in:
parent
7c75be543f
commit
aa19d42a11
9 changed files with 205 additions and 39 deletions
|
@ -17,7 +17,8 @@
|
|||
},
|
||||
"scripts": {
|
||||
"start": "react-scripts start",
|
||||
"compile-grammar": "nearleyc src/utils/grammar.ne -o src/utils/grammar.js",
|
||||
"compile-grammar": "nearleyc src/utils/latl/grammar.ne -o src/utils/latl/grammar.js",
|
||||
"test-grammar": "nearley-test src/utils/latl/grammar.js --input",
|
||||
"flow": "flow",
|
||||
"build": "react-scripts build",
|
||||
"test": "react-scripts test",
|
||||
|
|
0
src/utils/grammar.js
Normal file
0
src/utils/grammar.js
Normal file
|
@ -3,17 +3,50 @@
|
|||
(function () {
|
||||
function id(x) { return x[0]; }
|
||||
|
||||
const lexer = require('./lexer');
|
||||
const { lexer } = require('./lexer.js');
|
||||
const getTerminal = d => d ? d[0] : null;
|
||||
const getAll = d => d.map((item, i) => ({[i]: item}));
|
||||
const flag = token => d => d.map(item => ({[token]: item}))
|
||||
const clearNull = d => d.filter(t => !!t);
|
||||
const flagIndex = d => d.map((item, i) => ({[i]: item}))
|
||||
const remove = _ => null;
|
||||
const append = d => d.join('');
|
||||
const constructSet = d => d.reduce((acc, t) => {
|
||||
if (t && t.type === 'setIdentifier') acc.push({set: t})
|
||||
if (t && t.length) acc[acc.length - 1].phones = t;
|
||||
return acc;
|
||||
}, []);
|
||||
const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d)
|
||||
var grammar = {
|
||||
Lexer: lexer,
|
||||
ParserRules: [
|
||||
{"name": "main$ebnf$1$subexpression$1", "symbols": ["statement", {"literal":"\n"}]},
|
||||
{"name": "main$ebnf$1", "symbols": ["main$ebnf$1$subexpression$1"]},
|
||||
{"name": "main$ebnf$1$subexpression$2", "symbols": ["statement", {"literal":"\n"}]},
|
||||
{"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$2"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
|
||||
{"name": "main", "symbols": ["main$ebnf$1"]},
|
||||
{"name": "statement", "symbols": [{"literal":"foo"}]},
|
||||
{"name": "statement", "symbols": [{"literal":"bar"}]}
|
||||
{"name": "main$ebnf$1", "symbols": []},
|
||||
{"name": "main$ebnf$1$subexpression$1", "symbols": ["statement"]},
|
||||
{"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
|
||||
{"name": "main", "symbols": ["main$ebnf$1"], "postprocess": compose(flag('main'), getTerminal)},
|
||||
{"name": "_$ebnf$1$subexpression$1", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)]},
|
||||
{"name": "_$ebnf$1", "symbols": ["_$ebnf$1$subexpression$1"], "postprocess": id},
|
||||
{"name": "_$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
|
||||
{"name": "_", "symbols": ["_$ebnf$1"], "postprocess": remove},
|
||||
{"name": "__", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)], "postprocess": remove},
|
||||
{"name": "statement", "symbols": ["comment"]},
|
||||
{"name": "statement", "symbols": ["definition"], "postprocess": compose(clearNull, getTerminal)},
|
||||
{"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": compose(remove, getTerminal)},
|
||||
{"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": d => ({token: 'setDefinition', sets: d[2]})},
|
||||
{"name": "setDefinition$ebnf$1", "symbols": []},
|
||||
{"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
|
||||
{"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1", "setDefinition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
|
||||
{"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression"], "postprocess": constructSet},
|
||||
{"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)], "postprocess": d => d.filter(t => t && t.length)},
|
||||
{"name": "phoneList$ebnf$1", "symbols": []},
|
||||
{"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), (lexer.has("comma") ? {type: "comma"} : comma), "_"]},
|
||||
{"name": "phoneList$ebnf$1", "symbols": ["phoneList$ebnf$1", "phoneList$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
|
||||
{"name": "phoneList", "symbols": ["phoneList$ebnf$1", (lexer.has("phone") ? {type: "phone"} : phone)], "postprocess": d => d.filter(t => t && (t.type === 'phone' || t.length) )
|
||||
.map(t => {
|
||||
if (!t.length) return t;
|
||||
t.filter(st => st && st.type === 'phone')
|
||||
return t;
|
||||
}) }
|
||||
]
|
||||
, ParserStart: "main"
|
||||
}
|
||||
|
|
|
@ -1,8 +1,79 @@
|
|||
@{%
|
||||
const lexer = require('./lexer');
|
||||
const { lexer } = require('./lexer.js');
|
||||
// Postprocessor helper: yields the first matched element of `d`,
// or null when `d` itself is falsy (e.g. an absent optional match).
const getTerminal = (d) => {
  if (!d) {
    return null;
  }
  return d[0];
};
|
||||
// Debug-style postprocessor: wraps every matched element in a one-key object
// whose key is the element's position in `d`, e.g. ['x', 'y'] -> [{0: 'x'}, {1: 'y'}].
const getAll = (d) => {
  const keyByPosition = (value, position) => ({ [position]: value });
  return d.map(keyByPosition);
};
|
||||
// Builds a postprocessor that labels every matched element with `token`:
// flag('main')(['a', 'b']) -> [{main: 'a'}, {main: 'b'}].
const flag = (token) => {
  const label = (item) => ({ [token]: item });
  return (d) => d.map(label);
};
|
||||
// Drops every falsy entry (null, undefined, '', 0, false, NaN) from the match array,
// which removes the nulls left behind by rules postprocessed with `remove`.
const clearNull = (d) => d.filter(Boolean);
|
||||
// Wraps each matched element in an object keyed by its index in `d`
// (the same mapping `getAll` performs).
const flagIndex = (d) => {
  const indexed = (entry, idx) => ({ [idx]: entry });
  return d.map(indexed);
};
|
||||
// Postprocessor that throws away whatever was matched and yields null;
// used by the whitespace and comment rules so they leave no payload in the parse result.
const remove = (_matched) => {
  return null;
};
|
||||
// Concatenates all matched pieces into one string with no separator
// (null/undefined pieces contribute nothing, per Array.prototype.join).
const append = (pieces) => {
  const separator = '';
  return pieces.join(separator);
};
|
||||
// Postprocessor for `setDefinition`: folds the matched symbols into a list of
// `{ set: <setIdentifier token>, phones: <setExpression result> }` records.
//
// `d[0]` is the `(... %comma __):*` repetition, i.e. an array of groups, each
// group being the symbols of one "NAME = [ ... ]," clause. Those groups are
// spliced in line with the trailing clause before folding; otherwise a
// non-empty `d[0]` would be mistaken for a phone list while no set exists yet
// (a TypeError) and the identifiers inside the groups would never be seen.
const constructSet = d => {
  const [groups, ...tail] = d;
  const symbols = Array.isArray(groups)
    // concat() unpacks each group one level only, so nested setExpression arrays stay intact
    ? groups.reduce((flat, group) => flat.concat(group), []).concat(tail)
    : d;

  return symbols.reduce((acc, t) => {
    if (t && t.type === 'setIdentifier') acc.push({set: t});
    // Anything with a length is treated as the phone list of the most recent set;
    // it is ignored when no set identifier has been seen yet.
    if (t && t.length && acc.length) acc[acc.length - 1].phones = t;
    return acc;
  }, []);
};
|
||||
// Chains postprocessors left to right: the first function listed receives the
// raw match data, each later one receives the previous result.
// e.g. compose(f, g)(d) === g(f(d)); with no functions the data is returned unchanged.
const compose = (...funcs) => (d) => {
  let result = d;
  for (const step of funcs) {
    result = step(result);
  }
  return result;
};
|
||||
%}
|
||||
|
||||
@lexer lexer
|
||||
|
||||
main -> (statement "\n"):+
|
||||
statement -> "foo" | "bar"
|
||||
main -> (statement):*
|
||||
{% compose(flag('main'), getTerminal) %}
|
||||
|
||||
_ -> (%whiteSpace):?
|
||||
{% remove %}
|
||||
|
||||
__ -> %whiteSpace
|
||||
{% remove %}
|
||||
|
||||
statement -> comment | definition
|
||||
{% compose(clearNull, getTerminal) %}
|
||||
|
||||
comment -> %comment
|
||||
{% compose(remove, getTerminal) %}
|
||||
|
||||
# SETS
|
||||
definition -> %kwSet __ setDefinition {% d => ({token: 'setDefinition', sets: d[2]}) %}
|
||||
setDefinition -> (%setIdentifier __ %equal __ setExpression %comma __):* %setIdentifier __ %equal __ setExpression
|
||||
{% constructSet %}
|
||||
setExpression -> %openSquareBracket _ phoneList _ %closeSquareBracket
|
||||
{% d => d.filter(t => t && t.length) %}
|
||||
# One or more phones separated by commas.
# d[0] is the list of leading "(phone, comma, _)" groups and d[1] is the final
# phone token; the result is a single flat list of phone tokens in source order,
# so a one-phone list and a many-phone list have the same shape.
phoneList -> (%phone %comma _):* %phone
  {% d => d[0].map(group => group[0]).concat(d[1]) %}
|
||||
|
||||
|
||||
# assignmentExpression:
|
||||
# /*
|
||||
# * SPEC:
|
||||
# * conditionalExpression
|
||||
# * | leftHandSideExpression assignmentOperator assignmentExpression
|
||||
# */
|
||||
# (leftHandSideExpression assignmentOperator) =>
|
||||
# leftHandSideExpression assignmentOperator assignmentExpression
|
||||
# | conditionalExpression
|
||||
# ;
|
||||
|
||||
# assignmentExpressionNoln:
|
||||
# conditionalExpressionNoln
|
||||
# | leftHandSideExpression assignmentOperator assignmentExpressionNoln
|
||||
# ;
|
||||
|
||||
# assignmentOperator:
|
||||
# /* note that in the grammar these are listed out explicitly */
|
||||
# EQ | TIMESEQ | DIVIDEEQ | PERCENTEQ | PLUSEQ | MINUSEQ | LSHIFTEQ | RSHIFTEQ
|
||||
# | GT3EQ | AMPEREQ | CAROTEQ | PIPEEQ
|
||||
# ;
|
||||
|
||||
# expression:
|
||||
# /*
|
||||
# * SPEC:
|
||||
# * assignmentExpression
|
||||
# * | expression COMMA assignmentExpression
|
||||
# */
|
||||
# assignmentExpression (expressionTail)*
|
||||
# ;
|
|
@ -1,17 +1,13 @@
|
|||
const moo = require('moo');
|
||||
|
||||
export const lexer = moo.states({
|
||||
const lexer = moo.states({
|
||||
main: {
|
||||
comment: /;.*$/,
|
||||
star: { match: /\*/, push: 'epoch' },
|
||||
slash: { match: /\//, push: 'lexicon' },
|
||||
// change so that identifiers are always upper, keywords are always lower, phones are always lower
|
||||
'kw-set': { match: 'set', type: moo.keywords({ 'kw-set': 'set '}), push: 'setDefinition'},
|
||||
'kwSet': { match: 'set', type: moo.keywords({ 'kwSet': 'set '}), push: 'setDefinition'},
|
||||
identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, },
|
||||
// type: moo.keywords({
|
||||
// 'kw-set': 'set'
|
||||
// // { match: 'set', push: 'setDefinition' },
|
||||
// })},
|
||||
openBracket: { match: /\[/, push: 'feature' },
|
||||
whiteSpace: { match: /\s+/, lineBreaks: true },
|
||||
newLine: { match: /\n+/, lineBreaks: true }
|
||||
|
@ -117,3 +113,5 @@ export const lexer = moo.states({
|
|||
}
|
||||
|
||||
});
|
||||
|
||||
module.exports = {lexer};
|
|
@ -1,4 +1,4 @@
|
|||
const nearley = require("nearley");
|
||||
const grammar = require("./grammar.js");
|
||||
|
||||
const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
|
||||
/**
 * Factory for LATL parsers: every call compiles the grammar object and
 * returns a brand-new nearley.Parser, so callers never share parser instances.
 */
export const parser = () => {
  const compiledGrammar = nearley.Grammar.fromCompiled(grammar);
  return new nearley.Parser(compiledGrammar);
};
|
|
@ -1,11 +1,25 @@
|
|||
export const assertionData = {
|
||||
setDefinition: {
|
||||
simpleComment: {
|
||||
latl: `; comment`,
|
||||
tokens: [
|
||||
{ type: 'comment', value: '; comment'}
|
||||
]
|
||||
},
|
||||
simpleSetDefinition: {
|
||||
latl: `set PLOSIVES`,
|
||||
tokens: [
|
||||
{ type: 'kwSet', value: 'set' },
|
||||
{ type: 'whiteSpace', value: ' ' },
|
||||
{ type: 'setIdentifier', value: 'PLOSIVES' }
|
||||
]
|
||||
},
|
||||
commaSetDefinition: {
|
||||
latl: `
|
||||
set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ],
|
||||
STOP_PULMONIC_CONSONANTS = [ p, b, p̪, b̪, t̼, d̼, t, d, ʈ, ɖ, c, ɟ, k, ɡ, q, ɢ, ʡ, ʔ ]`,
|
||||
tokens: [
|
||||
{ type: 'whiteSpace', value: '\n' },
|
||||
{ type: 'kw-set', value: 'set' },
|
||||
{ type: 'kwSet', value: 'set' },
|
||||
{ type: 'whiteSpace', value: ' ' },
|
||||
{ type: 'setIdentifier', value: 'NASAL_PULMONIC_CONSONANTS' },
|
||||
{ type: 'whiteSpace', value: ' ' },
|
||||
|
@ -121,7 +135,7 @@ set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊,
|
|||
set NASAL_PULMONIC_CONSONANTS, N = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ]`,
|
||||
tokens: [
|
||||
{ type: 'whiteSpace', value: '\n' },
|
||||
{ type: 'kw-set', value: 'set' },
|
||||
{ type: 'kwSet', value: 'set' },
|
||||
{ type: 'whiteSpace', value: ' ' },
|
||||
{ type: 'setIdentifier', value: 'NASAL_PULMONIC_CONSONANTS' },
|
||||
{ type: 'comma', value: ',' },
|
||||
|
@ -180,7 +194,7 @@ set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS
|
|||
}`,
|
||||
tokens: [
|
||||
{ type: 'whiteSpace', value: '\n'},
|
||||
{ type: 'kw-set', value: 'set'},
|
||||
{ type: 'kwSet', value: 'set'},
|
||||
{ type: 'whiteSpace', value: ' '},
|
||||
{ type: 'setIdentifier', value: 'CLICK_CONSONANTS'},
|
||||
{ type: 'whiteSpace', value: ' '},
|
||||
|
@ -215,7 +229,7 @@ set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] },
|
|||
LONG_NASAL_VOWELS = { [ Vː ] in NASAL_VOWELS }`,
|
||||
tokens: [
|
||||
{ type: 'whiteSpace', value: '\n', },
|
||||
{ type: 'kw-set', value: 'set', },
|
||||
{ type: 'kwSet', value: 'set', },
|
||||
{ type: 'whiteSpace', value: ' ', },
|
||||
{ type: 'setIdentifier', value: 'NASAL_VOWELS', },
|
||||
{ type: 'whiteSpace', value: ' ', },
|
||||
|
@ -307,7 +321,7 @@ set SET_C = { SET_A not SET_B }, ; left anti join
|
|||
{ type: 'whiteSpace', value: '\n', },
|
||||
{ type: 'comment', value: '; ---- set join operations non-mutable! ', },
|
||||
{ type: 'whiteSpace', value: '\n', },
|
||||
{ type: 'kw-set', value: 'set', },
|
||||
{ type: 'kwSet', value: 'set', },
|
||||
{ type: 'whiteSpace', value: ' ', },
|
||||
{ type: 'setIdentifier', value: 'SET_C', },
|
||||
{ type: 'whiteSpace', value: ' ', },
|
||||
|
@ -406,7 +420,7 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and
|
|||
{ type: 'whiteSpace', value: '\n', },
|
||||
{ type: 'comment', value: '; ---- set character operations - non-mutable!', },
|
||||
{ type: 'whiteSpace', value: '\n', },
|
||||
{ type: 'kw-set', value: 'set', },
|
||||
{ type: 'kwSet', value: 'set', },
|
||||
{ type: 'whiteSpace', value: ' ', },
|
||||
{ type: 'setIdentifier', value: 'SET_B', },
|
||||
{ type: 'whiteSpace', value: ' ', },
|
||||
|
|
|
@ -16,25 +16,25 @@ describe('lexer', () => {
|
|||
}
|
||||
|
||||
it('lexes simple comment', () => {
|
||||
lexer.reset('; comment');
|
||||
const token = lexer.next();
|
||||
expect(getToken(token)).toStrictEqual({ type: 'comment', value: '; comment'});
|
||||
const { latl, tokens } = assertionData.simpleComment;
|
||||
const stream = getStream(latl);
|
||||
expect(stream).toStrictEqual(tokens);
|
||||
});
|
||||
|
||||
it('lexes simple * and identifier', () => {
|
||||
lexer.reset('*proto');
|
||||
const stream = [ getToken(lexer.next()), getToken(lexer.next()) ];
|
||||
expect(stream).toStrictEqual([ { type: 'star', value: '*' }, { type: 'identifier', value: 'proto' } ]);
|
||||
})
|
||||
// it('lexes simple * and identifier', () => {
|
||||
// lexer.reset('*proto');
|
||||
// const stream = [ getToken(lexer.next()), getToken(lexer.next()) ];
|
||||
// expect(stream).toStrictEqual([ { type: 'star', value: '*' }, { type: 'identifier', value: 'proto' } ]);
|
||||
// })
|
||||
|
||||
it('lexes set and identifier', () => {
|
||||
lexer.reset('set PLOSIVES');
|
||||
const stream = [ getToken(lexer.next()), getToken(lexer.next()), getToken(lexer.next()) ];
|
||||
expect(stream).toStrictEqual([ { type: 'kw-set', value: 'set' }, { type: 'whiteSpace', value: ' ' }, { type: 'setIdentifier', value: 'PLOSIVES' } ]);
|
||||
const { latl, tokens } = assertionData.simpleSetDefinition;
|
||||
const stream = getStream(latl);
|
||||
expect(stream).toStrictEqual(tokens);
|
||||
})
|
||||
|
||||
it('lexes multiple set definitions with comma operator', () => {
|
||||
const { latl, tokens } = assertionData.setDefinition;
|
||||
const { latl, tokens } = assertionData.commaSetDefinition;
|
||||
const stream = getStream(latl);
|
||||
expect(stream).toStrictEqual(tokens);
|
||||
});
|
||||
|
|
49
src/utils/latl/test/parser.test.js
Normal file
49
src/utils/latl/test/parser.test.js
Normal file
|
@ -0,0 +1,49 @@
|
|||
import { lexer } from '../lexer';
|
||||
import { parser } from '../parser';
|
||||
import { assertionData } from './assertionData';
|
||||
|
||||
describe('parser', () => {
|
||||
it('parses simple comment', () => {
  const { latl } = assertionData.simpleComment;
  const AST = parser().feed(latl).results;
  // nearley returns one entry per possible parse; a lone comment should yield exactly one.
  expect(AST.length).toBe(1);
  // TODO: pin the exact AST once the grammar's output shape settles.
  // expect(AST[0]).toStrictEqual()
})
|
||||
|
||||
// it('parses multiple set definitions with comma operator', () => {
|
||||
// const { latl } = assertionData.commaSetDefinition;
|
||||
// const AST = parser().feed(latl)
|
||||
// console.log(AST)
|
||||
// });
|
||||
|
||||
// it('lexes set definition with alias', () => {
|
||||
// const { latl, tokens } = assertionData.setAliasDefinition;
|
||||
// const stream = getStream(latl);
|
||||
// expect(stream).toStrictEqual(tokens);
|
||||
// });
|
||||
|
||||
// it('lexes set definition with set join', () => {
|
||||
// const { latl, tokens } = assertionData.setDefinitionJoin;
|
||||
// const stream = getStream(latl);
|
||||
// expect(stream).toStrictEqual(tokens);
|
||||
// });
|
||||
|
||||
// it('lexes set definition with yield operation', () => {
|
||||
// const { latl, tokens } = assertionData.setDefinitionYield;
|
||||
// const stream = getStream(latl);
|
||||
// expect(stream).toStrictEqual(tokens);
|
||||
// });
|
||||
|
||||
// it('lexes all set join operations', () => {
|
||||
// const { latl, tokens } = assertionData.setOperationsJoin;
|
||||
// const stream = getStream(latl);
|
||||
// expect(stream).toStrictEqual(tokens);
|
||||
// });
|
||||
|
||||
// it('lexes set filter, concat, and dissoc operations', () => {
|
||||
// const { latl, tokens } = assertionData.setOperations;
|
||||
// const stream = getStream(latl);
|
||||
// expect(stream).toStrictEqual(tokens);
|
||||
// })
|
||||
})
|
Loading…
Reference in a new issue