stub parser

Sorrel Bri 2020-03-27 15:55:45 -07:00
parent 7c75be543f
commit aa19d42a11
9 changed files with 205 additions and 39 deletions

package.json
View file

@@ -17,7 +17,8 @@
},
"scripts": {
"start": "react-scripts start",
"compile-grammar": "nearleyc src/utils/grammar.ne -o src/utils/grammar.js",
"compile-grammar": "nearleyc src/utils/latl/grammar.ne -o src/utils/latl/grammar.js",
"test-grammar": "nearley-test src/utils/latl/grammar.js --input",
"flow": "flow",
"build": "react-scripts build",
"test": "react-scripts test",

src/utils/grammar.js Normal file
View file

src/utils/latl/grammar.js
View file

@@ -3,17 +3,50 @@
(function () {
function id(x) { return x[0]; }
const lexer = require('./lexer');
const { lexer } = require('./lexer.js');
const getTerminal = d => d ? d[0] : null;
const getAll = d => d.map((item, i) => ({[i]: item}));
const flag = token => d => d.map(item => ({[token]: item}))
const clearNull = d => d.filter(t => !!t);
const flagIndex = d => d.map((item, i) => ({[i]: item}))
const remove = _ => null;
const append = d => d.join('');
const constructSet = d => d.reduce((acc, t) => {
if (t && t.type === 'setIdentifier') acc.push({set: t})
if (t && t.length) acc[acc.length - 1].phones = t;
return acc;
}, []);
const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d)
var grammar = {
Lexer: lexer,
ParserRules: [
{"name": "main$ebnf$1$subexpression$1", "symbols": ["statement", {"literal":"\n"}]},
{"name": "main$ebnf$1", "symbols": ["main$ebnf$1$subexpression$1"]},
{"name": "main$ebnf$1$subexpression$2", "symbols": ["statement", {"literal":"\n"}]},
{"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$2"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "main", "symbols": ["main$ebnf$1"]},
{"name": "statement", "symbols": [{"literal":"foo"}]},
{"name": "statement", "symbols": [{"literal":"bar"}]}
{"name": "main$ebnf$1", "symbols": []},
{"name": "main$ebnf$1$subexpression$1", "symbols": ["statement"]},
{"name": "main$ebnf$1", "symbols": ["main$ebnf$1", "main$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "main", "symbols": ["main$ebnf$1"], "postprocess": compose(flag('main'), getTerminal)},
{"name": "_$ebnf$1$subexpression$1", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)]},
{"name": "_$ebnf$1", "symbols": ["_$ebnf$1$subexpression$1"], "postprocess": id},
{"name": "_$ebnf$1", "symbols": [], "postprocess": function(d) {return null;}},
{"name": "_", "symbols": ["_$ebnf$1"], "postprocess": remove},
{"name": "__", "symbols": [(lexer.has("whiteSpace") ? {type: "whiteSpace"} : whiteSpace)], "postprocess": remove},
{"name": "statement", "symbols": ["comment"]},
{"name": "statement", "symbols": ["definition"], "postprocess": compose(clearNull, getTerminal)},
{"name": "comment", "symbols": [(lexer.has("comment") ? {type: "comment"} : comment)], "postprocess": compose(remove, getTerminal)},
{"name": "definition", "symbols": [(lexer.has("kwSet") ? {type: "kwSet"} : kwSet), "__", "setDefinition"], "postprocess": d => ({token: 'setDefinition', sets: d[2]})},
{"name": "setDefinition$ebnf$1", "symbols": []},
{"name": "setDefinition$ebnf$1$subexpression$1", "symbols": [(lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression", (lexer.has("comma") ? {type: "comma"} : comma), "__"]},
{"name": "setDefinition$ebnf$1", "symbols": ["setDefinition$ebnf$1", "setDefinition$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "setDefinition", "symbols": ["setDefinition$ebnf$1", (lexer.has("setIdentifier") ? {type: "setIdentifier"} : setIdentifier), "__", (lexer.has("equal") ? {type: "equal"} : equal), "__", "setExpression"], "postprocess": constructSet},
{"name": "setExpression", "symbols": [(lexer.has("openSquareBracket") ? {type: "openSquareBracket"} : openSquareBracket), "_", "phoneList", "_", (lexer.has("closeSquareBracket") ? {type: "closeSquareBracket"} : closeSquareBracket)], "postprocess": d => d.filter(t => t && t.length)},
{"name": "phoneList$ebnf$1", "symbols": []},
{"name": "phoneList$ebnf$1$subexpression$1", "symbols": [(lexer.has("phone") ? {type: "phone"} : phone), (lexer.has("comma") ? {type: "comma"} : comma), "_"]},
{"name": "phoneList$ebnf$1", "symbols": ["phoneList$ebnf$1", "phoneList$ebnf$1$subexpression$1"], "postprocess": function arrpush(d) {return d[0].concat([d[1]]);}},
{"name": "phoneList", "symbols": ["phoneList$ebnf$1", (lexer.has("phone") ? {type: "phone"} : phone)], "postprocess": d => d.filter(t => t && (t.type === 'phone' || t.length) )
.map(t => {
if (!t.length) return t;
t.filter(st => st && st.type === 'phone')
return t;
}) }
]
, ParserStart: "main"
}
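
As a quick sanity check, the compiled grammar above can be handed straight to nearley, which is exactly what parser.js does further down. A minimal sketch, assuming nearley is installed and grammar.js is the compiled module shown here:

const nearley = require('nearley');
const grammar = require('./grammar.js');

// nearley parsers accumulate state across feed() calls,
// so build a fresh Parser for each independent input.
const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
parser.feed('; comment');
console.log(parser.results); // one entry per valid parse; length 1 means unambiguous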

src/utils/latl/grammar.ne
View file

@@ -1,8 +1,79 @@
@{%
const lexer = require('./lexer');
const { lexer } = require('./lexer.js');
const getTerminal = d => d ? d[0] : null;
const getAll = d => d.map((item, i) => ({[i]: item}));
const flag = token => d => d.map(item => ({[token]: item}))
const clearNull = d => d.filter(t => !!t);
const flagIndex = d => d.map((item, i) => ({[i]: item}))
const remove = _ => null;
const append = d => d.join('');
const constructSet = d => d.reduce((acc, t) => {
if (t && t.type === 'setIdentifier') acc.push({set: t})
if (t && t.length) acc[acc.length - 1].phones = t;
return acc;
}, []);
const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d)
%}
@lexer lexer
main -> (statement "\n"):+
statement -> "foo" | "bar"
main -> (statement):*
{% compose(flag('main'), getTerminal) %}
_ -> (%whiteSpace):?
{% remove %}
__ -> %whiteSpace
{% remove %}
statement -> comment | definition
{% compose(clearNull, getTerminal) %}
comment -> %comment
{% compose(remove, getTerminal) %}
# SETS
definition -> %kwSet __ setDefinition {% d => ({token: 'setDefinition', sets: d[2]}) %}
setDefinition -> (%setIdentifier __ %equal __ setExpression %comma __):* %setIdentifier __ %equal __ setExpression
{% constructSet %}
setExpression -> %openSquareBracket _ phoneList _ %closeSquareBracket
{% d => d.filter(t => t && t.length) %}
phoneList -> (%phone %comma _):* %phone
{% d => d.filter(t => t && (t.type === 'phone' || t.length) )
.map(t => {
if (!t.length) return t;
t.filter(st => st && st.type === 'phone')
return t;
}) %}
# assignmentExpression:
# /*
# * SPEC:
# * conditionalExpression
# * | leftHandSideExpression assignmentOperator assignmentExpression
# */
# (leftHandSideExpression assignmentOperator) =>
# leftHandSideExpression assignmentOperator assignmentExpression
# | conditionalExpression
# ;
# assignmentExpressionNoln:
# conditionalExpressionNoln
# | leftHandSideExpression assignmentOperator assignmentExpressionNoln
# ;
# assignmentOperator:
# /* note that in the grammar these are listed out explicitely */
# EQ | TIMESEQ | DIVIDEEQ | PERCENTEQ | PLUSEQ | MINUSEQ | LSHIFTEQ | RSHIFTEQ
# | GT3EQ | AMPEREQ | CAROTEQ | PIPEEQ
# ;
# expression:
# /*
# * SPEC:
# * assignmentExpression
# * | expression COMMA assignmentExpression
# */
# assignmentExpression (expressionTail)*
# ;
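
One subtlety in the postprocess helpers above: compose applies its arguments left to right (a pipeline, not mathematical right-to-left composition), so compose(flag('main'), getTerminal) wraps each item first and then takes the head. A small illustration with made-up data:

const getTerminal = d => d ? d[0] : null;
const flag = token => d => d.map(item => ({ [token]: item }));
const compose = (...funcs) => d => funcs.reduce((acc, func) => func(acc), d);

// flag('main') runs first, then getTerminal takes the first element:
compose(flag('main'), getTerminal)([['statement']]);
// => { main: ['statement'] }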

src/utils/latl/lexer.js
View file

@@ -1,17 +1,13 @@
const moo = require('moo');
export const lexer = moo.states({
const lexer = moo.states({
main: {
comment: /;.*$/,
star: { match: /\*/, push: 'epoch' },
slash: { match: /\//, push: 'lexicon' },
// change so that identifiers are always upper, keywords are always lower, phones are always lower
'kw-set': { match: 'set', type: moo.keywords({ 'kw-set': 'set '}), push: 'setDefinition'},
'kwSet': { match: 'set', type: moo.keywords({ 'kwSet': 'set '}), push: 'setDefinition'},
identifier: { match: /[A-Za-z]+[\u00c0-\u03FFA-Za-z0-9\\-\\_]*/, },
// type: moo.keywords({
// 'kw-set': 'set'
// // { match: 'set', push: 'setDefinition' },
// })},
openBracket: { match: /\[/, push: 'feature' },
whiteSpace: { match: /\s+/, lineBreaks: true },
newLine: { match: /\n+/, lineBreaks: true }
@@ -116,4 +112,6 @@ export const lexer = moo.states({
newLine: { match: /\n/, lineBreaks: true, pop: true }
}
});
});
module.exports = {lexer};
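
Now that the lexer is exported through module.exports, it can be driven on its own; a minimal sketch (moo lexers are iterable after reset, and the expected token stream below mirrors the simpleSetDefinition fixture):

const { lexer } = require('./lexer.js');

lexer.reset('set PLOSIVES');
// Iterating a moo lexer yields tokens until the input is exhausted.
for (const token of lexer) {
  console.log(token.type, JSON.stringify(token.value));
}
// kwSet "set"
// whiteSpace " "
// setIdentifier "PLOSIVES"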

src/utils/latl/parser.js
View file

@@ -1,4 +1,4 @@
const nearley = require("nearley");
const grammar = require("./grammar.js");
const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
export const parser = () => new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
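
The switch from a shared instance to a factory matters because feed() mutates the parser's internal state, so each parse wants a fresh Parser. The intended call pattern, mirroring parser.test.js below:

import { parser } from './parser';

// A new Parser per input; feed() returns the parser, and results holds the parses.
const AST = parser().feed('; comment').results;
console.log(AST.length); // 1 for an unambiguous parse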

View file

@@ -1,11 +1,25 @@
export const assertionData = {
setDefinition: {
simpleComment: {
latl: `; comment`,
tokens: [
{ type: 'comment', value: '; comment'}
]
},
simpleSetDefinition: {
latl: `set PLOSIVES`,
tokens: [
{ type: 'kwSet', value: 'set' },
{ type: 'whiteSpace', value: ' ' },
{ type: 'setIdentifier', value: 'PLOSIVES' }
]
},
commaSetDefinition: {
latl: `
set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ],
STOP_PULMONIC_CONSONANTS = [ p, b, p̪, b̪, t̼, d̼, t, d, ʈ, ɖ, c, ɟ, k, ɡ, q, ɢ, ʡ, ʔ ]`,
tokens: [
{ type: 'whiteSpace', value: '\n' },
{ type: 'kw-set', value: 'set' },
{ type: 'kwSet', value: 'set' },
{ type: 'whiteSpace', value: ' ' },
{ type: 'setIdentifier', value: 'NASAL_PULMONIC_CONSONANTS' },
{ type: 'whiteSpace', value: ' ' },
@@ -121,7 +135,7 @@ set NASAL_PULMONIC_CONSONANTS = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊,
set NASAL_PULMONIC_CONSONANTS, N = [ m̥, m, ɱ, n̼, n̥, n, ɳ̊, ɳ, ɲ̊, ɲ, ŋ, ̊ŋ, ɴ ]`,
tokens: [
{ type: 'whiteSpace', value: '\n' },
{ type: 'kw-set', value: 'set' },
{ type: 'kwSet', value: 'set' },
{ type: 'whiteSpace', value: ' ' },
{ type: 'setIdentifier', value: 'NASAL_PULMONIC_CONSONANTS' },
{ type: 'comma', value: ',' },
@@ -180,7 +194,7 @@ set CLICK_CONSONANTS = { TENUIS_CLICK_CONSONANTS or VOICED_CLICK_CONSONANTS
}`,
tokens: [
{ type: 'whiteSpace', value: '\n'},
{ type: 'kw-set', value: 'set'},
{ type: 'kwSet', value: 'set'},
{ type: 'whiteSpace', value: ' '},
{ type: 'setIdentifier', value: 'CLICK_CONSONANTS'},
{ type: 'whiteSpace', value: ' '},
@@ -215,7 +229,7 @@ set NASAL_VOWELS = { [ V ] in ORAL_VOWELS yield [ Ṽ ] },
LONG_NASAL_VOWELS = { [ Vː ] in NASAL_VOWELS }`,
tokens: [
{ type: 'whiteSpace', value: '\n', },
{ type: 'kw-set', value: 'set', },
{ type: 'kwSet', value: 'set', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'setIdentifier', value: 'NASAL_VOWELS', },
{ type: 'whiteSpace', value: ' ', },
@@ -307,7 +321,7 @@ set SET_C = { SET_A not SET_B }, ; left anti join
{ type: 'whiteSpace', value: '\n', },
{ type: 'comment', value: '; ---- set join operations non-mutable! ', },
{ type: 'whiteSpace', value: '\n', },
{ type: 'kw-set', value: 'set', },
{ type: 'kwSet', value: 'set', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'setIdentifier', value: 'SET_C', },
{ type: 'whiteSpace', value: ' ', },
@@ -406,7 +420,7 @@ set SET_B = { [ Xy ] in SET_A }, ; FILTER: where X is any character and
{ type: 'whiteSpace', value: '\n', },
{ type: 'comment', value: '; ---- set character operations - non-mutable!', },
{ type: 'whiteSpace', value: '\n', },
{ type: 'kw-set', value: 'set', },
{ type: 'kwSet', value: 'set', },
{ type: 'whiteSpace', value: ' ', },
{ type: 'setIdentifier', value: 'SET_B', },
{ type: 'whiteSpace', value: ' ', },

View file

@@ -16,25 +16,25 @@ describe('lexer', () => {
}
it('lexes simple comment', () => {
lexer.reset('; comment');
const token = lexer.next();
expect(getToken(token)).toStrictEqual({ type: 'comment', value: '; comment'});
const { latl, tokens } = assertionData.simpleComment;
const stream = getStream(latl);
expect(stream).toStrictEqual(tokens);
});
it('lexes simple * and identifier', () => {
lexer.reset('*proto');
const stream = [ getToken(lexer.next()), getToken(lexer.next()) ];
expect(stream).toStrictEqual([ { type: 'star', value: '*' }, { type: 'identifier', value: 'proto' } ]);
})
// it('lexes simple * and identifier', () => {
// lexer.reset('*proto');
// const stream = [ getToken(lexer.next()), getToken(lexer.next()) ];
// expect(stream).toStrictEqual([ { type: 'star', value: '*' }, { type: 'identifier', value: 'proto' } ]);
// })
it('lexes set and identifier', () => {
lexer.reset('set PLOSIVES');
const stream = [ getToken(lexer.next()), getToken(lexer.next()), getToken(lexer.next()) ];
expect(stream).toStrictEqual([ { type: 'kw-set', value: 'set' }, { type: 'whiteSpace', value: ' ' }, { type: 'setIdentifier', value: 'PLOSIVES' } ]);
const { latl, tokens } = assertionData.simpleSetDefinition;
const stream = getStream(latl);
expect(stream).toStrictEqual(tokens);
})
it('lexes multiple set definitions with comma operator', () => {
const { latl, tokens } = assertionData.setDefinition;
const { latl, tokens } = assertionData.commaSetDefinition;
const stream = getStream(latl);
expect(stream).toStrictEqual(tokens);
});
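
The getStream and getToken helpers these tests rely on are defined above this hunk and cut off by the diff context; a plausible minimal version consistent with how they are called here (the bodies are an assumption, not the committed code):

// Assumed: strip moo's extra fields so token streams compare with toStrictEqual.
const getToken = token => token ? { type: token.type, value: token.value } : null;

// Assumed: reset the lexer and drain every token into an array.
const getStream = latl => {
  lexer.reset(latl);
  return Array.from(lexer).map(getToken);
};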

View file

@@ -0,0 +1,49 @@
import { lexer } from '../lexer';
import { parser } from '../parser';
import { assertionData } from './assertionData';
describe('parser', () => {
it('parses simple comment', () => {
const { latl } = assertionData.simpleComment;
const AST = parser().feed(latl).results;
expect(AST.length).toBe(1);
console.log(AST[0])
// expect(AST[0]).toStrictEqual()
})
// it('parses multiple set definitions with comma operator', () => {
// const { latl } = assertionData.commaSetDefinition;
// const AST = parser().feed(latl)
// console.log(AST)
// });
// it('lexes set definition with alias', () => {
// const { latl, tokens } = assertionData.setAliasDefinition;
// const stream = getStream(latl);
// expect(stream).toStrictEqual(tokens);
// });
// it('lexes set definition with set join', () => {
// const { latl, tokens } = assertionData.setDefinitionJoin;
// const stream = getStream(latl);
// expect(stream).toStrictEqual(tokens);
// });
// it('lexes set definition with yield operation', () => {
// const { latl, tokens } = assertionData.setDefinitionYield;
// const stream = getStream(latl);
// expect(stream).toStrictEqual(tokens);
// });
// it('lexes all set join operations', () => {
// const { latl, tokens } = assertionData.setOperationsJoin;
// const stream = getStream(latl);
// expect(stream).toStrictEqual(tokens);
// });
// it('lexes set filter, concat, and dissoc operations', () => {
// const { latl, tokens } = assertionData.setOperations;
// const stream = getStream(latl);
// expect(stream).toStrictEqual(tokens);
// })
})