add AST parsing support for epoch definitions

This commit is contained in:
Sorrel Bri 2020-03-02 15:06:59 -08:00
parent 64b2b5d332
commit 35f815a9c9
3 changed files with 302 additions and 55 deletions

View file

@ -1,22 +1,15 @@
/**
 * Stores the LATL source text carried by the action on the state.
 *
 * @param state  current reducer state
 * @param action action whose `value` is the LATL text
 * @returns a shallow copy of `state` with `latl` set to `action.value`
 */
export const setLatl = (state, action) => ({ ...state, latl: action.value });
/**
 * Placeholder LATL parse step: performs no parsing and returns a shallow copy
 * of the state.
 *
 * The previous body read `state.action` into a local that was never used;
 * that dead read has been removed.
 *
 * @param state  current reducer state
 * @param action ignored
 * @returns a shallow copy of `state`
 */
export const parseLatl = (state, action) => {
  return { ...state };
}
const getOneToken = (latl, tokens) => {
for (const [type, regEx] of tokenTypes) {
const newRegEx = new RegExp(`^(${regEx})`);
const match = latl.match(newRegEx) || null;
if (match) {
const newTokens = [...tokens, {type, value: match[0]}]
const newLatl = latl.slice(match[0].length ,).trim();
const newTokens = [...tokens, {type, value: match[0].trim()}]
const newLatl = latl.slice(match[0].length ,).replace(/\ /,'');
return [newLatl, newTokens]
}
}
@ -38,41 +31,272 @@ export const tokenize = latl => {
}
}
const addToken = (tree, token, index, tokens) => {
console.log(tokens[index - 1], token)
return tree;
/**
 * Handles a `lineBreak` token by filing the rule at the end of `tree` into a
 * rule set.
 *
 * - `ruleSet` followed by `rule`: the rule text is pushed onto the rule set
 *   and the rule node is removed.
 * - `epoch` followed by `rule`: the rule node is replaced by a new `ruleSet`
 *   node holding that single rule.
 * - anything else, including an empty or single-node tree: `tree` is returned
 *   unchanged.
 *
 * `tree` is updated in place and the same array is returned.
 *
 * @param tree   nodes built so far
 * @param token  the lineBreak token (unused)
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the (possibly modified) `tree`
 */
const parseLineBreak = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  const previousNode = tree[tree.length - 2];
  // Nothing to close unless a rule is pending and some node precedes it;
  // this also covers the empty tree, where there is no last node at all.
  if (!lastNode || lastNode.type !== 'rule' || !previousNode) {
    return tree;
  }
  if (previousNode.type === 'ruleSet') {
    previousNode.value.push(lastNode.value);
    tree.pop();
    return tree;
  }
  if (previousNode.type === 'epoch') {
    tree[tree.length - 1] = { type: 'ruleSet', value: [lastNode.value] };
  }
  return tree;
}
/**
 * Handles a `star` token. A star immediately followed by a referent opens an
 * epoch definition, so an `epoch-parent` placeholder node is appended.
 *
 * In any other position (no following token, or a non-referent follows) the
 * marker string 'unexpected star' is appended, matching how the other token
 * handlers report errors. Previously this case returned `undefined`, or threw
 * when the star was the last token.
 *
 * @param tree   nodes built so far (not modified)
 * @param token  the star token (unused)
 * @param index  position of the star token in `tokens`
 * @param tokens full token list, used to look one token ahead
 * @returns a new array with the appended node or error marker
 */
const parseStar = (tree, token, index, tokens) => {
  const nextToken = tokens[index + 1];
  if (nextToken && nextToken.type === 'referent') {
    return [...tree, { type: 'epoch-parent' }];
  }
  return [...tree, 'unexpected star'];
}
/**
 * Handles a `pipe` token, which closes an epoch's list of changes and
 * announces the epoch name.
 *
 * When the pipe is followed by a referent and the tree ends with an `epoch`
 * node followed by a `rule` or `ruleSet` node, the two nodes are merged into
 * one `epoch-name` node whose `changes` holds the rule text(s). `tree` is
 * updated in place in that case.
 *
 * In every other situation (including a short tree or a trailing pipe, which
 * previously threw) a new array with the marker 'unexpected pipe' is returned.
 *
 * @param tree   nodes built so far
 * @param token  the pipe token (unused)
 * @param index  position of the pipe token in `tokens`
 * @param tokens full token list, used to look one token ahead
 * @returns the updated tree
 */
const parsePipe = (tree, token, index, tokens) => {
  const nextToken = tokens[index + 1];
  const ruleToken = tree[tree.length - 1];
  const epochToken = tree[tree.length - 2];
  const closesEpoch =
    nextToken && nextToken.type === 'referent' &&
    ruleToken && (ruleToken.type === 'rule' || ruleToken.type === 'ruleSet') &&
    epochToken && epochToken.type === 'epoch';
  if (!closesEpoch) {
    return [...tree, 'unexpected pipe'];
  }
  // A bare `rule` carries a string; spreading it would split it into characters
  const changes = ruleToken.type === 'rule' ? [ruleToken.value] : [...ruleToken.value];
  tree[tree.length - 2] = { ...epochToken, changes, type: 'epoch-name' };
  tree.pop();
  return tree;
}
const parseReferent = (tree, token, index, tokens) => {
const lastNode = tree[tree.length - 1];
switch (lastNode.type) {
case 'epoch-parent': {
tree[tree.length - 1] = {...lastNode, parent: token.value, type: 'epoch' }
return tree;
}
case 'epoch-name': {
tree[tree.length - 1] = {...lastNode, name: token.value, type: 'epoch' }
return tree;
}
case 'rule': {
tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value }
return tree;
}
case 'ruleSet': {
return [...tree, { type: 'rule', value: token.value }]
}
default:
return [...tree, `unexpected referent ${token.value}`]
}
}
/**
 * Handles an `openBracket` token.
 *
 * - after an `epoch` node: starts a new `rule` node whose text is the bracket.
 * - inside a `rule` node: appends the bracket to the rule text (last node of
 *   `tree` is replaced in place).
 * - anywhere else, including an empty tree (which previously threw): appends
 *   the marker string 'unexpected open bracket'.
 *
 * @param tree   nodes built so far
 * @param token  the openBracket token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseOpenBracket = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no last node; treat it as an unexpected position
  const lastType = lastNode ? lastNode.type : undefined;
  if (lastType === 'epoch') {
    return [...tree, { type: 'rule', value: token.value }];
  }
  if (lastType === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected open bracket'];
}
/**
 * Handles a `closeBracket` token: inside a `rule` node the bracket is appended
 * to the rule text (last node of `tree` replaced in place); anywhere else,
 * including an empty tree (which previously threw), the marker string
 * 'unexpected close bracket' is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the closeBracket token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseCloseBracket = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected close bracket'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
/**
 * Handles a `plus` token: inside a `rule` node the sign is appended to the
 * rule text (last node of `tree` replaced in place); anywhere else, including
 * an empty tree (which previously threw), the marker string 'unexpected plus'
 * is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the plus token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parsePlus = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected plus'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
/**
 * Handles a `minus` token: inside a `rule` node the sign is appended to the
 * rule text (last node of `tree` replaced in place); anywhere else, including
 * an empty tree (which previously threw), the marker string 'unexpected minus'
 * is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the minus token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseMinus = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected minus'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
/**
 * Handles a `greaterThan` token: inside a `rule` node the character is
 * appended to the rule text (last node of `tree` replaced in place); anywhere
 * else, including an empty tree (which previously threw), the marker string
 * 'unexpected greater than' is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the greaterThan token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseGreaterThan = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected greater than'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
/**
 * Handles a `slash` token: inside a `rule` node the character is appended to
 * the rule text (last node of `tree` replaced in place); anywhere else,
 * including an empty tree (which previously threw), the marker string
 * 'unexpected slash' is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the slash token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseSlash = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected slash'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
/**
 * Handles a `hash` token: inside a `rule` node the character is appended to
 * the rule text (last node of `tree` replaced in place); anywhere else,
 * including an empty tree (which previously threw), the marker string
 * 'unexpected hash' is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the hash token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseHash = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected hash'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
/**
 * Handles a `dot` token: inside a `rule` node the character is appended to
 * the rule text (last node of `tree` replaced in place); anywhere else,
 * including an empty tree (which previously threw), the marker string
 * 'unexpected dot' is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the dot token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseDot = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected dot'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
/**
 * Handles an `underscore` token: inside a `rule` node the character is
 * appended to the rule text (last node of `tree` replaced in place); anywhere
 * else, including an empty tree (which previously threw), the marker string
 * 'unexpected underscore' is appended to a new array.
 *
 * @param tree   nodes built so far
 * @param token  the underscore token
 * @param index  position of the token (unused)
 * @param tokens full token list (unused)
 * @returns the updated tree
 */
const parseUnderScore = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected underscore'];
  }
  tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
  return tree;
}
// Token types that have a dedicated handler, keyed by token type.
// e.g. a star followed by a referent (`*PROTO`) opens an epoch with that parent.
const nodeParsers = new Map([
  ['lineBreak', parseLineBreak],
  ['star', parseStar],
  ['pipe', parsePipe],
  ['referent', parseReferent],
  ['openBracket', parseOpenBracket],
  ['closeBracket', parseCloseBracket],
  ['plus', parsePlus],
  ['minus', parseMinus],
  ['greaterThan', parseGreaterThan],
  ['slash', parseSlash],
  ['hash', parseHash],
  ['dot', parseDot],
  ['underscore', parseUnderScore],
]);

/**
 * Folds one token into the node list by dispatching on the token type.
 *
 * - `semicolon` (comment) tokens are consumed without effect: a copy of
 *   `tree` is returned.
 * - token types with a dedicated handler are passed to that handler.
 * - any other token type is appended to a copy of `tree` as a shallow copy of
 *   the token.
 *
 * @param tree   nodes built so far
 * @param token  token being processed
 * @param index  position of `token` in `tokens`
 * @param tokens full token list
 * @returns the updated node list
 */
const generateNode = (tree, token, index, tokens) => {
  if (token.type === 'semicolon') {
    return [...tree];
  }
  const parser = nodeParsers.get(token.type);
  if (parser) {
    return parser(tree, token, index, tokens);
  }
  return [...tree, { ...token }];
}
/**
 * Reduce callback used when folding tokens into nodes; forwards all four
 * reduce arguments to `generateNode` and returns its result.
 */
const addToken = (tree, token, index, tokens) => {
  return generateNode(tree, token, index, tokens);
};
/**
 * Reduce callback that assembles the final tree from the flat node list.
 *
 * `epoch` nodes are appended to `tree.epochs` without their `type` field and
 * with an `index` equal to their position in `tree.epochs`. Every other node
 * (including error-marker strings) is ignored and `tree` is returned as is.
 *
 * The node is copied rather than modified; the previous implementation
 * deleted `type` from the caller's node object.
 *
 * @param tree  tree built so far; must have an `epochs` array
 * @param node  node being folded in
 * @param index position of the node (unused)
 * @param nodes full node list (unused)
 * @returns `tree` itself, or a new tree object with the epoch appended
 */
const connectNodes = (tree, node, index, nodes) => {
  if (!node || node.type !== 'epoch') {
    return tree;
  }
  // Drop `type` via rest destructuring so the input node stays intact
  const { type, ...epoch } = node;
  return { ...tree, epochs: [...tree.epochs, { ...epoch, index: tree.epochs.length }] };
}
/**
 * Builds the AST from a token list in two passes: tokens are first folded
 * into a flat node list with `addToken`, then the nodes are folded into a
 * tree object with `connectNodes`, starting from `{ epochs: [] }`.
 *
 * A stale `return tokens.reduce(addToken, {})` at the top of the body made
 * both passes unreachable; it has been removed.
 *
 * @param tokens token list to fold into a tree
 * @returns the tree object produced by the second pass
 */
export const buildTree = tokens => {
  const bareTree = {
    epochs: [],
  }
  const nodes = tokens.reduce(addToken, []);
  const tree = nodes.reduce(connectNodes, bareTree);
  return tree;
}
/**
 * Turns LATL source text into a tree by tokenizing it and handing the tokens
 * to `buildTree`.
 *
 * @param latl LATL source text
 * @returns the value returned by `buildTree`
 */
export const generateAST = latl => buildTree(tokenize(latl));
/**
 * Parses the LATL text stored in `state.latl` and merges every top-level key
 * of the resulting AST into the state.
 *
 * The incoming `state` is left untouched: the previous implementation wrote
 * the AST keys onto it before copying, and also logged the LATL text to the
 * console.
 *
 * @param state  current reducer state; `state.latl` holds the LATL source
 * @param action unused
 * @returns a new state object with the AST keys merged over `state`
 */
export const parseLatl = (state, action) => {
  const AST = generateAST(state.latl);
  return { ...state, ...AST };
}
// Token definitions as [type, regular-expression source] pairs.
// Order matters where one pattern is a prefix of another: the two-character
// assignments (`+=`, `-=`) are listed before `plus` and `minus`.
// The stale pre-rename entries (`variable`, `loDash`, and duplicates of every
// other type) have been dropped; they preceded and therefore shadowed
// `referent` and `underscore`, and a missing comma after the old `equal`
// entry turned the following array literal into an index expression.
const tokenTypes = [
  ['semicolon', ';.*\n'],
  [`star`, `\\*`],
  ['pipe', `\\|`],
  ['openBracket', `\\[`],
  ['closeBracket', `\\]`],
  ['positiveAssignment', `\\+=`],
  ['negativeAssignment', `\\-=`],
  ['plus', `\\+`],
  ['minus', `\\-`],
  ['greaterThan', `\\>`],
  ['hash', `#`],
  ['slash', `\/`],
  ['dot', `\\.`],
  ['underscore', `\\_`],
  [`referent`, `[A-Za-z]+`],
  ['equal', `=`],
  [`lineBreak`, `\\n`]
  // [`whiteSpace`, `\\s+`]
]

View file

@ -1,6 +1,6 @@
import { stateReducer } from './reducer';
import { initState } from './reducer.init';
import { tokenize, buildTree } from './reducer.latl';
import { tokenize, buildTree, parseLatl } from './reducer.latl';
describe('LATL', () => {
it('returns state unaltered with no action body', () => {
@ -31,7 +31,8 @@ describe('LATL', () => {
it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => {
const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl;
const tokens = tokenize(latl);
const tokenizedLatl = [...tokenizedEpoch, ...tokenizedFeature, ...tokenizedLexicon];
const lineBreaks = [{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' }]
const tokenizedLatl = [...tokenizedEpoch, ...lineBreaks, ...tokenizedFeature, ...lineBreaks, ...tokenizedLexicon];
expect(tokens).toStrictEqual(tokenizedLatl);
});
@ -40,8 +41,23 @@ describe('LATL', () => {
expect(tree).toStrictEqual(treeEpoch);
})
it('returns run from well-formed epoch latl', () => {
const state = initState();
const setAction = {
type: 'SET_LATL',
value: epochDefinitionLatl
}
const latlState = stateReducer(state, setAction);
const parseState = parseLatl(latlState, {})
expect(parseState).toStrictEqual(epochState);
parseState.lexicon[0].epoch = 'PROTO'
const runState = stateReducer(parseState, {type: 'RUN', value:{}})
console.log(runState)
})
})
const epochDefinitionLatl = `
; comment
*PROTO
[+ FEATURE]>[- FEATURE]/._.
n>m/#_.
@ -49,16 +65,17 @@ n>m/#_.
`
// Expected tokens for the epoch-definition fixture: the leading comment,
// `referent`/`underscore` token types, and a `lineBreak` token at each line end.
// The stale rows using the old `variable`/`loDash` types have been removed;
// they duplicated every entry and left the array literal missing a comma.
const tokenizedEpoch = [
  { type: "semicolon", value: "; comment" },
  { type: "star", value: "*" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' },
  { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
  { type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
  { type: "slash", value: "/" }, { type: "dot", value: "." },
  { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' },
  { type: "referent", value: "n" },
  { type: "greaterThan", value: ">" }, { type: "referent", value: "m" },
  { type: "slash", value: "/" }, { type: "hash", value: "#" },
  { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' },
  { type: "pipe", value: "|" }, { type: "referent", value: "CHILD" }
]
const treeEpoch = {
@ -68,13 +85,19 @@ const treeEpoch = {
name: 'CHILD',
index: 0,
changes: [
'[+ FEATURE]>[- FEATURE]/._.',
'[+FEATURE]>[-FEATURE]/._.',
'n>m/#_.'
]
}
]
}
// Expected state after parsing the epoch fixture: a fresh initial state with
// the fixture's epochs and the LATL source text filled in.
const epochState = Object.assign({}, initState(), {
  epochs: treeEpoch.epochs,
  latl: epochDefinitionLatl
});
const featureDefinitionLatl = `
[+ PLOSIVE] = kp / p / b / d / t / g / k
[- PLOSIVE] = m / n / s / z
@ -85,14 +108,14 @@ const featureDefinitionLatl = `
`
// Expected tokens for the feature-definition fixture, using the `referent`
// token type with a `lineBreak` token at each line end.
// The stale rows using the old `variable` type, which duplicated every entry,
// have been removed.
const tokenizedFeature = [
  { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
  { type: "equal", value: "=" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' },
  { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
  { type: "equal", value: "=" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: "slash", value: "/" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: 'lineBreak', value: '' },
  { type: "openBracket", value: "[" }, { type: "referent", value: "SONORANT" }, { type: 'lineBreak', value: '' },
  { type: "positiveAssignment", value: "+=" },
  { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: 'lineBreak', value: '' },
  { type: "negativeAssignment", value: "-=" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: "slash", value: "/" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' },
  { type: "closeBracket", value: "]" },
]
@ -104,8 +127,8 @@ const lexiconDefinitionLatl = `
`
// Expected tokens for the lexicon-definition fixture, using the `referent`
// token type with a `lineBreak` token at each line end.
// The stale rows using the old `variable` type, which duplicated every entry,
// have been removed.
const tokenizedLexicon = [
  { type: "slash", value: "/" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' },
  { type: "referent", value: "kpn" }, { type: 'lineBreak', value: '' },
  { type: "referent", value: "sm" }, { type: 'lineBreak', value: '' },
  { type: "slash", value: "/" }
]

View file

@ -263,10 +263,10 @@ export const run = (state: stateType, action: resultsAction): stateType => {
const passResults = state.epochs.reduce((results, epoch, _) => {
const { phones, features, lexicon } = state;
let lexiconBundle;
if ( epoch.parent ) {
if ( epoch.parent && results[epoch.parent] ) {
lexiconBundle = results.find(result => result.pass === epoch.parent).lexicon
}
if (!epoch.parent) {
if (!epoch.parent || !results[epoch.parent]) {
lexiconBundle = formBundleFromLexicon(lexicon)(phones);
}
const ruleBundle = decomposeRules(epoch, phones);