add AST parsing support for epoch definitions

This commit is contained in:
Sorrel Bri 2020-03-02 15:06:59 -08:00
parent 64b2b5d332
commit 35f815a9c9
3 changed files with 302 additions and 55 deletions

View file

@ -1,22 +1,15 @@
export const setLatl = (state, action) => { export const setLatl = (state, action) => {
let latl = action.value; let latl = action.value;
return {...state, latl}; return {...state, latl};
} }
export const parseLatl = (state, action) => {
let latl = state.action;
return { ...state }
}
const getOneToken = (latl, tokens) => { const getOneToken = (latl, tokens) => {
for (const [type, regEx] of tokenTypes) { for (const [type, regEx] of tokenTypes) {
const newRegEx = new RegExp(`^(${regEx})`); const newRegEx = new RegExp(`^(${regEx})`);
const match = latl.match(newRegEx) || null; const match = latl.match(newRegEx) || null;
if (match) { if (match) {
const newTokens = [...tokens, {type, value: match[0]}] const newTokens = [...tokens, {type, value: match[0].trim()}]
const newLatl = latl.slice(match[0].length ,).trim(); const newLatl = latl.slice(match[0].length ,).replace(/\ /,'');
return [newLatl, newTokens] return [newLatl, newTokens]
} }
} }
@ -38,13 +31,235 @@ export const tokenize = latl => {
} }
} }
/**
 * Handles a `lineBreak` token, which terminates the rule currently being built.
 *
 * - If the last node is a `rule` preceded by a `ruleSet`, the rule's value is
 *   pushed onto the rule set and the rule node is removed.
 * - If the last node is a `rule` preceded by an `epoch`, the rule node is
 *   replaced by a new `ruleSet` holding that single rule.
 * - In every other situation (including an empty or single-node tree) the
 *   tree is returned untouched.
 *
 * @param tree   node list accumulated so far (modified in place)
 * @param token  the current lineBreak token (unused)
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the same `tree` array
 */
const parseLineBreak = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  const previousNode = tree[tree.length - 2];

  // Nothing to close: empty tree, last node is not a rule, or the rule has
  // no predecessor to attach to. Guarding here avoids reading `.type` of
  // undefined when a line break is the first thing in the input.
  if (!lastNode || lastNode.type !== 'rule' || !previousNode) {
    return tree;
  }

  if (previousNode.type === 'ruleSet') {
    previousNode.value.push(lastNode.value);
    tree.pop();
    return tree;
  }

  if (previousNode.type === 'epoch') {
    // first rule after an epoch header starts a new rule set
    tree[tree.length - 1] = { type: 'ruleSet', value: [lastNode.value] };
    return tree;
  }

  return tree;
}
/**
 * Handles a `star` token. When the token that follows is a `referent`, a
 * placeholder `epoch-parent` node is appended; the referent handler fills in
 * the parent name afterwards.
 *
 * @param tree   node list accumulated so far (not modified)
 * @param token  the current star token (unused)
 * @param index  position of `token` in `tokens`
 * @param tokens full token list, used to peek at the next token
 * @returns a new array: `tree` plus either the `epoch-parent` node or an
 *          `'unexpected star'` marker
 */
const parseStar = (tree, token, index, tokens) => {
  const nextToken = tokens[index + 1];
  if (nextToken && nextToken.type === 'referent') {
    return [...tree, { type: 'epoch-parent' }];
  }
  // Always hand an array back to the reducer: returning undefined here would
  // make the next token handler fail when it reads `tree.length`.
  return [...tree, 'unexpected star'];
}
/**
 * Handles a `pipe` token, which closes an epoch's rules and introduces the
 * epoch's name (`|NAME`).
 *
 * The pipe is accepted only when the next token is a `referent`, the last
 * node is a `rule` or `ruleSet`, and the node before it is an `epoch`. In
 * that case the rules are folded into the epoch as `changes`, the epoch is
 * retyped to `epoch-name` (so the following referent supplies its name), and
 * the rule node is removed.
 *
 * @param tree   node list accumulated so far (modified in place on success)
 * @param token  the current pipe token (unused)
 * @param index  position of `token` in `tokens`
 * @param tokens full token list, used to peek at the next token
 * @returns the updated `tree`, or a new array ending in `'unexpected pipe'`
 */
const parsePipe = (tree, token, index, tokens) => {
  const nextToken = tokens[index + 1];
  const ruleToken = tree[tree.length - 1];
  const epochToken = tree[tree.length - 2];

  // Each operand is null-checked so a trailing pipe or a too-short tree is
  // reported as unexpected instead of throwing.
  const namesEpoch = Boolean(nextToken) && nextToken.type === 'referent';
  const closesRules = Boolean(ruleToken)
    && (ruleToken.type === 'rule' || ruleToken.type === 'ruleSet');
  const followsEpoch = Boolean(epochToken) && epochToken.type === 'epoch';

  if (!(namesEpoch && closesRules && followsEpoch)) {
    return [...tree, 'unexpected pipe'];
  }

  // A `rule` holds one rule string, a `ruleSet` holds an array of them.
  // Spreading the string would split it into single characters.
  const changes = ruleToken.type === 'rule'
    ? [ruleToken.value]
    : [...ruleToken.value];

  tree[tree.length - 2] = {
    ...epochToken,
    changes,
    type: 'epoch-name'
  };
  tree.pop();
  return tree;
}
/**
 * Handles a `referent` token. What it does depends on the last node:
 *
 * - `epoch-parent`: the referent becomes the epoch's `parent`; node becomes `epoch`.
 * - `epoch-name`:   the referent becomes the epoch's `name`; node becomes `epoch`.
 * - `rule`:         the referent text is appended to the rule's value.
 * - `ruleSet`:      a new `rule` node starting with the referent is appended.
 * - anything else (including an empty tree): an
 *   `unexpected referent <value>` marker is appended.
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current referent token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended node/marker
 */
const parseReferent = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no last node; treat it like any other unexpected context.
  const lastType = lastNode ? lastNode.type : undefined;
  switch (lastType) {
    case 'epoch-parent': {
      tree[tree.length - 1] = { ...lastNode, parent: token.value, type: 'epoch' };
      return tree;
    }
    case 'epoch-name': {
      tree[tree.length - 1] = { ...lastNode, name: token.value, type: 'epoch' };
      return tree;
    }
    case 'rule': {
      tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
      return tree;
    }
    case 'ruleSet': {
      return [...tree, { type: 'rule', value: token.value }];
    }
    default:
      return [...tree, `unexpected referent ${token.value}`];
  }
}
/**
 * Handles an `openBracket` token.
 *
 * - After an `epoch` node it starts a new `rule` whose value is the bracket.
 * - Inside a `rule` it is appended to the rule's value.
 * - Anywhere else (including an empty tree) an `'unexpected open bracket'`
 *   marker is appended.
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended node/marker
 */
const parseOpenBracket = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // Guard against an empty tree, where there is no last node to inspect.
  const lastType = lastNode ? lastNode.type : undefined;
  switch (lastType) {
    case 'epoch':
      return [...tree, { type: 'rule', value: token.value }];
    case 'rule':
      tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
      return tree;
    default:
      return [...tree, 'unexpected open bracket'];
  }
}
/**
 * Handles a `closeBracket` token: appended to the value of the `rule`
 * currently being built, otherwise reported with an
 * `'unexpected close bracket'` marker (also when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parseCloseBracket = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected close bracket'];
}
/**
 * Handles a `plus` token: appended to the value of the `rule` currently
 * being built, otherwise reported with an `'unexpected plus'` marker (also
 * when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parsePlus = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected plus'];
}
/**
 * Handles a `minus` token: appended to the value of the `rule` currently
 * being built, otherwise reported with an `'unexpected minus'` marker (also
 * when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parseMinus = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected minus'];
}
/**
 * Handles a `greaterThan` token: appended to the value of the `rule`
 * currently being built, otherwise reported with an
 * `'unexpected greater than'` marker (also when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parseGreaterThan = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected greater than'];
}
/**
 * Handles a `slash` token: appended to the value of the `rule` currently
 * being built, otherwise reported with an `'unexpected slash'` marker (also
 * when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parseSlash = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected slash'];
}
/**
 * Handles a `hash` token: appended to the value of the `rule` currently
 * being built, otherwise reported with an `'unexpected hash'` marker (also
 * when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parseHash = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected hash'];
}
/**
 * Handles a `dot` token: appended to the value of the `rule` currently
 * being built, otherwise reported with an `'unexpected dot'` marker (also
 * when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parseDot = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected dot'];
}
/**
 * Handles an `underscore` token: appended to the value of the `rule`
 * currently being built, otherwise reported with an
 * `'unexpected underscore'` marker (also when the tree is empty).
 *
 * @param tree   node list accumulated so far (last node may be replaced in place)
 * @param token  the current token; `token.value` is its text
 * @param index  position of `token` in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated `tree` or a new array with the appended marker
 */
const parseUnderScore = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty tree has no rule in progress, so the token is unexpected.
  if (lastNode && lastNode.type === 'rule') {
    tree[tree.length - 1] = { ...lastNode, value: lastNode.value + token.value };
    return tree;
  }
  return [...tree, 'unexpected underscore'];
}
/**
 * Routes one token to the handler for its `type` and returns whatever that
 * handler returns. `semicolon` tokens (comments) add nothing: a shallow copy
 * of the tree is returned. Token types without a dedicated handler are
 * appended to the tree as a shallow copy of the token.
 *
 * @param tree   node list accumulated so far
 * @param token  the token being processed
 * @param index  position of `token` in `tokens`
 * @param tokens full token list
 */
const generateNode = (tree, token, index, tokens) => {
  const kind = token.type;

  // comments are consumed without effect
  if (kind === 'semicolon') return [...tree];

  if (kind === 'lineBreak') return parseLineBreak(tree, token, index, tokens);

  // epoch header (`*PARENT`) and epoch footer (`|NAME`)
  if (kind === 'star') return parseStar(tree, token, index, tokens);
  if (kind === 'pipe') return parsePipe(tree, token, index, tokens);

  if (kind === 'referent') return parseReferent(tree, token, index, tokens);

  // symbols that make up the body of a rule
  if (kind === 'openBracket') return parseOpenBracket(tree, token, index, tokens);
  if (kind === 'closeBracket') return parseCloseBracket(tree, token, index, tokens);
  if (kind === 'plus') return parsePlus(tree, token, index, tokens);
  if (kind === 'minus') return parseMinus(tree, token, index, tokens);
  if (kind === 'greaterThan') return parseGreaterThan(tree, token, index, tokens);
  if (kind === 'slash') return parseSlash(tree, token, index, tokens);
  if (kind === 'hash') return parseHash(tree, token, index, tokens);
  if (kind === 'dot') return parseDot(tree, token, index, tokens);
  if (kind === 'underscore') return parseUnderScore(tree, token, index, tokens);

  // no handler for this type: keep the token itself as a node
  return [...tree, { ...token }];
}
// Reducer callback used when folding tokens into nodes; forwards to generateNode.
const addToken = (tree, token, index, tokens) => {
  return generateNode(tree, token, index, tokens);
};
/**
 * Reducer callback that folds parsed nodes into the final tree object.
 * `epoch` nodes are appended to `tree.epochs` without their `type` field and
 * with an `index` equal to their position in that list; every other node
 * (including string error markers) leaves the tree unchanged.
 *
 * @param tree  accumulated tree; must carry an `epochs` array
 * @param node  the node being folded in (not modified)
 * @param index position of `node` in `nodes` (unused)
 * @param nodes full node list (unused)
 * @returns a new tree object for epoch nodes, otherwise the same `tree`
 */
const connectNodes = (tree, node, index, nodes) => {
  const nodeType = node ? node.type : undefined;
  switch (nodeType) {
    case 'epoch': {
      // Strip `type` via destructuring rather than `delete`, so the caller's
      // node object is left intact.
      const { type: _discardedType, ...epochFields } = node;
      return {
        ...tree,
        epochs: [...tree.epochs, { ...epochFields, index: tree.epochs.length }]
      };
    }
    default:
      return tree;
  }
}
export const buildTree = tokens => { export const buildTree = tokens => {
return tokens.reduce(addToken, {}) const bareTree = {
epochs: [],
}
const nodes = tokens.reduce(addToken, []);
// return nodes
const tree = nodes.reduce(connectNodes, bareTree);
return tree;
} }
export const generateAST = latl => { export const generateAST = latl => {
@ -57,7 +272,16 @@ export const generateAST = latl => {
return tree; return tree;
} }
/**
 * Parses the LATL source held in `state.latl` and merges every top-level key
 * of the resulting AST into a copy of the state.
 *
 * The incoming `state` object is left untouched: the result is built with
 * spreads instead of assigning AST keys onto `state`, so callers that still
 * hold the previous state object do not see it change underneath them.
 *
 * @param state  current reducer state; `state.latl` is the source text
 * @param action reducer action (unused)
 * @returns a new state object containing the AST's keys
 */
export const parseLatl = (state, action) => {
  const AST = generateAST(state.latl);
  return { ...state, ...AST };
}
const tokenTypes = [ const tokenTypes = [
['semicolon', ';.*\n'],
[`star`, `\\*`], [`star`, `\\*`],
['pipe', `\\|`], ['pipe', `\\|`],
['openBracket', `\\[`], ['openBracket', `\\[`],
@ -70,9 +294,9 @@ const tokenTypes = [
['hash', `#`], ['hash', `#`],
['slash', `\/`], ['slash', `\/`],
['dot', `\\.`], ['dot', `\\.`],
['loDash', `\\_`], ['underscore', `\\_`],
[`variable`, `[A-Za-z]+`], [`referent`, `[A-Za-z]+`],
['equal', `=`] ['equal', `=`],
// [`lineBreak`, `\\n`], [`lineBreak`, `\\n`]
// [`whiteSpace`, `\\s+`] // [`whiteSpace`, `\\s+`]
] ]

View file

@ -1,6 +1,6 @@
import { stateReducer } from './reducer'; import { stateReducer } from './reducer';
import { initState } from './reducer.init'; import { initState } from './reducer.init';
import { tokenize, buildTree } from './reducer.latl'; import { tokenize, buildTree, parseLatl } from './reducer.latl';
describe('LATL', () => { describe('LATL', () => {
it('returns state unaltered with no action body', () => { it('returns state unaltered with no action body', () => {
@ -31,7 +31,8 @@ describe('LATL', () => {
it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => { it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => {
const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl; const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl;
const tokens = tokenize(latl); const tokens = tokenize(latl);
const tokenizedLatl = [...tokenizedEpoch, ...tokenizedFeature, ...tokenizedLexicon]; const lineBreaks = [{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' }]
const tokenizedLatl = [...tokenizedEpoch, ...lineBreaks, ...tokenizedFeature, ...lineBreaks, ...tokenizedLexicon];
expect(tokens).toStrictEqual(tokenizedLatl); expect(tokens).toStrictEqual(tokenizedLatl);
}); });
@ -40,8 +41,23 @@ describe('LATL', () => {
expect(tree).toStrictEqual(treeEpoch); expect(tree).toStrictEqual(treeEpoch);
}) })
it('returns run from well-formed epoch latl', () => {
const state = initState();
const setAction = {
type: 'SET_LATL',
value: epochDefinitionLatl
}
const latlState = stateReducer(state, setAction);
const parseState = parseLatl(latlState, {})
expect(parseState).toStrictEqual(epochState);
parseState.lexicon[0].epoch = 'PROTO'
const runState = stateReducer(parseState, {type: 'RUN', value:{}})
console.log(runState)
})
}) })
const epochDefinitionLatl = ` const epochDefinitionLatl = `
; comment
*PROTO *PROTO
[+ FEATURE]>[- FEATURE]/._. [+ FEATURE]>[- FEATURE]/._.
n>m/#_. n>m/#_.
@ -49,16 +65,17 @@ n>m/#_.
` `
const tokenizedEpoch = [ const tokenizedEpoch = [
{ type: "star", value: "*" }, { type: "variable", value: "PROTO" }, { type: "semicolon", value: "; comment" },
{ type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" }, { type: "star", value: "*" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' },
{ type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" }, { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
{ type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
{ type: "slash", value: "/" }, { type: "dot", value: "." }, { type: "slash", value: "/" }, { type: "dot", value: "." },
{ type: "loDash", value: "_" }, { type: "dot", value: "." }, { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' },
{ type: "variable", value: "n" }, { type: "referent", value: "n" },
{ type: "greaterThan", value: ">" }, { type: "variable", value: "m" }, { type: "greaterThan", value: ">" }, { type: "referent", value: "m" },
{ type: "slash", value: "/" }, { type: "hash", value: "#" }, { type: "slash", value: "/" }, { type: "hash", value: "#" },
{ type: "loDash", value: "_" }, { type: "dot", value: "." }, { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' },
{ type: "pipe", value: "|" }, { type: "variable", value: "CHILD" } { type: "pipe", value: "|" }, { type: "referent", value: "CHILD" }
] ]
const treeEpoch = { const treeEpoch = {
@ -75,6 +92,12 @@ const treeEpoch = {
] ]
} }
const epochState = {
...initState(),
epochs: treeEpoch.epochs,
latl: epochDefinitionLatl
}
const featureDefinitionLatl = ` const featureDefinitionLatl = `
[+ PLOSIVE] = kp / p / b / d / t / g / k [+ PLOSIVE] = kp / p / b / d / t / g / k
[- PLOSIVE] = m / n / s / z [- PLOSIVE] = m / n / s / z
@ -85,14 +108,14 @@ const featureDefinitionLatl = `
` `
const tokenizedFeature = [ const tokenizedFeature = [
{ type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
{ type: "equal", value: "=" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" }, { type: "equal", value: "=" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' },
{ type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
{ type: "equal", value: "=" }, { type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" }, { type: "slash", value: "/" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" }, { type: "equal", value: "=" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: "slash", value: "/" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: 'lineBreak', value: '' },
{ type: "openBracket", value: "[" }, { type: "variable", value: "SONORANT" }, { type: "openBracket", value: "[" }, { type: "referent", value: "SONORANT" }, { type: 'lineBreak', value: '' },
{ type: "positiveAssignment", value: "+=" }, { type: "positiveAssignment", value: "+=" },
{ type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: 'lineBreak', value: '' },
{ type: "negativeAssignment", value: "-=" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" }, { type: "slash", value: "/" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" }, { type: "negativeAssignment", value: "-=" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: "slash", value: "/" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' },
{ type: "closeBracket", value: "]" }, { type: "closeBracket", value: "]" },
] ]
@ -104,8 +127,8 @@ const lexiconDefinitionLatl = `
` `
const tokenizedLexicon = [ const tokenizedLexicon = [
{ type: "slash", value: "/" }, { type: "variable", value: "PROTO" }, { type: "slash", value: "/" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' },
{ type: "variable", value: "kpn" }, { type: "referent", value: "kpn" }, { type: 'lineBreak', value: '' },
{ type: "variable", value: "sm" }, { type: "referent", value: "sm" }, { type: 'lineBreak', value: '' },
{ type: "slash", value: "/" } { type: "slash", value: "/" }
] ]

View file

@ -263,10 +263,10 @@ export const run = (state: stateType, action: resultsAction): stateType => {
const passResults = state.epochs.reduce((results, epoch, _) => { const passResults = state.epochs.reduce((results, epoch, _) => {
const { phones, features, lexicon } = state; const { phones, features, lexicon } = state;
let lexiconBundle; let lexiconBundle;
if ( epoch.parent ) { if ( epoch.parent && results[epoch.parent] ) {
lexiconBundle = results.find(result => result.pass === epoch.parent).lexicon lexiconBundle = results.find(result => result.pass === epoch.parent).lexicon
} }
if (!epoch.parent) { if (!epoch.parent || !results[epoch.parent]) {
lexiconBundle = formBundleFromLexicon(lexicon)(phones); lexiconBundle = formBundleFromLexicon(lexicon)(phones);
} }
const ruleBundle = decomposeRules(epoch, phones); const ruleBundle = decomposeRules(epoch, phones);