add AST parsing support for epoch definitions
This commit is contained in:
parent
64b2b5d332
commit
35f815a9c9
3 changed files with 302 additions and 55 deletions
|
@ -1,22 +1,15 @@
|
|||
|
||||
|
||||
/**
 * Reducer that stores raw LATL source text on the state.
 *
 * @param state  current state object
 * @param action action whose `value` carries the LATL text
 * @returns a shallow copy of `state` with `latl` set to `action.value`
 */
export const setLatl = (state, action) => ({ ...state, latl: action.value });
|
||||
|
||||
/**
 * Placeholder parse reducer: performs no parsing and hands back a shallow
 * copy of the state.
 *
 * The previous body read `state.action` into a local that was never used;
 * the LATL text is stored under `state.latl` by `setLatl`, so that read was
 * both wrong and dead and has been removed.
 *
 * @param state  current state object
 * @param action unused; kept so the reducer signature stays uniform
 * @returns a shallow copy of `state`
 */
export const parseLatl = (state, action) => ({ ...state });
|
||||
|
||||
const getOneToken = (latl, tokens) => {
|
||||
for (const [type, regEx] of tokenTypes) {
|
||||
const newRegEx = new RegExp(`^(${regEx})`);
|
||||
const match = latl.match(newRegEx) || null;
|
||||
if (match) {
|
||||
const newTokens = [...tokens, {type, value: match[0]}]
|
||||
const newLatl = latl.slice(match[0].length ,).trim();
|
||||
const newTokens = [...tokens, {type, value: match[0].trim()}]
|
||||
const newLatl = latl.slice(match[0].length ,).replace(/\ /,'');
|
||||
return [newLatl, newTokens]
|
||||
}
|
||||
}
|
||||
|
@ -38,13 +31,235 @@ export const tokenize = latl => {
|
|||
}
|
||||
}
|
||||
|
||||
const addToken = (tree, token, index, tokens) => {
|
||||
console.log(tokens[index - 1], token)
|
||||
/**
 * Handles a line-break token: a line break terminates the rule that is
 * currently being accumulated as the last node of the flat node list.
 *
 * - rule preceded by a ruleSet: the rule's text is appended to that ruleSet
 *   and the rule node is dropped.
 * - rule preceded by an epoch: the rule node becomes a new one-element ruleSet.
 * - anything else (including an empty list or a rule with no predecessor):
 *   the list is returned unchanged.
 *
 * The input list and its nodes are not mutated; a new list is returned
 * whenever something changes.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the line-break token (unused)
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns the updated node list
 */
const parseLineBreak = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  const previousNode = tree[tree.length - 2];

  // Nothing to close unless a rule is pending and something precedes it.
  if (!lastNode || lastNode.type !== 'rule' || !previousNode) {
    return tree;
  }

  if (previousNode.type === 'ruleSet') {
    const mergedRuleSet = { ...previousNode, value: [...previousNode.value, lastNode.value] };
    return [...tree.slice(0, -2), mergedRuleSet];
  }

  if (previousNode.type === 'epoch') {
    return [...tree.slice(0, -1), { type: 'ruleSet', value: [lastNode.value] }];
  }

  return tree;
};
|
||||
|
||||
/**
 * Handles a `*` token. A star immediately followed by a referent opens an
 * epoch definition: an `epoch-parent` placeholder node is appended, which
 * the following referent token is expected to fill in.
 *
 * A star that is the last token, or that is followed by anything other than
 * a referent, is reported by appending the string 'unexpected star' (the
 * same convention the other token handlers use) instead of returning
 * `undefined` or throwing.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the star token (unused)
 * @param index  position of the star in `tokens`
 * @param tokens full token list, used to peek at the next token
 * @returns a new node list
 */
const parseStar = (tree, token, index, tokens) => {
  const nextToken = tokens[index + 1];
  if (nextToken && nextToken.type === 'referent') {
    return [...tree, { type: 'epoch-parent' }];
  }
  return [...tree, 'unexpected star'];
};
|
||||
|
||||
/**
 * Handles a `|` token, which closes an epoch's rule list and announces the
 * epoch's name.
 *
 * When the pipe is followed by a referent and the last two nodes are an
 * epoch followed by a rule or ruleSet, those two nodes are folded into a
 * single `epoch-name` node whose `changes` array holds the rule texts; the
 * following referent token is expected to supply the name.
 *
 * A bare `rule` node holds one rule as a string, so it is wrapped in an
 * array rather than spread (spreading would split it into characters).
 * Any other context appends the string 'unexpected pipe'. The input list is
 * not mutated.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the pipe token (unused)
 * @param index  position of the pipe in `tokens`
 * @param tokens full token list, used to peek at the next token
 * @returns a new node list
 */
const parsePipe = (tree, token, index, tokens) => {
  const nextToken = tokens[index + 1];
  const ruleNode = tree[tree.length - 1];
  const epochNode = tree[tree.length - 2];

  const nameFollows = Boolean(nextToken) && nextToken.type === 'referent';
  const hasRules = Boolean(ruleNode) && (ruleNode.type === 'rule' || ruleNode.type === 'ruleSet');
  const hasEpoch = Boolean(epochNode) && epochNode.type === 'epoch';

  if (!(nameFollows && hasRules && hasEpoch)) {
    return [...tree, 'unexpected pipe'];
  }

  const changes = ruleNode.type === 'ruleSet' ? [...ruleNode.value] : [ruleNode.value];
  return [...tree.slice(0, -2), { ...epochNode, changes, type: 'epoch-name' }];
};
|
||||
|
||||
const parseReferent = (tree, token, index, tokens) => {
|
||||
const lastNode = tree[tree.length - 1];
|
||||
switch (lastNode.type) {
|
||||
case 'epoch-parent': {
|
||||
tree[tree.length - 1] = {...lastNode, parent: token.value, type: 'epoch' }
|
||||
return tree;
|
||||
}
|
||||
case 'epoch-name': {
|
||||
tree[tree.length - 1] = {...lastNode, name: token.value, type: 'epoch' }
|
||||
return tree;
|
||||
}
|
||||
case 'rule': {
|
||||
tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value }
|
||||
return tree;
|
||||
}
|
||||
case 'ruleSet': {
|
||||
return [...tree, { type: 'rule', value: token.value }]
|
||||
}
|
||||
default:
|
||||
return [...tree, `unexpected referent ${token.value}`]
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handles a `[` token.
 *
 * - after an `epoch` node: starts a new rule whose text begins with the bracket.
 * - inside a `rule` node:  appends the bracket to the rule text.
 * - anything else (including an empty list): appends the string
 *   'unexpected open bracket'.
 *
 * NOTE(review): a `[` directly after a `ruleSet` is reported as unexpected,
 * whereas `parseReferent` starts a new rule in that position — confirm
 * whether rules after the first may begin with a bracket.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the open-bracket token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseOpenBracket = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];

  // `lastNode` is undefined for an empty list; that falls through to `default`.
  switch (lastNode && lastNode.type) {
    case 'epoch':
      return [...tree, { type: 'rule', value: token.value }];
    case 'rule':
      return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
    default:
      return [...tree, 'unexpected open bracket'];
  }
};
|
||||
|
||||
/**
 * Handles a `]` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected close bracket'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the close-bracket token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseCloseBracket = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected close bracket'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Handles a `+` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected plus'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the plus token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parsePlus = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected plus'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Handles a `-` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected minus'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the minus token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseMinus = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected minus'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Handles a `>` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected greater than'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the greater-than token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseGreaterThan = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected greater than'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Handles a `/` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected slash'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the slash token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseSlash = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected slash'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Handles a `#` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected hash'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the hash token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseHash = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected hash'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Handles a `.` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected dot'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the dot token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseDot = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected dot'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Handles a `_` token: appends it to the rule text being accumulated when
 * the last node is a `rule`; otherwise (including an empty list) appends the
 * string 'unexpected underscore'.
 *
 * The input list is not mutated; a new list is always returned.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the underscore token; `token.value` is its text
 * @param index  position of the token in `tokens` (unused)
 * @param tokens full token list (unused)
 * @returns a new node list
 */
const parseUnderScore = (tree, token, index, tokens) => {
  const lastNode = tree[tree.length - 1];
  // An empty list has no last node; treat it like any other non-rule context.
  if (!lastNode || lastNode.type !== 'rule') {
    return [...tree, 'unexpected underscore'];
  }
  return [...tree.slice(0, -1), { ...lastNode, value: lastNode.value + token.value }];
};
|
||||
|
||||
/**
 * Routes one token to the handler for its type and returns the node list
 * that handler produces.
 *
 * - `semicolon` (comment) tokens are consumed: a copy of the list is
 *   returned with nothing added.
 * - token types with a dedicated handler are delegated to it.
 * - any other token type is appended to the list as a shallow copy.
 *
 * @param tree   flat list of nodes built so far
 * @param token  the token to process
 * @param index  position of the token in `tokens`
 * @param tokens full token list (handlers may peek at neighbours)
 * @returns the updated node list
 */
const generateNode = (tree, token, index, tokens) => {
  // Comments contribute no node.
  if (token.type === 'semicolon') {
    return [...tree];
  }

  const handlersByType = {
    lineBreak: parseLineBreak,
    // `*` followed by a referent opens an epoch definition
    star: parseStar,
    pipe: parsePipe,
    referent: parseReferent,
    openBracket: parseOpenBracket,
    closeBracket: parseCloseBracket,
    plus: parsePlus,
    minus: parseMinus,
    greaterThan: parseGreaterThan,
    slash: parseSlash,
    hash: parseHash,
    dot: parseDot,
    underscore: parseUnderScore
  };

  // Own-property check so inherited keys are never mistaken for handlers.
  if (Object.prototype.hasOwnProperty.call(handlersByType, token.type)) {
    return handlersByType[token.type](tree, token, index, tokens);
  }

  return [...tree, { ...token }];
};
|
||||
|
||||
/**
 * Reduce callback used when folding the token list into a flat node list;
 * forwards all four reduce arguments to `generateNode` unchanged.
 */
const addToken = (tree, token, index, tokens) => {
  return generateNode(tree, token, index, tokens);
};
|
||||
|
||||
/**
 * Reduce callback that folds the flat node list into the final tree.
 *
 * `epoch` nodes are appended to `tree.epochs` without their `type` field and
 * with an `index` equal to their position in that array. Every other node
 * (including string error markers and missing nodes) leaves the tree as is.
 *
 * The node is copied rather than having its `type` deleted in place, so the
 * caller's node list is left intact.
 *
 * @param tree  tree built so far; must have an `epochs` array
 * @param node  the node to attach
 * @param index position of the node in `nodes` (unused)
 * @param nodes full node list (unused)
 * @returns the tree, or a new tree with the epoch appended
 */
const connectNodes = (tree, node, index, nodes) => {
  if (!node || node.type !== 'epoch') {
    return tree;
  }
  // Strip `type` via rest destructuring instead of mutating the input node.
  const { type: _discardedType, ...epoch } = node;
  return { ...tree, epochs: [...tree.epochs, { ...epoch, index: tree.epochs.length }] };
};
|
||||
|
||||
/**
 * Builds the LATL syntax tree from a token list in two passes:
 *
 * 1. fold the tokens into a flat list of nodes with `addToken`;
 * 2. fold those nodes into a tree object with `connectNodes`, starting from
 *    a tree that has an empty `epochs` array.
 *
 * The stale early `return tokens.reduce(addToken, {})` left over from the
 * previous implementation has been removed: it made the two-pass body
 * unreachable and seeded the fold with an object although the token
 * handlers index the accumulator as an array.
 *
 * @param tokens list of `{ type, value }` tokens
 * @returns the tree produced by the second pass (an object with `epochs`)
 */
export const buildTree = tokens => {
  const bareTree = { epochs: [] };
  const nodes = tokens.reduce(addToken, []);
  return nodes.reduce(connectNodes, bareTree);
};
|
||||
|
||||
export const generateAST = latl => {
|
||||
|
@ -57,7 +272,16 @@ export const generateAST = latl => {
|
|||
return tree;
|
||||
}
|
||||
|
||||
/**
 * Reducer that parses the LATL text held in `state.latl` and merges every
 * top-level key of the resulting AST into the state.
 *
 * The merge is done on a copy: the incoming `state` object is no longer
 * mutated, and the leftover debug `console.log` has been dropped.
 *
 * @param state  current state; `state.latl` is the LATL source text
 * @param action unused; kept so the reducer signature stays uniform
 * @returns a new state object with the AST's keys overlaid on `state`
 */
export const parseLatl = (state, action) => {
  const AST = generateAST(state.latl);
  return { ...state, ...AST };
};
|
||||
|
||||
const tokenTypes = [
|
||||
['semicolon', ';.*\n'],
|
||||
[`star`, `\\*`],
|
||||
['pipe', `\\|`],
|
||||
['openBracket', `\\[`],
|
||||
|
@ -70,9 +294,9 @@ const tokenTypes = [
|
|||
['hash', `#`],
|
||||
['slash', `\/`],
|
||||
['dot', `\\.`],
|
||||
['loDash', `\\_`],
|
||||
[`variable`, `[A-Za-z]+`],
|
||||
['equal', `=`]
|
||||
// [`lineBreak`, `\\n`],
|
||||
['underscore', `\\_`],
|
||||
[`referent`, `[A-Za-z]+`],
|
||||
['equal', `=`],
|
||||
[`lineBreak`, `\\n`]
|
||||
// [`whiteSpace`, `\\s+`]
|
||||
]
|
|
@ -1,6 +1,6 @@
|
|||
import { stateReducer } from './reducer';
|
||||
import { initState } from './reducer.init';
|
||||
import { tokenize, buildTree } from './reducer.latl';
|
||||
import { tokenize, buildTree, parseLatl } from './reducer.latl';
|
||||
|
||||
describe('LATL', () => {
|
||||
it('returns state unaltered with no action body', () => {
|
||||
|
@ -31,7 +31,8 @@ describe('LATL', () => {
|
|||
it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => {
|
||||
const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl;
|
||||
const tokens = tokenize(latl);
|
||||
const tokenizedLatl = [...tokenizedEpoch, ...tokenizedFeature, ...tokenizedLexicon];
|
||||
const lineBreaks = [{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' }]
|
||||
const tokenizedLatl = [...tokenizedEpoch, ...lineBreaks, ...tokenizedFeature, ...lineBreaks, ...tokenizedLexicon];
|
||||
expect(tokens).toStrictEqual(tokenizedLatl);
|
||||
});
|
||||
|
||||
|
@ -40,8 +41,23 @@ describe('LATL', () => {
|
|||
expect(tree).toStrictEqual(treeEpoch);
|
||||
})
|
||||
|
||||
it('returns run from well-formed epoch latl', () => {
|
||||
const state = initState();
|
||||
const setAction = {
|
||||
type: 'SET_LATL',
|
||||
value: epochDefinitionLatl
|
||||
}
|
||||
const latlState = stateReducer(state, setAction);
|
||||
const parseState = parseLatl(latlState, {})
|
||||
expect(parseState).toStrictEqual(epochState);
|
||||
parseState.lexicon[0].epoch = 'PROTO'
|
||||
const runState = stateReducer(parseState, {type: 'RUN', value:{}})
|
||||
console.log(runState)
|
||||
})
|
||||
|
||||
})
|
||||
const epochDefinitionLatl = `
|
||||
; comment
|
||||
*PROTO
|
||||
[+ FEATURE]>[- FEATURE]/._.
|
||||
n>m/#_.
|
||||
|
@ -49,16 +65,17 @@ n>m/#_.
|
|||
`
|
||||
|
||||
// Expected token stream for `epochDefinitionLatl`.
// Stale rows from the previous tokenizer vocabulary ('variable', 'loDash')
// were interleaved with the current ones and have been removed; only the
// rows using 'referent', 'underscore', 'semicolon' and 'lineBreak' remain.
const tokenizedEpoch = [
  { type: "semicolon", value: "; comment" },
  { type: "star", value: "*" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' },
  { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
  { type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
  { type: "slash", value: "/" }, { type: "dot", value: "." },
  { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' },
  { type: "referent", value: "n" },
  { type: "greaterThan", value: ">" }, { type: "referent", value: "m" },
  { type: "slash", value: "/" }, { type: "hash", value: "#" },
  { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' },
  { type: "pipe", value: "|" }, { type: "referent", value: "CHILD" }
]
|
||||
|
||||
const treeEpoch = {
|
||||
|
@ -75,6 +92,12 @@ const treeEpoch = {
|
|||
]
|
||||
}
|
||||
|
||||
// State expected after parsing `epochDefinitionLatl`: the initial state
// overlaid with the expected epochs and the LATL text itself.
const epochState = Object.assign({}, initState(), {
  epochs: treeEpoch.epochs,
  latl: epochDefinitionLatl
});
|
||||
|
||||
const featureDefinitionLatl = `
|
||||
[+ PLOSIVE] = kp / p / b / d / t / g / k
|
||||
[- PLOSIVE] = m / n / s / z
|
||||
|
@ -85,14 +108,14 @@ const featureDefinitionLatl = `
|
|||
`
|
||||
|
||||
// Expected token stream for `featureDefinitionLatl`.
// Stale rows from the previous tokenizer vocabulary ('variable', no
// 'lineBreak' tokens) were interleaved with the current ones and have been
// removed; only the rows using 'referent' and 'lineBreak' remain.
const tokenizedFeature = [
  { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
  { type: "equal", value: "=" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' },
  { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
  { type: "equal", value: "=" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: "slash", value: "/" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: 'lineBreak', value: '' },
  { type: "openBracket", value: "[" }, { type: "referent", value: "SONORANT" }, { type: 'lineBreak', value: '' },
  { type: "positiveAssignment", value: "+=" },
  { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: 'lineBreak', value: '' },
  { type: "negativeAssignment", value: "-=" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: "slash", value: "/" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' },
  { type: "closeBracket", value: "]" },
]
|
||||
|
||||
|
@ -104,8 +127,8 @@ const lexiconDefinitionLatl = `
|
|||
`
|
||||
|
||||
// Expected token stream for `lexiconDefinitionLatl`.
// Stale rows from the previous tokenizer vocabulary ('variable', no
// 'lineBreak' tokens) were interleaved with the current ones and have been
// removed; only the rows using 'referent' and 'lineBreak' remain.
const tokenizedLexicon = [
  { type: "slash", value: "/" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' },
  { type: "referent", value: "kpn" }, { type: 'lineBreak', value: '' },
  { type: "referent", value: "sm" }, { type: 'lineBreak', value: '' },
  { type: "slash", value: "/" }
]
|
|
@ -263,10 +263,10 @@ export const run = (state: stateType, action: resultsAction): stateType => {
|
|||
const passResults = state.epochs.reduce((results, epoch, _) => {
|
||||
const { phones, features, lexicon } = state;
|
||||
let lexiconBundle;
|
||||
if ( epoch.parent ) {
|
||||
if ( epoch.parent && results[epoch.parent] ) {
|
||||
lexiconBundle = results.find(result => result.pass === epoch.parent).lexicon
|
||||
}
|
||||
if (!epoch.parent) {
|
||||
if (!epoch.parent || !results[epoch.parent]) {
|
||||
lexiconBundle = formBundleFromLexicon(lexicon)(phones);
|
||||
}
|
||||
const ruleBundle = decomposeRules(epoch, phones);
|
||||
|
|
Loading…
Reference in a new issue