From 35f815a9c9ee932a9bb8e391e9f835f6270f98f1 Mon Sep 17 00:00:00 2001 From: Sorrel Bri Date: Mon, 2 Mar 2020 15:06:59 -0800 Subject: [PATCH] add AST parsing support for epoch definitions --- src/reducers/reducer.latl.js | 288 ++++++++++++++++++++++++++---- src/reducers/reducer.latl.test.js | 65 ++++--- src/reducers/reducer.results.js | 4 +- 3 files changed, 302 insertions(+), 55 deletions(-) diff --git a/src/reducers/reducer.latl.js b/src/reducers/reducer.latl.js index 84bb3fe..1dbc825 100644 --- a/src/reducers/reducer.latl.js +++ b/src/reducers/reducer.latl.js @@ -1,22 +1,15 @@ - - export const setLatl = (state, action) => { let latl = action.value; return {...state, latl}; } -export const parseLatl = (state, action) => { - let latl = state.action; - return { ...state } -} - const getOneToken = (latl, tokens) => { for (const [type, regEx] of tokenTypes) { const newRegEx = new RegExp(`^(${regEx})`); const match = latl.match(newRegEx) || null; if (match) { - const newTokens = [...tokens, {type, value: match[0]}] - const newLatl = latl.slice(match[0].length ,).trim(); + const newTokens = [...tokens, {type, value: match[0].trim()}] + const newLatl = latl.slice(match[0].length ,).replace(/\ /,''); return [newLatl, newTokens] } } @@ -38,41 +31,272 @@ export const tokenize = latl => { } } -const addToken = (tree, token, index, tokens) => { - console.log(tokens[index - 1], token) - return tree; +const parseLineBreak = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': { + if (tree[tree.length - 2].type === 'ruleSet') { + const ruleValue = lastNode.value; + tree[tree.length - 2].value.push(ruleValue); + tree.pop() + return tree; + } + if (tree[tree.length - 2].type === 'epoch') { + const newNode = { type: 'ruleSet', value: [ lastNode.value ] } + tree[tree.length - 1] = newNode; + return tree; + } + } + default: + return tree; + } +} + +const parseStar = (tree, token, index, tokens) => { + const nextToken = tokens[index + 1]; + if (nextToken.type === 'referent') { + return [...tree, { type: 'epoch-parent' }] + } +} + +const parsePipe = (tree, token, index, tokens) => { + const nextToken = tokens[index + 1]; + if (nextToken.type === 'referent') { + const ruleToken = tree[tree.length - 1]; + const epochToken = tree[tree.length - 2]; + if (ruleToken.type === 'rule' || ruleToken.type === 'ruleSet') { + if (epochToken.type === 'epoch') { + tree[tree.length - 2] = { + ...epochToken, + changes: [...ruleToken.value], + type: 'epoch-name' + } + tree.pop(); + return tree; + } + } + } + return [...tree, 'unexpected pipe'] +} + +const parseReferent = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'epoch-parent': { + tree[tree.length - 1] = {...lastNode, parent: token.value, type: 'epoch' } + return tree; + } + case 'epoch-name': { + tree[tree.length - 1] = {...lastNode, name: token.value, type: 'epoch' } + return tree; + } + case 'rule': { + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value } + return tree; + } + case 'ruleSet': { + return [...tree, { type: 'rule', value: token.value }] + } + default: + return [...tree, `unexpected referent ${token.value}`] + } + } + +const parseOpenBracket = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'epoch': + return [...tree, {type: 'rule', value: token.value}] + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value } + 
return tree; + default: + return [...tree, 'unexpected open bracket'] + } +} + +const parseCloseBracket = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value } + return tree; + default: + return [...tree, 'unexpected close bracket'] + } +} + +const parsePlus = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value} + return tree; + default: + return [...tree, 'unexpected plus'] + } +} + +const parseMinus = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value} + return tree; + default: + return [...tree, 'unexpected minus'] + } +} + +const parseGreaterThan = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value} + return tree; + default: + return [...tree, 'unexpected greater than'] + } +} + +const parseSlash = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value} + return tree; + default: + return [...tree, 'unexpected slash'] + } +} + +const parseHash = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value} + return tree; + default: + return [...tree, 'unexpected hash'] + } +} + +const parseDot = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value} + return tree; + default: + return [...tree, 'unexpected dot'] + } +} + +const parseUnderScore = (tree, token, index, tokens) => { + const lastNode = tree[tree.length - 1]; + switch (lastNode.type) { + case 'rule': + tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value} + return tree; + default: + return [...tree, 'unexpected underscore'] + } +} + +const generateNode = (tree, token, index, tokens) => { + switch (token.type) { + // if comment, consume without effect + case 'semicolon': + return [...tree] + case 'lineBreak': + return parseLineBreak(tree, token, index, tokens); + // if *PROTO consume token:* and add epochs: [ { parent: 'PROTO' } ] + case 'star': + return parseStar(tree, token, index, tokens); + case 'pipe': + return parsePipe(tree, token, index, tokens); + case 'referent': + return parseReferent(tree, token, index, tokens); + case 'openBracket': + return parseOpenBracket(tree, token, index, tokens); + case 'closeBracket': + return parseCloseBracket(tree, token, index, tokens); + case 'plus': + return parsePlus(tree, token, index, tokens); + case 'minus': + return parseMinus(tree, token, index, tokens); + case 'greaterThan': + return parseGreaterThan(tree, token, index, tokens); + case 'slash': + return parseSlash(tree, token, index, tokens); + case 'hash': + return parseHash(tree, token, index, tokens); + case 'dot': + return parseDot(tree, token, index, tokens); + case 'underscore': + return parseUnderScore(tree, token, index, tokens); + 
default: + return [...tree, { ...token }] + } +} + +const addToken = (tree, token, index, tokens) => generateNode(tree, token, index, tokens); + +const connectNodes = (tree, node, index, nodes) => { + switch (node.type) { + case 'epoch': + delete node.type; + return {...tree, epochs: [...tree.epochs, {...node, index: tree.epochs.length} ]} + default: + return tree; + } } export const buildTree = tokens => { - return tokens.reduce(addToken, {}) + const bareTree = { + epochs: [], + } + const nodes = tokens.reduce(addToken, []); + // return nodes + const tree = nodes.reduce(connectNodes, bareTree); + return tree; } export const generateAST = latl => { // tokenize const tokens = tokenize(latl); - + // build tree const tree = buildTree(tokens); - + return tree; } +export const parseLatl = (state, action) => { + const latl = state.latl; + console.log(latl) + const AST = generateAST(latl); + Object.entries(AST).forEach(([key, value]) => state[key] = value); + return { ...state } +} + const tokenTypes = [ -[`star`, `\\*`], -['pipe', `\\|`], -['openBracket', `\\[`], -['closeBracket', `\\]`], -['positiveAssignment', `\\+=`], -['negativeAssignment', `\\-=`], -['plus', `\\+`], -['minus', `\\-`], -['greaterThan', `\\>`], -['hash', `#`], -['slash', `\/`], -['dot', `\\.`], -['loDash', `\\_`], -[`variable`, `[A-Za-z]+`], -['equal', `=`] -// [`lineBreak`, `\\n`], -// [`whiteSpace`, `\\s+`] + ['semicolon', ';.*\n'], + [`star`, `\\*`], + ['pipe', `\\|`], + ['openBracket', `\\[`], + ['closeBracket', `\\]`], + ['positiveAssignment', `\\+=`], + ['negativeAssignment', `\\-=`], + ['plus', `\\+`], + ['minus', `\\-`], + ['greaterThan', `\\>`], + ['hash', `#`], + ['slash', `\/`], + ['dot', `\\.`], + ['underscore', `\\_`], + [`referent`, `[A-Za-z]+`], + ['equal', `=`], + [`lineBreak`, `\\n`] + // [`whiteSpace`, `\\s+`] ] \ No newline at end of file diff --git a/src/reducers/reducer.latl.test.js b/src/reducers/reducer.latl.test.js index f786835..2acba20 100644 --- a/src/reducers/reducer.latl.test.js +++ b/src/reducers/reducer.latl.test.js @@ -1,6 +1,6 @@ import { stateReducer } from './reducer'; import { initState } from './reducer.init'; -import { tokenize, buildTree } from './reducer.latl'; +import { tokenize, buildTree, parseLatl } from './reducer.latl'; describe('LATL', () => { it('returns state unaltered with no action body', () => { @@ -31,7 +31,8 @@ describe('LATL', () => { it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => { const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl; const tokens = tokenize(latl); - const tokenizedLatl = [...tokenizedEpoch, ...tokenizedFeature, ...tokenizedLexicon]; + const lineBreaks = [{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' }] + const tokenizedLatl = [...tokenizedEpoch, ...lineBreaks, ...tokenizedFeature, ...lineBreaks, ...tokenizedLexicon]; expect(tokens).toStrictEqual(tokenizedLatl); }); @@ -40,8 +41,23 @@ describe('LATL', () => { expect(tree).toStrictEqual(treeEpoch); }) + it('returns run from well-formed epoch latl', () => { + const state = initState(); + const setAction = { + type: 'SET_LATL', + value: epochDefinitionLatl + } + const latlState = stateReducer(state, setAction); + const parseState = parseLatl(latlState, {}) + expect(parseState).toStrictEqual(epochState); + parseState.lexicon[0].epoch = 'PROTO' + const runState = stateReducer(parseState, {type: 'RUN', value:{}}) + console.log(runState) + }) + }) const epochDefinitionLatl = ` 
+; comment *PROTO [+ FEATURE]>[- FEATURE]/._. n>m/#_. @@ -49,16 +65,17 @@ n>m/#_. ` const tokenizedEpoch = [ - { type: "star", value: "*" }, { type: "variable", value: "PROTO" }, - { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" }, - { type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" }, + { type: "semicolon", value: "; comment" }, + { type: "star", value: "*" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' }, + { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" }, + { type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" }, { type: "slash", value: "/" }, { type: "dot", value: "." }, - { type: "loDash", value: "_" }, { type: "dot", value: "." }, - { type: "variable", value: "n" }, - { type: "greaterThan", value: ">" }, { type: "variable", value: "m" }, + { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' }, + { type: "referent", value: "n" }, + { type: "greaterThan", value: ">" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "hash", value: "#" }, - { type: "loDash", value: "_" }, { type: "dot", value: "." }, - { type: "pipe", value: "|" }, { type: "variable", value: "CHILD" } + { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' }, + { type: "pipe", value: "|" }, { type: "referent", value: "CHILD" } ] const treeEpoch = { @@ -68,13 +85,19 @@ const treeEpoch = { name: 'CHILD', index: 0, changes: [ - '[+ FEATURE]>[- FEATURE]/._.', + '[+FEATURE]>[-FEATURE]/._.', 'n>m/#_.' 
] } ] } +const epochState = { + ...initState(), + epochs: treeEpoch.epochs, + latl: epochDefinitionLatl +} + const featureDefinitionLatl = ` [+ PLOSIVE] = kp / p / b / d / t / g / k [- PLOSIVE] = m / n / s / z @@ -85,14 +108,14 @@ const featureDefinitionLatl = ` ` const tokenizedFeature = [ - { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, - { type: "equal", value: "=" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" }, - { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, - { type: "equal", value: "=" }, { type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" }, { type: "slash", value: "/" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" }, - { type: "openBracket", value: "[" }, { type: "variable", value: "SONORANT" }, + { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, + { type: "equal", value: "=" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' }, + { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, + { type: "equal", value: "=" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: "slash", value: "/" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: 'lineBreak', value: '' }, + { type: "openBracket", value: "[" }, { type: "referent", value: "SONORANT" }, { type: 'lineBreak', value: '' }, { type: "positiveAssignment", value: "+=" }, - { type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" }, - { type: "negativeAssignment", value: "-=" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" }, { type: "slash", value: "/" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" }, + { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: 'lineBreak', value: '' }, + { type: "negativeAssignment", value: "-=" }, { type: "referent", value: "s" }, { 
type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: "slash", value: "/" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' }, { type: "closeBracket", value: "]" }, ] @@ -104,8 +127,8 @@ const lexiconDefinitionLatl = ` ` const tokenizedLexicon = [ - { type: "slash", value: "/" }, { type: "variable", value: "PROTO" }, - { type: "variable", value: "kpn" }, - { type: "variable", value: "sm" }, + { type: "slash", value: "/" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' }, + { type: "referent", value: "kpn" }, { type: 'lineBreak', value: '' }, + { type: "referent", value: "sm" }, { type: 'lineBreak', value: '' }, { type: "slash", value: "/" } ] \ No newline at end of file diff --git a/src/reducers/reducer.results.js b/src/reducers/reducer.results.js index ca86976..715fa1c 100644 --- a/src/reducers/reducer.results.js +++ b/src/reducers/reducer.results.js @@ -263,10 +263,10 @@ export const run = (state: stateType, action: resultsAction): stateType => { const passResults = state.epochs.reduce((results, epoch, _) => { const { phones, features, lexicon } = state; let lexiconBundle; - if ( epoch.parent ) { + if ( epoch.parent && results[epoch.parent] ) { lexiconBundle = results.find(result => result.pass === epoch.parent).lexicon } - if (!epoch.parent) { + if (!epoch.parent || !results[epoch.parent]) { lexiconBundle = formBundleFromLexicon(lexicon)(phones); } const ruleBundle = decomposeRules(epoch, phones);