add support for extended IPA, greek characters, etc, with phone token

This commit is contained in:
Sorrel Bri 2020-03-02 20:48:42 -08:00
parent 4ee5bc0f78
commit 78b513c9be
5 changed files with 84 additions and 41 deletions

View file

@ -30,7 +30,7 @@ const PhonoChangeApplier = () => {
<Link to="/">Back to GUI</Link> <Link to="/">Back to GUI</Link>
<div className="PhonoChangeApplier PhonoChangeApplier--latl"> <div className="PhonoChangeApplier PhonoChangeApplier--latl">
<Latl latl={latl} dispatch={dispatch}/> <Latl latl={latl} dispatch={dispatch}/>
<LatlOutput results={results} options={options} dispatch={dispatch}/> <LatlOutput results={results} options={options} errors={errors} dispatch={dispatch}/>
</div> </div>
</Route> </Route>

View file

@ -1,7 +1,8 @@
import React from 'react'; import React from 'react';
import './LatlOutput.scss'; import './LatlOutput.scss';
import Output from './Output';
const LatlOutput = ({results, options, dispatch}) => { const LatlOutput = ({results, options, dispatch, errors}) => {
const handleClick = e => dispatchFunc => { const handleClick = e => dispatchFunc => {
e.preventDefault() e.preventDefault()
return dispatchFunc(); return dispatchFunc();
@ -60,6 +61,7 @@ const LatlOutput = ({results, options, dispatch}) => {
value="Run" value="Run"
/> />
</form> </form>
<Output results={results} errors={errors} options={options}/>
</div> </div>
); );
} }

View file

@ -64,7 +64,8 @@ export const initState = (changesArgument: number): stateType => {
errors: {}, errors: {},
features: {}, features: {},
lexicon: [], lexicon: [],
latl: '' latl: '',
parseResults: ''
}; };
state.features = { state.features = {
sonorant: { positive:[ state.phones.a, state.phones.u, state.phones.ɯ, state.phones.ə, state.phones.n], negative: [] }, sonorant: { positive:[ state.phones.a, state.phones.u, state.phones.ɯ, state.phones.ə, state.phones.n], negative: [] },

View file

@ -9,7 +9,7 @@ const getOneToken = (latl, tokens) => {
const match = latl.match(newRegEx) || null; const match = latl.match(newRegEx) || null;
if (match) { if (match) {
const newTokens = [...tokens, {type, value: match[0].trim()}] const newTokens = [...tokens, {type, value: match[0].trim()}]
const newLatl = latl.slice(match[0].length ,).replace(/\ /,''); const newLatl = latl.slice(match[0].length ,);
return [newLatl, newTokens] return [newLatl, newTokens]
} }
} }
@ -52,6 +52,18 @@ const parseLineBreak = (tree, token, index, tokens) => {
} }
} }
const parseWhiteSpace = (tree, token, index, tokens) => {
const lastNode = tree[tree.length - 1];
switch (lastNode.type) {
case 'rule': {
tree[tree.length - 1] = {...lastNode, value: lastNode.value + ' ' }
return tree;
}
default:
return tree;
}
}
const parseStar = (tree, token, index, tokens) => { const parseStar = (tree, token, index, tokens) => {
const nextToken = tokens[index + 1]; const nextToken = tokens[index + 1];
if (nextToken.type === 'referent') { if (nextToken.type === 'referent') {
@ -105,6 +117,18 @@ const parseReferent = (tree, token, index, tokens) => {
} }
} }
const parsePhone = (tree, token, index, tokens) => {
const lastNode = tree[tree.length - 1];
switch(lastNode.type) {
case 'rule': {
tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value }
return tree;
}
default:
return [...tree, `unexpected phone ${token.value}`]
}
}
const parseOpenBracket = (tree, token, index, tokens) => { const parseOpenBracket = (tree, token, index, tokens) => {
const lastNode = tree[tree.length - 1]; const lastNode = tree[tree.length - 1];
switch (lastNode.type) { switch (lastNode.type) {
@ -113,6 +137,8 @@ const parseOpenBracket = (tree, token, index, tokens) => {
case 'rule': case 'rule':
tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value } tree[tree.length - 1] = {...lastNode, value: lastNode.value + token.value }
return tree; return tree;
case 'ruleSet':
return [...tree, {type: 'rule', value: token.value}]
default: default:
return [...tree, 'unexpected open bracket'] return [...tree, 'unexpected open bracket']
} }
@ -213,6 +239,8 @@ const generateNode = (tree, token, index, tokens) => {
return [...tree] return [...tree]
case 'lineBreak': case 'lineBreak':
return parseLineBreak(tree, token, index, tokens); return parseLineBreak(tree, token, index, tokens);
case 'whiteSpace':
return parseWhiteSpace(tree, token, index, tokens);
// if *PROTO consume token:* and add epochs: [ { parent: 'PROTO' } ] // if *PROTO consume token:* and add epochs: [ { parent: 'PROTO' } ]
case 'star': case 'star':
return parseStar(tree, token, index, tokens); return parseStar(tree, token, index, tokens);
@ -220,6 +248,8 @@ const generateNode = (tree, token, index, tokens) => {
return parsePipe(tree, token, index, tokens); return parsePipe(tree, token, index, tokens);
case 'referent': case 'referent':
return parseReferent(tree, token, index, tokens); return parseReferent(tree, token, index, tokens);
case 'phone':
return parsePhone(tree, token, index, tokens);
case 'openBracket': case 'openBracket':
return parseOpenBracket(tree, token, index, tokens); return parseOpenBracket(tree, token, index, tokens);
case 'closeBracket': case 'closeBracket':
@ -261,6 +291,7 @@ export const buildTree = tokens => {
} }
const nodes = tokens.reduce(addToken, []); const nodes = tokens.reduce(addToken, []);
// return nodes // return nodes
console.log(nodes)
const tree = nodes.reduce(connectNodes, bareTree); const tree = nodes.reduce(connectNodes, bareTree);
return tree; return tree;
} }
@ -274,10 +305,17 @@ export const generateAST = latl => {
} }
export const parseLatl = (state, action) => { export const parseLatl = (state, action) => {
try {
const latl = state.latl; const latl = state.latl;
const AST = generateAST(latl); const AST = generateAST(latl);
Object.entries(AST).forEach(([key, value]) => state[key] = value); Object.entries(AST).forEach(([key, value]) => state[key] = value);
return { ...state } console.log(state)
console.log(AST)
return { ...state, parseResults: 'success' }
}
catch (e) {
return { ...state, errors: e}
}
} }
const tokenTypes = [ const tokenTypes = [
@ -296,7 +334,8 @@ const tokenTypes = [
['dot', `\\.`], ['dot', `\\.`],
['underscore', `\\_`], ['underscore', `\\_`],
[`referent`, `[A-Za-z]+[\\w\\-\\_]*`], [`referent`, `[A-Za-z]+[\\w\\-\\_]*`],
[`phone`, `[\u0100-\u02AF\u0300-\u03FFA-Za-z0]+`],
['equal', `=`], ['equal', `=`],
[`lineBreak`, `\\n`] [`lineBreak`, `\\n`],
// [`whiteSpace`, `\\s+`] [`whiteSpace`, `\\s+`]
] ]

View file

@ -18,23 +18,23 @@ describe('LATL', () => {
expect(tokens).toStrictEqual(tokenizedEpoch) expect(tokens).toStrictEqual(tokenizedEpoch)
}); });
it('returns tokens from well-formed latl feature definition', () => { // it('returns tokens from well-formed latl feature definition', () => {
const tokens = tokenize(featureDefinitionLatl); // const tokens = tokenize(featureDefinitionLatl);
expect(tokens).toStrictEqual(tokenizedFeature); // expect(tokens).toStrictEqual(tokenizedFeature);
}); // });
it('returns tokens from well-formed latl lexicon definition', () => { // it('returns tokens from well-formed latl lexicon definition', () => {
const tokens = tokenize(lexiconDefinitionLatl); // const tokens = tokenize(lexiconDefinitionLatl);
expect(tokens).toStrictEqual(tokenizedLexicon); // expect(tokens).toStrictEqual(tokenizedLexicon);
}); // });
it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => { // it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => {
const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl; // const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl;
const tokens = tokenize(latl); // const tokens = tokenize(latl);
const lineBreaks = [{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' }] // const lineBreaks = [{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' },{ type: 'lineBreak', value: '' }]
const tokenizedLatl = [...tokenizedEpoch, ...lineBreaks, ...tokenizedFeature, ...lineBreaks, ...tokenizedLexicon]; // const tokenizedLatl = [...tokenizedEpoch, ...lineBreaks, ...tokenizedFeature, ...lineBreaks, ...tokenizedLexicon];
expect(tokens).toStrictEqual(tokenizedLatl); // expect(tokens).toStrictEqual(tokenizedLatl);
}); // });
it('returns AST from well-formed epoch tokens', () => { it('returns AST from well-formed epoch tokens', () => {
const tree = buildTree(tokenizedEpoch); const tree = buildTree(tokenizedEpoch);
@ -81,11 +81,11 @@ const runEpochResults = [
const tokenizedEpoch = [ const tokenizedEpoch = [
{ type: "semicolon", value: "; comment" }, { type: "semicolon", value: "; comment" },
{ type: "star", value: "*" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' }, { type: "star", value: "*" }, { type: "referent", value: "PROTO" }, { type: 'lineBreak', value: '' }, { type: "whiteSpace", value: "" },
{ type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" }, { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
{ type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" }, { type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "FEATURE" }, { type: "closeBracket", value: "]" },
{ type: "slash", value: "/" }, { type: "dot", value: "." }, { type: "slash", value: "/" }, { type: "dot", value: "." },
{ type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' }, { type: "underscore", value: "_" }, { type: "dot", value: "." }, { type: 'lineBreak', value: '' }, { type: "whiteSpace", value: "" },
{ type: "referent", value: "n" }, { type: "referent", value: "n" },
{ type: "greaterThan", value: ">" }, { type: "referent", value: "m" }, { type: "greaterThan", value: ">" }, { type: "referent", value: "m" },
{ type: "slash", value: "/" }, { type: "hash", value: "#" }, { type: "slash", value: "/" }, { type: "hash", value: "#" },
@ -100,7 +100,7 @@ const treeEpoch = {
name: 'CHILD', name: 'CHILD',
index: 0, index: 0,
changes: [ changes: [
'[+FEATURE]>[-FEATURE]/._.', '[+ FEATURE]>[- FEATURE]/._.',
'n>m/#_.' 'n>m/#_.'
] ]
} }
@ -114,23 +114,24 @@ const epochState = {
} }
const featureDefinitionLatl = ` const featureDefinitionLatl = `
[+ PLOSIVE] = kp / p / b / d / t / g / k [+ PLOSIVE] = kp/p/b/d/t/g/k
[- PLOSIVE] = m / n / s / z [- PLOSIVE] = m/n/s/z
[SONORANT [SONORANT
+= m / n += m/n
-= s / z / kp / p / b / d / t / g / k -= s/z/kp/p/b/d/t/g/k
] ]
` `
const tokenizedFeature = [ const tokenizedFeature = [
{ type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, { type: "whiteSpace", value: "" },
{ type: "equal", value: "=" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' }, { type: "equal", value: "=" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "k" }, { type: "whiteSpace", value: "" }, { type: 'lineBreak', value: '' },
{ type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "PLOSIVE" }, { type: "closeBracket", value: "]" }, { type: "whiteSpace", value: "" },
{ type: "equal", value: "=" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: "slash", value: "/" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: 'lineBreak', value: '' }, { type: "equal", value: "=" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: "slash", value: "/" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: 'lineBreak', value: '' },
{ type: "openBracket", value: "[" }, { type: "referent", value: "SONORANT" }, { type: 'lineBreak', value: '' }, { type: "openBracket", value: "[" }, { type: "referent", value: "SONORANT" }, { type: 'lineBreak', value: '' },
{ type: "positiveAssignment", value: "+=" }, { type: "whiteSpace", value: "" },{ type: "whiteSpace", value: "" },{ type: "positiveAssignment", value: "+=" }, { type: "whiteSpace", value: "" },
{ type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "referent", value: "n" }, { type: 'lineBreak', value: '' }, { type: "referent", value: "m" }, { type: "slash", value: "/" }, { type: "whiteSpace", value: "" }, { type: "referent", value: "n" }, { type: 'lineBreak', value: '' },
{ type: "negativeAssignment", value: "-=" }, { type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: "slash", value: "/" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: 'lineBreak', value: '' }, { type: "whiteSpace", value: "" }, { type: "whiteSpace", value: "" },{ type: "negativeAssignment", value: "-=" }, { type: "whiteSpace", value: "" },
{ type: "referent", value: "s" }, { type: "slash", value: "/" }, { type: "referent", value: "z" }, { type: "slash", value: "/" }, { type: "referent", value: "kp" }, { type: "slash", value: "/" }, { type: "referent", value: "p" }, { type: "slash", value: "/" }, { type: "referent", value: "b" }, { type: "slash", value: "/" }, { type: "referent", value: "d" }, { type: "slash", value: "/" }, { type: "referent", value: "t" }, { type: "slash", value: "/" }, { type: "referent", value: "g" }, { type: "slash", value: "/" }, { type: "referent", value: "k" }, { type: "whiteSpace", value: "" },{ type: 'lineBreak', value: '' },
{ type: "closeBracket", value: "]" }, { type: "closeBracket", value: "]" },
] ]