refactor tokenize to return token objects

This commit is contained in:
Sorrel Bri 2020-03-01 23:03:33 -08:00
parent 6bd425ac34
commit 686a1b1ffc
2 changed files with 34 additions and 19 deletions

View file

@@ -15,7 +15,7 @@ const getOneToken = (latl, tokens) => {
const newRegEx = new RegExp(`^(${regEx})`); const newRegEx = new RegExp(`^(${regEx})`);
const match = latl.match(newRegEx) || null; const match = latl.match(newRegEx) || null;
if (match) { if (match) {
const newTokens = [...tokens, match[0]] const newTokens = [...tokens, {type, value: match[0]}]
const newLatl = latl.slice(match[0].length ,).trim(); const newLatl = latl.slice(match[0].length ,).trim();
return [newLatl, newTokens] return [newLatl, newTokens]
} }
@@ -39,11 +39,9 @@ export const tokenize = latl => {
} }
export const generateAST = latl => { export const generateAST = latl => {
// tokenize // tokenize
const tokens = tokenize(latl); const tokens = tokenize(latl);
// build tree // build tree
} }

View file

@@ -21,12 +21,20 @@ describe('LATL', () => {
it('returns tokens from well-formed latl feature definition', () => { it('returns tokens from well-formed latl feature definition', () => {
const tokens = tokenize(featureDefinitionLatl); const tokens = tokenize(featureDefinitionLatl);
expect(tokens).toStrictEqual(tokenizedFeature); expect(tokens).toStrictEqual(tokenizedFeature);
}) });
it('returns tokens from well-formed latl lexicon definition', () => { it('returns tokens from well-formed latl lexicon definition', () => {
const tokens = tokenize(lexiconDefinitionLatl); const tokens = tokenize(lexiconDefinitionLatl);
expect(tokens).toStrictEqual(tokenizedLexicon); expect(tokens).toStrictEqual(tokenizedLexicon);
}) });
// Combined case: joins the epoch, feature, and lexicon LATL fixtures with
// newlines and expects tokenize to yield the three expected token lists
// concatenated in that same order.
it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => {
const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl;
const tokens = tokenize(latl);
// Expected result is the per-definition token fixtures spread into one array.
const tokenizedLatl = [...tokenizedEpoch, ...tokenizedFeature, ...tokenizedLexicon];
expect(tokens).toStrictEqual(tokenizedLatl);
});
}) })
const epochDefinitionLatl = ` const epochDefinitionLatl = `
*PROTO *PROTO
@@ -36,10 +44,16 @@ n>m/#_.
` `
const tokenizedEpoch = [ const tokenizedEpoch = [
'*', 'PROTO', { type: "star", value: "*" }, { type: "variable", value: "PROTO" },
'[', '+', 'FEATURE', ']', '>', '[', '-', 'FEATURE', ']', '/', '.', '_', '.', { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" },
'n', '>', 'm', '/', '#', '_', '.', { type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" },
'|', 'CHILD' { type: "slash", value: "/" }, { type: "dot", value: "." },
{ type: "loDash", value: "_" }, { type: "dot", value: "." },
{ type: "variable", value: "n" },
{ type: "greaterThan", value: ">" }, { type: "variable", value: "m" },
{ type: "slash", value: "/" }, { type: "hash", value: "#" },
{ type: "loDash", value: "_" }, { type: "dot", value: "." },
{ type: "pipe", value: "|" }, { type: "variable", value: "CHILD" }
] ]
const featureDefinitionLatl = ` const featureDefinitionLatl = `
@@ -52,12 +66,15 @@ const featureDefinitionLatl = `
` `
const tokenizedFeature = [ const tokenizedFeature = [
'[', '+', 'PLOSIVE', ']', '=', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k', { type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
'[', '-', 'PLOSIVE', ']', '=', 'm', '/', 'n', '/', 's', '/', 'z', { type: "equal", value: "=" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" },
'[', 'SONORANT', { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
'+=', 'm', '/', 'n', { type: "equal", value: "=" }, { type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" }, { type: "slash", value: "/" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" },
'-=', 's', '/', 'z', '/', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k', { type: "openBracket", value: "[" }, { type: "variable", value: "SONORANT" },
']' { type: "positiveAssignment", value: "+=" },
{ type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" },
{ type: "negativeAssignment", value: "-=" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" }, { type: "slash", value: "/" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" },
{ type: "closeBracket", value: "]" },
] ]
const lexiconDefinitionLatl = ` const lexiconDefinitionLatl = `
@@ -68,8 +85,8 @@ const lexiconDefinitionLatl = `
` `
const tokenizedLexicon = [ const tokenizedLexicon = [
'/', 'PROTO', { type: "slash", value: "/" }, { type: "variable", value: "PROTO" },
'kpn', { type: "variable", value: "kpn" },
'sm', { type: "variable", value: "sm" },
'/' { type: "slash", value: "/" }
] ]