Refactor tokenize to return token objects ({ type, value }) instead of raw matched strings
This commit is contained in:
parent
6bd425ac34
commit
686a1b1ffc
2 changed files with 34 additions and 19 deletions
|
@ -15,7 +15,7 @@ const getOneToken = (latl, tokens) => {
|
||||||
const newRegEx = new RegExp(`^(${regEx})`);
|
const newRegEx = new RegExp(`^(${regEx})`);
|
||||||
const match = latl.match(newRegEx) || null;
|
const match = latl.match(newRegEx) || null;
|
||||||
if (match) {
|
if (match) {
|
||||||
const newTokens = [...tokens, match[0]]
|
const newTokens = [...tokens, {type, value: match[0]}]
|
||||||
const newLatl = latl.slice(match[0].length ,).trim();
|
const newLatl = latl.slice(match[0].length ,).trim();
|
||||||
return [newLatl, newTokens]
|
return [newLatl, newTokens]
|
||||||
}
|
}
|
||||||
|
@ -39,11 +39,9 @@ export const tokenize = latl => {
|
||||||
}
|
}
|
||||||
|
|
||||||
export const generateAST = latl => {
|
export const generateAST = latl => {
|
||||||
|
|
||||||
// tokenize
|
// tokenize
|
||||||
const tokens = tokenize(latl);
|
const tokens = tokenize(latl);
|
||||||
|
|
||||||
|
|
||||||
// build tree
|
// build tree
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,12 +21,20 @@ describe('LATL', () => {
|
||||||
it('returns tokens from well-formed latl feature definition', () => {
|
it('returns tokens from well-formed latl feature definition', () => {
|
||||||
const tokens = tokenize(featureDefinitionLatl);
|
const tokens = tokenize(featureDefinitionLatl);
|
||||||
expect(tokens).toStrictEqual(tokenizedFeature);
|
expect(tokens).toStrictEqual(tokenizedFeature);
|
||||||
})
|
});
|
||||||
|
|
||||||
it('returns tokens from well-formed latl lexicon definition', () => {
|
it('returns tokens from well-formed latl lexicon definition', () => {
|
||||||
const tokens = tokenize(lexiconDefinitionLatl);
|
const tokens = tokenize(lexiconDefinitionLatl);
|
||||||
expect(tokens).toStrictEqual(tokenizedLexicon);
|
expect(tokens).toStrictEqual(tokenizedLexicon);
|
||||||
})
|
});
|
||||||
|
|
||||||
|
it('returns tokens from well-formed latl epoch, feature, and lexicon definitions', () => {
|
||||||
|
const latl = epochDefinitionLatl + '\n' + featureDefinitionLatl + '\n' + lexiconDefinitionLatl;
|
||||||
|
const tokens = tokenize(latl);
|
||||||
|
const tokenizedLatl = [...tokenizedEpoch, ...tokenizedFeature, ...tokenizedLexicon];
|
||||||
|
expect(tokens).toStrictEqual(tokenizedLatl);
|
||||||
|
});
|
||||||
|
|
||||||
})
|
})
|
||||||
const epochDefinitionLatl = `
|
const epochDefinitionLatl = `
|
||||||
*PROTO
|
*PROTO
|
||||||
|
@ -36,10 +44,16 @@ n>m/#_.
|
||||||
`
|
`
|
||||||
|
|
||||||
const tokenizedEpoch = [
|
const tokenizedEpoch = [
|
||||||
'*', 'PROTO',
|
{ type: "star", value: "*" }, { type: "variable", value: "PROTO" },
|
||||||
'[', '+', 'FEATURE', ']', '>', '[', '-', 'FEATURE', ']', '/', '.', '_', '.',
|
{ type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" },
|
||||||
'n', '>', 'm', '/', '#', '_', '.',
|
{ type: "greaterThan", value: ">" }, { type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "FEATURE" }, { type: "closeBracket", value: "]" },
|
||||||
'|', 'CHILD'
|
{ type: "slash", value: "/" }, { type: "dot", value: "." },
|
||||||
|
{ type: "loDash", value: "_" }, { type: "dot", value: "." },
|
||||||
|
{ type: "variable", value: "n" },
|
||||||
|
{ type: "greaterThan", value: ">" }, { type: "variable", value: "m" },
|
||||||
|
{ type: "slash", value: "/" }, { type: "hash", value: "#" },
|
||||||
|
{ type: "loDash", value: "_" }, { type: "dot", value: "." },
|
||||||
|
{ type: "pipe", value: "|" }, { type: "variable", value: "CHILD" }
|
||||||
]
|
]
|
||||||
|
|
||||||
const featureDefinitionLatl = `
|
const featureDefinitionLatl = `
|
||||||
|
@ -52,12 +66,15 @@ const featureDefinitionLatl = `
|
||||||
`
|
`
|
||||||
|
|
||||||
const tokenizedFeature = [
|
const tokenizedFeature = [
|
||||||
'[', '+', 'PLOSIVE', ']', '=', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k',
|
{ type: "openBracket", value: "[" }, { type: "plus", value: "+" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
|
||||||
'[', '-', 'PLOSIVE', ']', '=', 'm', '/', 'n', '/', 's', '/', 'z',
|
{ type: "equal", value: "=" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" },
|
||||||
'[', 'SONORANT',
|
{ type: "openBracket", value: "[" }, { type: "minus", value: "-" }, { type: "variable", value: "PLOSIVE" }, { type: "closeBracket", value: "]" },
|
||||||
'+=', 'm', '/', 'n',
|
{ type: "equal", value: "=" }, { type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" }, { type: "slash", value: "/" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" },
|
||||||
'-=', 's', '/', 'z', '/', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k',
|
{ type: "openBracket", value: "[" }, { type: "variable", value: "SONORANT" },
|
||||||
']'
|
{ type: "positiveAssignment", value: "+=" },
|
||||||
|
{ type: "variable", value: "m" }, { type: "slash", value: "/" }, { type: "variable", value: "n" },
|
||||||
|
{ type: "negativeAssignment", value: "-=" }, { type: "variable", value: "s" }, { type: "slash", value: "/" }, { type: "variable", value: "z" }, { type: "slash", value: "/" }, { type: "variable", value: "kp" }, { type: "slash", value: "/" }, { type: "variable", value: "p" }, { type: "slash", value: "/" }, { type: "variable", value: "b" }, { type: "slash", value: "/" }, { type: "variable", value: "d" }, { type: "slash", value: "/" }, { type: "variable", value: "t" }, { type: "slash", value: "/" }, { type: "variable", value: "g" }, { type: "slash", value: "/" }, { type: "variable", value: "k" },
|
||||||
|
{ type: "closeBracket", value: "]" },
|
||||||
]
|
]
|
||||||
|
|
||||||
const lexiconDefinitionLatl = `
|
const lexiconDefinitionLatl = `
|
||||||
|
@ -68,8 +85,8 @@ const lexiconDefinitionLatl = `
|
||||||
`
|
`
|
||||||
|
|
||||||
const tokenizedLexicon = [
|
const tokenizedLexicon = [
|
||||||
'/', 'PROTO',
|
{ type: "slash", value: "/" }, { type: "variable", value: "PROTO" },
|
||||||
'kpn',
|
{ type: "variable", value: "kpn" },
|
||||||
'sm',
|
{ type: "variable", value: "sm" },
|
||||||
'/'
|
{ type: "slash", value: "/" }
|
||||||
]
|
]
|
Loading…
Reference in a new issue