add tokenizer for epoch, feature, and lexicon tokens
This commit is contained in:
parent
d5d1eb2fa2
commit
6bd425ac34
2 changed files with 121 additions and 2 deletions
|
@@ -9,3 +9,61 @@ export const parseLatl = (state, action) => {
|
|||
let latl = state.action;
|
||||
return { ...state }
|
||||
}
|
||||
|
||||
// Match exactly one token at the head of `latl`.
//
// Tries each [name, patternSource] pair from `tokenTypes` in declaration
// order, anchoring the pattern to the start of the input with `^(…)`.
// Declaration order is therefore significant (e.g. `+=` is tried before `+`).
//
// @param {string} latl - remaining untokenized source (already trimmed)
// @param {string[]} tokens - lexemes accumulated so far (not mutated)
// @returns {[string, string[]]} pair of [rest of input, trimmed; tokens + new lexeme]
// @throws {Error} when no pattern matches the head of the input
const getOneToken = (latl, tokens) => {
  for (const [, pattern] of tokenTypes) {
    const anchored = new RegExp(`^(${pattern})`);
    const match = latl.match(anchored); // null when the pattern does not match
    if (match) {
      const lexeme = match[0];
      // Trim after slicing so inter-token whitespace never reaches the regexes.
      const newLatl = latl.slice(lexeme.length).trim();
      return [newLatl, [...tokens, lexeme]];
    }
  }
  // Throw a real Error (not a bare string) so callers get a stack trace;
  // report only the first line of the leftover input to keep the message short.
  throw new Error(`Unexpected token at ${latl.split('\n')[0]}`);
}
|
||||
|
||||
// Convert a raw LATL source string into a flat array of token lexemes.
//
// Repeatedly strips one token from the head of the (trimmed) input until it
// is exhausted. On a lexing failure it does not throw: it returns an error
// descriptor object instead, so reducer code can surface the problem as state.
//
// @param {string} latl - raw LATL source text
// @returns {string[]|{errors: string, message: *}} token list, or error descriptor
export const tokenize = latl => {
  // NOTE(review): removed unused counter `let i = 0;` — it was never read.
  let tokens = [];
  let newLatl = latl.trim();
  try {
    while (newLatl.length) {
      [newLatl, tokens] = getOneToken(newLatl, tokens);
    }
    return tokens;
  }
  catch (err) {
    // Preserve the established error shape: callers look for an `errors` key.
    return { errors: 'tokenization error', message: err };
  }
}
|
||||
|
||||
// Parse LATL source into an abstract syntax tree.
//
// Stage 1 (lexing) delegates to `tokenize`; stage 2 (tree building) is not
// yet implemented, so the function currently returns undefined.
//
// @param {string} latl - raw LATL source text
export const generateAST = latl => {
  // Stage 1: lexical analysis — flatten the source into a token list.
  const tokens = tokenize(latl);

  // Stage 2: build the syntax tree from `tokens`.
  // TODO: not yet implemented.
}
|
||||
|
||||
// Token inventory for the LATL lexer, as [name, regexSource] pairs.
//
// `getOneToken` wraps each source string in an anchored `^(…)` regex and
// tries the pairs in this order, so ordering is load-bearing: multi-char
// operators must precede their single-char prefixes (`+=` before `+`,
// `-=` before `-`). Frozen (shallowly) to guard against accidental mutation
// of a module-level constant.
const tokenTypes = Object.freeze([
  ['star', '\\*'],
  ['pipe', '\\|'],
  ['openBracket', '\\['],
  ['closeBracket', '\\]'],
  ['positiveAssignment', '\\+='],
  ['negativeAssignment', '\\-='],
  ['plus', '\\+'],
  ['minus', '\\-'],
  ['greaterThan', '\\>'],
  ['hash', '#'],
  ['slash', '\\/'],
  ['dot', '\\.'],
  ['loDash', '\\_'],
  ['variable', '[A-Za-z]+'],
  ['equal', '='],
  // Line breaks and other whitespace are consumed by trim() in the lexer
  // rather than emitted as tokens, so no patterns are listed for them.
]);
|
|
@@ -1,5 +1,6 @@
|
|||
import { stateReducer } from './reducer';
|
||||
import { initState } from './reducer.init';
|
||||
import { tokenize } from './reducer.latl';
|
||||
|
||||
describe('LATL', () => {
|
||||
it('returns state unaltered with no action body', () => {
|
||||
|
@@ -11,4 +12,64 @@ describe('LATL', () => {
|
|||
const returnedState = stateReducer(state, action)
|
||||
expect(returnedState).toStrictEqual(state);
|
||||
})
|
||||
|
||||
// Each fixture pair used below (defined at the bottom of this file) holds a
// well-formed LATL snippet and the exact token array the lexer should emit.

// Epoch definition: `*NAME`, sound-change rules, `|CHILD` terminator.
it('returns tokens from well-formed latl epoch definition', () => {
  const tokens = tokenize(epochDefinitionLatl);
  expect(tokens).toStrictEqual(tokenizedEpoch)
});

// Feature definition: `[+/- FEATURE] = …` phoneme lists and `+=`/`-=` blocks.
it('returns tokens from well-formed latl feature definition', () => {
  const tokens = tokenize(featureDefinitionLatl);
  expect(tokens).toStrictEqual(tokenizedFeature);
})

// Lexicon definition: a slash-delimited word list under `/PROTO`.
it('returns tokens from well-formed latl lexicon definition', () => {
  const tokens = tokenize(lexiconDefinitionLatl);
  expect(tokens).toStrictEqual(tokenizedLexicon);
})
|
||||
})
|
||||
// LATL fixture: an epoch definition — proto-language name (`*PROTO`), two
// sound-change rules, and the child-language terminator (`|CHILD`).
// NOTE: template-literal contents are significant input data; do not reformat.
const epochDefinitionLatl = `
*PROTO
[+ FEATURE]>[- FEATURE]/._.
n>m/#_.
|CHILD
`

// Expected token stream for epochDefinitionLatl, in source order
// (one row per source line for readability).
const tokenizedEpoch = [
  '*', 'PROTO',
  '[', '+', 'FEATURE', ']', '>', '[', '-', 'FEATURE', ']', '/', '.', '_', '.',
  'n', '>', 'm', '/', '#', '_', '.',
  '|', 'CHILD'
]
||||
|
||||
// LATL fixture: a feature definition — `=` assignments of phoneme lists to
// `[+ …]` / `[- …]` features, plus a block using `+=` / `-=` assignments.
// NOTE: template-literal contents are significant input data; do not reformat.
const featureDefinitionLatl = `
[+ PLOSIVE] = kp / p / b / d / t / g / k
[- PLOSIVE] = m / n / s / z
[SONORANT
+= m / n
-= s / z / kp / p / b / d / t / g / k
]
`

// Expected token stream for featureDefinitionLatl, in source order.
// Note multi-letter phonemes like 'kp' lex as a single `variable` token, and
// '+=' / '-=' lex as single assignment tokens (not '+' followed by '=').
const tokenizedFeature = [
  '[', '+', 'PLOSIVE', ']', '=', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k',
  '[', '-', 'PLOSIVE', ']', '=', 'm', '/', 'n', '/', 's', '/', 'z',
  '[', 'SONORANT',
  '+=', 'm', '/', 'n',
  '-=', 's', '/', 'z', '/', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k',
  ']'
]
|
||||
|
||||
// LATL fixture: a lexicon definition — a word list opened by `/PROTO` and
// closed by a bare `/`.
// NOTE: template-literal contents are significant input data; do not reformat.
const lexiconDefinitionLatl = `
/PROTO
kpn
sm
/
`

// Expected token stream for lexiconDefinitionLatl, in source order.
const tokenizedLexicon = [
  '/', 'PROTO',
  'kpn',
  'sm',
  '/'
]
|
Loading…
Reference in a new issue