add tokenizer for epoch, feature, and lexicon tokens

2020-03-01 22:42:35 -08:00 · 2020-03-01 22:42:35 -08:00 · 6bd425ac34
commit 6bd425ac34
parent d5d1eb2fa2
2 changed files with 121 additions and 2 deletions
--- a/src/reducers/reducer.latl.js
+++ b/src/reducers/reducer.latl.js
@ -9,3 +9,61 @@ export const parseLatl = (state, action) => {
  let latl = state.action;
  return { ...state }
 }
 /**
  * Token table for the LATL tokenizer: `[typeName, regexSource]` pairs.
  *
  * Order is significant: the first pattern that matches the start of the
  * remaining input wins, so the two-character operators (`+=`, `-=`) must be
  * listed before their one-character prefixes (`+`, `-`).
  *
  * Whitespace and line breaks have no entry here; `tokenize` and
  * `getOneToken` trim them away between tokens.
  */
 const tokenTypes = [
   ['star', '\\*'],
   ['pipe', '\\|'],
   ['openBracket', '\\['],
   ['closeBracket', '\\]'],
   ['positiveAssignment', '\\+='],
   ['negativeAssignment', '-='],
   ['plus', '\\+'],
   ['minus', '-'],
   ['greaterThan', '>'],
   ['hash', '#'],
   ['slash', '/'],
   ['dot', '\\.'],
   ['loDash', '_'],
   ['variable', '[A-Za-z]+'],
   ['equal', '='],
 ];

 // Start-anchored matchers, compiled once at module load instead of once per
 // token type on every getOneToken call.
 const tokenMatchers = tokenTypes.map(([, source]) => new RegExp(`^(${source})`));

 /**
  * Consumes exactly one token from the front of `latl`.
  *
  * @param {string} latl - Remaining input; must already be trimmed at the start.
  * @param {string[]} tokens - Tokens collected so far (not mutated).
  * @returns {[string, string[]]} The trimmed remainder of the input and a new
  *   token array with the matched text appended.
  * @throws {Error} When no entry of `tokenTypes` matches the start of `latl`;
  *   the message quotes the first line of the remaining input.
  */
 const getOneToken = (latl, tokens) => {
   for (const matcher of tokenMatchers) {
     const match = latl.match(matcher);
     if (match) {
       const lexeme = match[0];
       // Trim so the next call again starts directly on a token.
       return [latl.slice(lexeme.length).trim(), [...tokens, lexeme]];
     }
   }
   throw new Error(`Unexpected token at ${latl.split('\n')[0]}`);
 };

 /**
  * Splits LATL source text into a flat list of token strings.
  *
  * @param {string} latl - LATL source text.
  * @returns {string[] | {errors: string, message: string}} The token strings
  *   in source order, or an error object (`errors` is always
  *   `'tokenization error'`) when some part of the input matches no token type.
  */
 export const tokenize = latl => {
   let tokens = [];
   let remaining = latl.trim();
   try {
     while (remaining.length) {
       [remaining, tokens] = getOneToken(remaining, tokens);
     }
     return tokens;
   } catch (err) {
     // Forward only the text so callers keep receiving a plain string message.
     return { errors: 'tokenization error', message: err.message };
   }
 };

 /**
  * Entry point for building a syntax tree from LATL source.
  *
  * Only the tokenizing step exists so far: the tokens are computed and then
  * discarded, and the function returns `undefined`.
  *
  * @param {string} latl - LATL source text.
  * @returns {undefined}
  */
 export const generateAST = latl => {
   // tokenize
   const tokens = tokenize(latl);
   // build tree (not implemented yet)
 };
--- a/src/reducers/reducer.latl.test.js
+++ b/src/reducers/reducer.latl.test.js
@ -1,5 +1,6 @@
 import { stateReducer } from './reducer';
 import { initState } from './reducer.init';
 import { tokenize } from './reducer.latl';
 describe('LATL', () => {
  it('returns state unaltered with no action body', () => {
@ -11,4 +12,64 @@ describe('LATL', () => {
    const returnedState = stateReducer(state, action)
    expect(returnedState).toStrictEqual(state);
  })
  it('returns tokens from well-formed latl epoch definition', () => {
    // The epoch sample must tokenize to exactly the expected token list.
    expect(tokenize(epochDefinitionLatl)).toStrictEqual(tokenizedEpoch);
  });
  it('returns tokens from well-formed latl feature definition', () => {
    // The feature sample must tokenize to exactly the expected token list.
    expect(tokenize(featureDefinitionLatl)).toStrictEqual(tokenizedFeature);
  });
  it('returns tokens from well-formed latl lexicon definition', () => {
    // The lexicon sample must tokenize to exactly the expected token list.
    expect(tokenize(lexiconDefinitionLatl)).toStrictEqual(tokenizedLexicon);
  });
 })
 // Fixture: LATL epoch-definition sample passed to tokenize() in the
 // 'epoch definition' test above.
 const epochDefinitionLatl = `
 *PROTO
 [+ FEATURE]>[- FEATURE]/._.
 n>m/#_.
 |CHILD
 `
 // Expected tokenize() output for epochDefinitionLatl, one row per source line.
 // Whitespace and line breaks do not appear as tokens.
 const tokenizedEpoch = [ 
  '*', 'PROTO', 
  '[', '+', 'FEATURE', ']', '>', '[', '-', 'FEATURE', ']', '/', '.', '_', '.',
  'n', '>', 'm', '/', '#', '_', '.',
  '|', 'CHILD'
 ]
 // Fixture: LATL feature-definition sample passed to tokenize() in the
 // 'feature definition' test above.
 const featureDefinitionLatl = `
 [+ PLOSIVE] = kp / p / b / d / t / g / k
 [- PLOSIVE] = m / n / s / z
 [SONORANT 
  += m / n
  -= s / z / kp / p / b / d / t / g / k
 ]
 `
 // Expected tokenize() output for featureDefinitionLatl, one row per source
 // line. Note that '+=' and '-=' are single tokens, while a '+' or '-'
 // followed by a space is its own token.
 const tokenizedFeature = [
  '[', '+', 'PLOSIVE', ']', '=', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k',
  '[', '-', 'PLOSIVE', ']', '=', 'm', '/', 'n', '/', 's', '/', 'z',
  '[', 'SONORANT',
    '+=', 'm', '/', 'n',
    '-=', 's', '/', 'z', '/', 'kp', '/', 'p', '/', 'b', '/', 'd', '/', 't', '/', 'g', '/', 'k',
  ']'
 ]
 // Fixture: LATL lexicon-definition sample passed to tokenize() in the
 // 'lexicon definition' test above.
 const lexiconDefinitionLatl = `
 /PROTO
  kpn
  sm
 /
 `
 // Expected tokenize() output for lexiconDefinitionLatl, one row per source
 // line. Each run of letters is a single token.
 const tokenizedLexicon = [
  '/', 'PROTO',
    'kpn',
    'sm',
  '/'
 ]