News:

Welcome to RetroCoders Community

Main Menu

Tokenizer in javaScript

Started by aurel, Apr 16, 2023, 06:34 PM

Previous topic - Next topic

aurel

are you ready ?  ;D

GO...

Finished Result

All in, your code should look something like this:

import helpers from "./helpers"

/**
 * Converts a source string into a flat array of tokens.
 *
 * Recognised token shapes:
 *   - { type: "Parenthesis", value: <char> }
 *   - { type: "Number", value: <integer parsed in base 10> }
 *   - { type: "Name", value: <run of letters> }
 *   - { type: "String", value: <text between two quote characters> }
 * Whitespace is skipped and produces no token. Character classification is
 * delegated to the imported `helpers` predicates.
 *
 * @param {string} input - Source text to tokenize.
 * @returns {Array<{type: string, value: (string|number)}>} Tokens in source order.
 * @throws {Error} When a character matches no helper predicate, or when a
 *   string literal is opened but never closed.
 */
const tokenize = input => {
  const tokens = []

  let cursor = 0

  while (cursor < input.length) {
    const character = input[cursor]

    if (helpers.isParenthesis(character)) {
      tokens.push({
        type: "Parenthesis",
        value: character,
      })
      cursor++
      continue
    }

    if (helpers.isWhitespace(character)) {
      cursor++
      continue
    }

    if (helpers.isNumber(character)) {
      let number = character
      cursor++

      /**
       * Multi-digit numbers: keep consuming while the next character is a
       * digit. The explicit bounds check stops the scan at end of input
       * instead of handing `undefined` to the helper.
       */
      while (cursor < input.length && helpers.isNumber(input[cursor])) {
        number += input[cursor]
        cursor++
      }

      tokens.push({
        type: "Number",
        value: parseInt(number, 10),
      })

      continue
    }

    if (helpers.isLetter(character)) {
      let symbol = character
      cursor++

      /**
       * Words: keep consuming while the next character is a letter.
       * Bounds-checked for the same reason as the number scan above.
       */
      while (cursor < input.length && helpers.isLetter(input[cursor])) {
        symbol += input[cursor]
        cursor++
      }

      tokens.push({
        type: "Name",
        value: symbol,
      })

      continue
    }

    if (helpers.isQuote(character)) {
      let string = ""
      cursor++ // step over the opening quote

      while (cursor < input.length && !helpers.isQuote(input[cursor])) {
        string += input[cursor]
        cursor++
      }

      // Running off the end means the closing quote is missing; without this
      // check the scan would never terminate.
      if (cursor >= input.length) {
        throw new Error("Unterminated string literal.")
      }

      tokens.push({
        type: "String",
        value: string,
      })

      cursor++ // step over the closing quote
      continue
    }

    throw new Error(`${character} is not valid.`)
  }
  return tokens
}

export default tokenize
WOULD YOU LIKE TO KNOW MORE?
basic4us.epizy.com/forum/index.php

CharlieJV

When presenting content that isn't your own creation, it is a kindness (and a good ethical practice) to give proper attribution to the creator of the content and/or a link to the source: https://ruairidh.dev/build-your-own-programming-language/

aurel

Ouch...  ;D
Sorry, I only skimmed over it quickly.
I'm more or less following Steve Kinney's path in this, so I've got a good example.
I'm not sure how I landed on this one... ah yes, I was reading about
local variables.
WOULD YOU LIKE TO KNOW MORE?
basic4us.epizy.com/forum/index.php

aurel

OMG
 ;D

I am so bad at JS programming,
and what trips me up is the way this whole concept works.
I tried a few examples.

1.you need to have index.html file

(i kind a know that  ::) )

2.you need to have javaScript file myScript.js

(also i kind of understand this ..arrgh)

But because browsers differ, the result is not always the same
(so much for portability... in general it should be the same, but it is not)
  (a similar mess to "any C is C", when in fact it is not)


well ..i need to investigate this more .grrr....
(well that frekin* tokenizer not execute in my bro  :o  ..) sorry bro-wser  ;D   >:(
WOULD YOU LIKE TO KNOW MORE?
basic4us.epizy.com/forum/index.php

aurel

warning !!!!

in above post frekin* tokenizer ..is not expression  ;D
and no is not pointer....

I NEED A COFFEE  blurp ...snip ...blurp  :-[
WOULD YOU LIKE TO KNOW MORE?
basic4us.epizy.com/forum/index.php

aurel

hi hi
I downloaded a JS debugger add-on and I got a Pad too... nice.
So it looks like this in the SeaMonkey browser:

WOULD YOU LIKE TO KNOW MORE?
basic4us.epizy.com/forum/index.php

aurel

and another one from kBasic github

/**
 * Tokenizes one line of BASIC source and appends the resulting tokens to this
 * instance. Nothing is returned.
 *
 * Tokens are emitted through `this.addToken`; only the leading "S " debug
 * marker goes through `this._tokens.addToken`.
 * NOTE(review): the two different sinks are kept exactly as found — confirm
 * whether they are meant to differ.
 *
 * Token kinds produced: PRINT_DEBUG, NUMBER, LOGICAL_OPERATOR, MULT_OPERATOR,
 * PLUS_OPERATOR, KEYWORD, FUNCTION, IDENTIFIER, STRING, OPENPAREN, CLOSEPAREN,
 * ASSIGNMENT, RELATIONAL, ENDOFLINE and CHARACTER (fallback for anything else).
 *
 * Relies on names defined outside this function: `Token`, `Is`,
 * `regexKeywords` and `regexFunctions`.
 *
 * Written with `var` only so the function stays usable in ES5 environments,
 * and with every local declared so it also runs in strict mode / ES modules.
 *
 * @param {*} input - Line to tokenize. It is coerced with String() and
 *   trimmed. null, undefined, or a value whose `length` is below 1 is ignored.
 * @throws {string} The string "ERROR: true" when a string literal is not
 *   closed. A string (not an Error object) is thrown deliberately to stay
 *   compatible with existing callers.
 */
kbasic.prototype.tokenize = function(input /*line*/) {
  if (input == null || input.length < 1) {
    return;
  }

  var line = String(input).trim();
  var i = 0;
  var c;
  var next;
  var start;
  var text;
  var t;
  var identifier;
  var match;
  var upper;
  var base;
  var marker;
  var quote;

  // Advances `i` past a run of decimal digits and refreshes `c` to the
  // character at the new position (undefined at end of line).
  var skipDigits = function() {
    while (i < line.length && Is.digit(line[i])) {
      i++;
    }
    c = line[i];
  };

  // A leading "S" followed by a space is a debug-print marker; only the "S"
  // is consumed here, the space is skipped by the main loop.
  if (line[0] === "S" && Is.space(line[1])) {
    this._tokens.addToken(new Token("S", "PRINT_DEBUG"));
    i++;
  }
  this.error = false;

  while (i < line.length) {
    c = line[i];
    // One character of look-ahead; -1 marks "no next character".
    next = i + 1 < line.length ? line[i + 1] : -1;

    // Number starting with a decimal point, e.g. ".5".
    if (c === ".") {
      start = i;
      i++;
      skipDigits();
      this.addToken(new Token(line.substring(start, i), "NUMBER"));
      continue;
    }

    // Decimal number: digits, optional fraction, optional exponent.
    if (Is.digit(c)) {
      start = i;
      skipDigits();

      if (i < line.length && c === ".") {
        i++;
        skipDigits();
      }

      // The exponent may follow either the integer or the fraction part, and
      // its sign is optional ("2E5", "1.5E+3").
      if (i < line.length && c === "E") {
        i++;
        c = line[i];
        if (i < line.length && (c === "+" || c === "-")) {
          i++;
        }
        skipDigits();
      }

      text = line.substring(start, i);

      if (c === "#" || c === "!") {
        // Type suffix such as 123# is consumed but not kept in the token.
        i++;
      }

      this.addToken(new Token(text, "NUMBER"));
      continue;
    }

    // "&&" operator, or an "&"-prefixed octal / hexadecimal literal.
    if (c === "&") {
      if (next === "&") {
        i += 2;
        this.addToken(new Token("&&", "LOGICAL_OPERATOR"));
        continue;
      }

      base = 8;
      // `next` is -1 when "&" is the last character, so guard the method call.
      marker = typeof next === "string" ? next.toUpperCase() : "";
      if (marker === "H") {
        base = 16;
        i += 2;
      } else if (marker === "O") {
        i += 2;
      } else {
        // Bare "&" prefix (octal): step over the "&" so the scan always
        // makes progress.
        i++;
      }
      start = i;

      if (i < line.length) c = line[i];
      else c = -1;

      while (i < line.length && Is.digit_in_base(c, base)) {
        i++;
        c = line[i];
      }
      text = line.substring(start, i);

      if (marker !== "H" && marker !== "O" && text === "") {
        // A lone "&" with no digits is not a number; emit it like any other
        // unrecognised character.
        this.addToken(new Token("&", "CHARACTER"));
        continue;
      }

      this.addToken(new Token(parseInt(text, base).toString(), "NUMBER"));
      continue;
    }

    // Keyword, function name or identifier.
    if (Is.alpha(c)) {
      start = i;
      while (i < line.length && Is.alnum(c)) {
        i++;
        c = line[i];
      }

      // A trailing type sigil is part of the name.
      if (c === "$" || c === "!" || c === "#" || c === "%") {
        i++;
      }

      identifier = line.substring(start, i);
      match = identifier.match(regexKeywords);
      if (match && match[0]) {
        upper = identifier.toUpperCase();
        if (upper === "OR") {
          t = new Token("||", "LOGICAL_OPERATOR");
        } else if (upper === "AND") {
          t = new Token("&&", "LOGICAL_OPERATOR");
        } else if (upper === "MOD") {
          t = new Token(upper, "MULT_OPERATOR");
        } else {
          t = new Token(upper, "KEYWORD");
        }
      } else {
        match = identifier.match(regexFunctions);
        if (match && match[0]) {
          t = new Token(identifier.toUpperCase(), "FUNCTION");
        } else {
          t = new Token(identifier, "IDENTIFIER");
        }
      }
      this.addToken(t);
      continue;
    }

    // String literal delimited by double or single quotes; the closing quote
    // must be the same character as the opening one.
    if (c === '"' || c === "'") {
      quote = c;
      start = i;
      i++;
      if (i >= line.length) {
        this.error = true;
        break;
      }
      c = line[i];
      while (i < line.length && c !== quote) {
        i++;
        c = line[i];
      }
      if (c !== quote) {
        this.error = true;
        break;
      }
      i++;
      // Token text excludes the two delimiters.
      this.addToken(new Token(line.substring(start + 1, i - 1), "STRING"));
      continue;
    }

    if (Is.space(c)) {
      while (i < line.length && Is.space(c)) {
        i++;
        c = line[i];
      }
      continue;
    }

    if (c === "*" || c === "/" || c === "^" || c === "\\") {
      i++;
      this.addToken(new Token(c, "MULT_OPERATOR"));
      continue;
    }

    if (c === "(") {
      i++;
      this.addToken(new Token(c, "OPENPAREN"));
      continue;
    }

    if (c === ")") {
      i++;
      this.addToken(new Token(c, "CLOSEPAREN"));
      continue;
    }

    if (c === "+" || c === "-") {
      i++;
      this.addToken(new Token(c, "PLUS_OPERATOR"));
      continue;
    }

    if (c === "=") {
      i++;
      this.addToken(new Token(c, "ASSIGNMENT"));
      continue;
    }

    // "<>" becomes "!=", "<=" / ">=" keep their two characters.
    if (c === ">" || c === "<") {
      i++;
      if (c === "<" && next === ">") {
        i++;
        t = new Token("!=", "RELATIONAL");
      } else if (next === "=") {
        i++;
        t = new Token(c + "=", "RELATIONAL");
      } else {
        t = new Token(c, "RELATIONAL");
      }
      this.addToken(t);
      continue;
    }

    // A line break, with "\r\n" counted as one.
    if (c === "\r" || c === "\n") {
      i++;
      if (c === "\r" && next === "\n") {
        i++;
      }
      this.addToken(new Token("--", "ENDOFLINE"));
      continue;
    }

    // ":" is emitted as the "&&" logical operator.
    if (c === ":") {
      i++;
      this.addToken(new Token("&&", "LOGICAL_OPERATOR"));
      continue;
    }

    if (c === "|" && next === "|") {
      i += 2;
      this.addToken(new Token("||", "LOGICAL_OPERATOR"));
      continue;
    }

    // Anything unrecognised is passed through as a single CHARACTER token.
    this.addToken(new Token(c, "CHARACTER"));
    i++;
  } //while main

  if (this.error) throw "ERROR: " + this.error;
};
WOULD YOU LIKE TO KNOW MORE?
basic4us.epizy.com/forum/index.php