/*
 * Copyright 2023 Oleg Borodin <borodin@unix7.org>
 */
|
|
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#include <rcache.h>
|
|
#include <jlexer.h>
|
|
|
|
/*
 * Lexer contexts: the state kept in lexer->context between characters.
 * BLOCKB, BLOCKE and SEPAR are not referenced by the code visible in
 * this file.
 */
#define JLEXCONT_UNDEF   0   /* between tokens, nothing being accumulated */
#define JLEXCONT_WORD    1   /* inside a double-quoted string */
#define JLEXCONT_BLOCKB  2
#define JLEXCONT_BLOCKE  3
#define JLEXCONT_SEPAR   4
#define JLEXCONT_NUM     5   /* accumulating a run of number characters */
#define JLEXCONT_UNKNOW  6   /* accumulating a run of unrecognised characters */
#define JLEXCONT_END     9   /* end of input has been seen */
|
|
|
|
/*
 * Character classes produced by get_ltype().
 * JLEXTYPE_UNDEF is kept for completeness; get_ltype() never returns it.
 */
#define JLEXTYPE_UNDEF   0
#define JLEXTYPE_BLOCKB  1   /* '{' */
#define JLEXTYPE_BLOCKE  2   /* '}' */
#define JLEXTYPE_WORDL   3   /* '"', the string delimiter */
#define JLEXTYPE_SPACE   4   /* space, tab, line feed, carriage return */
#define JLEXTYPE_SEPAR   5   /* ':' */
#define JLEXTYPE_NUM     6   /* digits and '.', 'e', 'E', '+', '-' */
#define JLEXTYPE_COMMA   7   /* ',' */
#define JLEXTYPE_CHAR    8   /* any other character */
#define JLEXTYPE_EOF     9   /* the EOF marker */

/*
 * Classify one input character.
 *
 * The argument is an int, not a char, so that the EOF marker stays
 * distinguishable from data: through a char parameter the byte 0xFF
 * compares equal to EOF where char is signed, and EOF never matches where
 * char is unsigned. A caller that passes a char is still accepted through
 * the usual integer conversion.
 *
 * Returns one of the JLEXTYPE_* codes; every value that is not listed
 * explicitly is reported as JLEXTYPE_CHAR.
 */
static int get_ltype(int letter) {
    switch (letter) {
    case EOF:
        return JLEXTYPE_EOF;
    case '{':
        return JLEXTYPE_BLOCKB;
    case '}':
        return JLEXTYPE_BLOCKE;
    case '"':
        return JLEXTYPE_WORDL;
    case ' ':
    case '\t':
    case '\n':
    case '\r':              /* keeps CRLF input from producing stray tokens */
        return JLEXTYPE_SPACE;
    case ':':
        return JLEXTYPE_SEPAR;
    case ',':
        return JLEXTYPE_COMMA;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '.':
    case 'E':
    case 'e':
    case '+':
    case '-':
        return JLEXTYPE_NUM;
    default:
        break;
    }
    return JLEXTYPE_CHAR;
}
|
|
|
|
/*
 * Put a lexer into its starting state and attach the character source.
 *
 * lexer - lexer object to set up; every field used by this file is written.
 * cache - character source the lexer will read from; only the pointer is
 *         stored here.
 */
void jlexer_init(jlexer_t *lexer, rcache_t *cache) {
    /* Nothing has been read yet, so there is no character to replay. */
    lexer->letter = EOF;
    lexer->rewind = false;

    /* Start between tokens with an empty token buffer position. */
    lexer->tokpos = 0;
    lexer->context = JLEXCONT_UNDEF;

    lexer->cache = cache;
}
|
|
|
|
/*
 * Finish the token being accumulated: NUL-terminate it, return the lexer
 * to the between-tokens context and pass the token code back.
 * `rewind` says whether the current character must be seen again by the
 * next read (true when it ended the token without belonging to it).
 */
static int finish_token(jlexer_t *lexer, char *token, bool rewind, int tokcode) {
    token[lexer->tokpos++] = '\0';
    lexer->rewind = rewind;
    lexer->context = JLEXCONT_UNDEF;
    return tokcode;
}

/*
 * Read the next token from the lexer's character source.
 *
 * lexer - lexer previously set up with jlexer_init().
 * token - output buffer for the token text. No size is passed in, so the
 *         writes are unbounded: the caller must supply a buffer large
 *         enough for the longest token in the input.
 *
 * Returns a JLEXTOK_* code (declared outside this file) and fills `token`:
 *   JLEXTOK_BLOCKB  "BLOCK BEGIN"  for '{'
 *   JLEXTOK_BLOCKE  "BLOCK END"    for '}'
 *   JLEXTOK_SEPAR   "IS"           for ':'
 *   JLEXTOK_NEXT    "NEXT"         for ','
 *   JLEXTOK_WORD    the characters between two double quotes, quotes
 *                   excluded; a backslash and the character after it are
 *                   copied verbatim, so an escaped quote does not end the
 *                   string. A string cut short by end of input is returned
 *                   as it stands.
 *   JLEXTOK_NUMB    a maximal run of number-class characters (digits,
 *                   '.', 'e', 'E', '+', '-'); the run is not validated.
 *   JLEXTOK_UNKNOW  "WTF? " followed by a run of unclassified and
 *                   number-class characters.
 *   JLEXTOK_END     "EOF" once end of input is reached, and on every
 *                   later call.
 *   JLEXTOK_UNDEF   "UNDEF" if lexer->context holds an unknown value.
 *
 * Whitespace between tokens is skipped.
 *
 * NOTE(review): end-of-input detection depends on lexer->letter being able
 * to hold EOF distinctly from data bytes - confirm its type in jlexer.h.
 */
int jlexer_gettoken(jlexer_t *lexer, char *token) {
    static const char unknown_prefix[] = "WTF? ";

    for (;;) {
        /* End of input is sticky; report it without reading any further. */
        if (lexer->context == JLEXCONT_END) {
            lexer->tokpos = 0;
            lexer->rewind = false;
            strcpy(token, "EOF");
            return JLEXTOK_END;
        }

        /* Fetch a new character unless the previous one must be replayed. */
        if (!lexer->rewind) {
            lexer->letter = rcache_getc(lexer->cache);
        }
        lexer->rewind = false;

        int type = get_ltype(lexer->letter);

        switch (lexer->context) {
        case JLEXCONT_UNDEF:
            switch (type) {
            case JLEXTYPE_EOF:
                /* The next loop iteration reports the end of input. */
                lexer->context = JLEXCONT_END;
                break;
            case JLEXTYPE_BLOCKB:
                strcpy(token, "BLOCK BEGIN");
                return JLEXTOK_BLOCKB;
            case JLEXTYPE_BLOCKE:
                strcpy(token, "BLOCK END");
                return JLEXTOK_BLOCKE;
            case JLEXTYPE_SEPAR:
                strcpy(token, "IS");
                return JLEXTOK_SEPAR;
            case JLEXTYPE_COMMA:
                strcpy(token, "NEXT");
                return JLEXTOK_NEXT;
            case JLEXTYPE_WORDL:
                /* Opening quote: start an empty string token. */
                lexer->tokpos = 0;
                lexer->context = JLEXCONT_WORD;
                break;
            case JLEXTYPE_NUM:
                /* The first number character is part of the token. */
                lexer->tokpos = 0;
                token[lexer->tokpos++] = lexer->letter;
                lexer->context = JLEXCONT_NUM;
                break;
            case JLEXTYPE_CHAR:
                /* Unrecognised input is collected behind a marker prefix. */
                strcpy(token, unknown_prefix);
                lexer->tokpos = strlen(unknown_prefix);
                token[lexer->tokpos++] = lexer->letter;
                lexer->context = JLEXCONT_UNKNOW;
                break;
            default:
                /* Whitespace between tokens is skipped. */
                break;
            }
            break;

        case JLEXCONT_WORD:
            if (type == JLEXTYPE_WORDL) {
                /* The closing quote is consumed and not stored. */
                return finish_token(lexer, token, false, JLEXTOK_WORD);
            }
            if (type == JLEXTYPE_EOF) {
                /* Unterminated string: keep the EOF for the next call. */
                return finish_token(lexer, token, true, JLEXTOK_WORD);
            }
            token[lexer->tokpos++] = lexer->letter;
            if (lexer->letter == '\\') {
                /* Copy the escaped character as-is so that \" stays
                 * inside the string instead of terminating it. */
                lexer->letter = rcache_getc(lexer->cache);
                if (get_ltype(lexer->letter) == JLEXTYPE_EOF) {
                    return finish_token(lexer, token, true, JLEXTOK_WORD);
                }
                token[lexer->tokpos++] = lexer->letter;
            }
            break;

        case JLEXCONT_NUM:
            if (type != JLEXTYPE_NUM) {
                /* The terminating character belongs to the next token. */
                return finish_token(lexer, token, true, JLEXTOK_NUMB);
            }
            token[lexer->tokpos++] = lexer->letter;
            break;

        case JLEXCONT_UNKNOW:
            if (type != JLEXTYPE_CHAR && type != JLEXTYPE_NUM) {
                /* The terminating character belongs to the next token. */
                return finish_token(lexer, token, true, JLEXTOK_UNKNOW);
            }
            token[lexer->tokpos++] = lexer->letter;
            break;

        default:
            /* Unknown context: report it instead of consuming input
             * forever; the character just read is kept for replay. */
            lexer->rewind = true;
            strcpy(token, "UNDEF");
            return JLEXTOK_UNDEF;
        }
    }
}
|
|
|
|
/*
 * Counterpart of jlexer_init(). The body releases nothing and leaves the
 * lexer untouched; the argument is only marked as intentionally unused.
 */
void jlexer_destroy(jlexer_t *lexer) {
    (void) lexer;
    return;
}
|