Files
cworker/clib/jlexer.c
2023-09-04 22:12:47 +02:00

233 lines
7.0 KiB
C

/*
* Copyright 2023 Oleg Borodin <borodin@unix7.org>
*/
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <rcache.h>
#include <jlexer.h>
/*
 * Lexer contexts: the state stored in lexer->context that tells
 * jlexer_gettoken() what kind of token is currently being collected.
 */
enum {
    JLEXCONT_UNDEF  = 0x00, /* between tokens; the next character decides what starts */
    JLEXCONT_WORD   = 0x01, /* collecting the text of a double-quoted string */
    JLEXCONT_BLOCKB = 0x02, /* not referenced by the code visible in this file */
    JLEXCONT_BLOCKE = 0x03, /* not referenced by the code visible in this file */
    JLEXCONT_SEPAR  = 0x04, /* not referenced by the code visible in this file */
    JLEXCONT_NUM    = 0x05, /* collecting the characters of a number */
    JLEXCONT_UNKNOW = 0x06, /* collecting a run of unrecognized characters */
    JLEXCONT_END    = 0x99  /* end of input has been reached */
};
/*
 * Character classes produced by get_ltype(). Every input character is mapped
 * to exactly one of these before the lexer state machine looks at it.
 */
enum {
    JLEXTYPE_UNDEF  = 0x00, /* no class assigned; never returned by get_ltype() */
    JLEXTYPE_BLOCKB = 0x01, /* '{' */
    JLEXTYPE_BLOCKE = 0x02, /* '}' */
    JLEXTYPE_WORDL  = 0x03, /* '"' string delimiter */
    JLEXTYPE_SPACE  = 0x04, /* JSON whitespace: space, tab, line feed, carriage return */
    JLEXTYPE_SEPAR  = 0x05, /* ':' */
    JLEXTYPE_NUM    = 0x06, /* a character that may appear in a number */
    JLEXTYPE_COMMA  = 0x07, /* ',' */
    JLEXTYPE_CHAR   = 0x08, /* any other character */
    JLEXTYPE_ARRB   = 0x11, /* '[' */
    JLEXTYPE_ARRE   = 0x12, /* ']' */
    JLEXTYPE_EOF    = 0x99  /* end of input */
};

/*
 * Classify a single input character.
 *
 * letter: a character value, or EOF. The parameter is an int rather than a
 *         char so that EOF stays distinguishable from the byte 0xFF and is
 *         still recognized on platforms where plain char is unsigned.
 *
 * Returns one of the JLEXTYPE_* classes; anything not listed explicitly is
 * reported as JLEXTYPE_CHAR.
 */
static int get_ltype(int letter) {
    switch (letter) {
        case EOF:
            return JLEXTYPE_EOF;
        case '{':
            return JLEXTYPE_BLOCKB;
        case '}':
            return JLEXTYPE_BLOCKE;
        case '[':
            return JLEXTYPE_ARRB;
        case ']':
            return JLEXTYPE_ARRE;
        case '"':
            return JLEXTYPE_WORDL;
        case ' ':
        case '\t':
        case '\n':
        case '\r': /* carriage return is JSON whitespace too (CRLF line endings) */
            return JLEXTYPE_SPACE;
        case ':':
            return JLEXTYPE_SEPAR;
        case ',':
            return JLEXTYPE_COMMA;
        /* Digits plus the sign, fraction and exponent characters of a number. */
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
        case 'E':
        case 'e':
        case '+':
        case '-':
            return JLEXTYPE_NUM;
        default:
            return JLEXTYPE_CHAR;
    }
}
/*
 * Put a lexer into its starting state.
 *
 * lexer: the lexer object to initialize.
 * cache: the character source; the pointer is stored in the lexer and is the
 *        one jlexer_gettoken() reads from via rcache_getc().
 *
 * After this call the lexer is between tokens, has no collected token text,
 * holds EOF as its current letter and has no character pending for
 * re-examination.
 */
void jlexer_init(jlexer_t * lexer, rcache_t * cache) {
    /* Scanner state first: nothing read, nothing collected, nothing pending. */
    lexer->rewind = false;
    lexer->letter = EOF;
    lexer->tokpos = 0;
    lexer->context = JLEXCONT_UNDEF;

    /* Finally attach the input source. */
    lexer->cache = cache;
}
/*
 * Read characters from the lexer's cache until one complete token has been
 * recognized, store its text in `token` and return its token code.
 *
 * lexer: lexer state previously set up with jlexer_init().
 * token: output buffer that receives the NUL-terminated token text.
 *        NOTE: the buffer size is not known here and no bounds checking is
 *        done, so the caller must supply a buffer large enough for the
 *        longest token in the input.
 *
 * Returns:
 *   JLEXTOK_BLOCKB / JLEXTOK_BLOCKE  for '{' / '}'  (text "BLOCK BEGIN" / "BLOCK END")
 *   JLEXTOK_ARRB / JLEXTOK_ARRE      for '[' / ']'  (text "ARR BEGIN" / "ARR END")
 *   JLEXTOK_SEPAR                    for ':'        (text "IS")
 *   JLEXTOK_NEXT                     for ','        (text "NEXT")
 *   JLEXTOK_WORD    for a double-quoted string; the text is the raw content
 *                   between the quotes, with backslash escapes kept as written
 *   JLEXTOK_NUMB    for a run of number characters (no validation is done)
 *   JLEXTOK_UNKNOW  for a run of unrecognized characters, prefixed with "WTF? "
 *   JLEXTOK_END     at end of input (text "EOF"), and on every later call
 *
 * Whitespace between tokens is skipped.
 */
int jlexer_gettoken(jlexer_t * lexer, char* token) {
    /*
     * True while the previous string character was an unescaped backslash.
     * A string is always consumed completely within one call, so this does
     * not need to live in the lexer object.
     */
    bool escaped = false;

    while (true) {
        /*
         * A character that terminated the previous token (or EOF) is examined
         * again instead of reading a new one.
         */
        if (!lexer->rewind) {
            lexer->letter = rcache_getc(lexer->cache);
        }
        lexer->rewind = false;
        int type = get_ltype(lexer->letter);

        switch (lexer->context) {
            case JLEXCONT_END: {
                /* End of input is sticky: keep reporting it. */
                lexer->tokpos = 0;
                strcpy(token, "EOF");
                return JLEXTOK_END;
            }

            case JLEXCONT_UNDEF: {
                /* Between tokens: the current character decides what starts. */
                switch (type) {
                    case JLEXTYPE_EOF:
                        /* Switch to the end state and let it handle this EOF. */
                        lexer->context = JLEXCONT_END;
                        lexer->rewind = true;
                        break;
                    case JLEXTYPE_BLOCKB:
                        strcpy(token, "BLOCK BEGIN");
                        return JLEXTOK_BLOCKB;
                    case JLEXTYPE_BLOCKE:
                        strcpy(token, "BLOCK END");
                        return JLEXTOK_BLOCKE;
                    case JLEXTYPE_ARRB:
                        strcpy(token, "ARR BEGIN");
                        return JLEXTOK_ARRB;
                    case JLEXTYPE_ARRE:
                        strcpy(token, "ARR END");
                        return JLEXTOK_ARRE;
                    case JLEXTYPE_SEPAR:
                        strcpy(token, "IS");
                        return JLEXTOK_SEPAR;
                    case JLEXTYPE_COMMA:
                        strcpy(token, "NEXT");
                        return JLEXTOK_NEXT;
                    case JLEXTYPE_WORDL:
                        /* Opening quote: it is not part of the token text. */
                        lexer->tokpos = 0;
                        escaped = false;
                        lexer->context = JLEXCONT_WORD;
                        break;
                    case JLEXTYPE_NUM:
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = JLEXCONT_NUM;
                        break;
                    case JLEXTYPE_CHAR: {
                        const char *prefix = "WTF? ";
                        strcpy(token, prefix);
                        lexer->tokpos = strlen(prefix);
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = JLEXCONT_UNKNOW;
                        break;
                    }
                    default:
                        /* Whitespace between tokens is skipped. */
                        break;
                }
                break;
            }

            case JLEXCONT_WORD: {
                if (type == JLEXTYPE_EOF) {
                    /*
                     * Unterminated string: hand back what was collected and
                     * keep the EOF pending so the next call reports the end
                     * of input without reading past it.
                     */
                    token[lexer->tokpos++] = '\0';
                    lexer->rewind = true;
                    lexer->context = JLEXCONT_UNDEF;
                    return JLEXTOK_WORD;
                }
                if (type == JLEXTYPE_WORDL && !escaped) {
                    /* Closing quote; a quote preceded by a backslash is not one. */
                    token[lexer->tokpos++] = '\0';
                    lexer->context = JLEXCONT_UNDEF;
                    return JLEXTOK_WORD;
                }
                /* A backslash escapes the next character unless it is escaped itself. */
                escaped = !escaped && lexer->letter == '\\';
                token[lexer->tokpos++] = lexer->letter;
                break;
            }

            case JLEXCONT_NUM: {
                if (type != JLEXTYPE_NUM) {
                    /* The delimiter belongs to the next token: re-examine it. */
                    token[lexer->tokpos++] = '\0';
                    lexer->rewind = true;
                    lexer->context = JLEXCONT_UNDEF;
                    return JLEXTOK_NUMB;
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }

            case JLEXCONT_UNKNOW: {
                if (type != JLEXTYPE_CHAR && type != JLEXTYPE_NUM) {
                    /* The delimiter belongs to the next token: re-examine it. */
                    token[lexer->tokpos++] = '\0';
                    lexer->rewind = true;
                    lexer->context = JLEXCONT_UNDEF;
                    return JLEXTOK_UNKNOW;
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }

            default: {
                /*
                 * Defensive: an unexpected context value would otherwise
                 * consume input forever. Treat it as "between tokens" and
                 * look at the current character again.
                 */
                lexer->context = JLEXCONT_UNDEF;
                lexer->rewind = true;
                break;
            }
        }
    }

    /* Not reached: the loop above only exits by returning a token. */
    strcpy(token, "UNDEF");
    return JLEXTOK_UNDEF;
}
/*
 * Counterpart of jlexer_init(). It currently does nothing: the lexer is not
 * modified and the cache pointer stored in it is not touched.
 */
void jlexer_destroy(jlexer_t * lexer) {
/* Mark the parameter as intentionally unused. */
(void)lexer;
}