Files
cworker/jlexer.c
2023-08-18 22:38:43 +02:00

217 lines
6.4 KiB
C

/*
* Copyright 2023 Oleg Borodin <borodin@unix7.org>
*/
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <rcache.h>
#include <jlexer.h>
#define JLEXCONT_UNDEF 0
#define JLEXCONT_WORD 1
#define JLEXCONT_BLOCKB 2
#define JLEXCONT_BLOCKE 3
#define JLEXCONT_SEPAR 4
#define JLEXCONT_NUM 5
#define JLEXCONT_UNKNOW 6
#define JLEXCONT_END 9
/* Character classes returned by get_ltype(). */
#define JLEXTYPE_UNDEF 0
#define JLEXTYPE_BLOCKB 1
#define JLEXTYPE_BLOCKE 2
#define JLEXTYPE_WORDL 3
#define JLEXTYPE_SPACE 4
#define JLEXTYPE_SEPAR 5
#define JLEXTYPE_NUM 6
#define JLEXTYPE_COMMA 7
#define JLEXTYPE_CHAR 8
#define JLEXTYPE_EOF 9

/*
 * Classifies a single input character into one of the JLEXTYPE_* classes.
 *
 * The argument is an int rather than a char so that EOF (a negative int)
 * can be told apart from the data byte 0xFF: with a char parameter a signed
 * char 0xFF compares equal to EOF, and an unsigned char never does.
 * NOTE(review): this only helps fully if the caller keeps the character in
 * an int as well; the type of lexer->letter is declared in jlexer.h.
 *
 * Returns:
 *   JLEXTYPE_EOF    for EOF
 *   JLEXTYPE_BLOCKB for '{'
 *   JLEXTYPE_BLOCKE for '}'
 *   JLEXTYPE_WORDL  for '"'
 *   JLEXTYPE_SPACE  for ' ', '\t', '\n', '\r'
 *   JLEXTYPE_SEPAR  for ':'
 *   JLEXTYPE_COMMA  for ','
 *   JLEXTYPE_NUM    for digits and '.', 'e', 'E', '+', '-'
 *   JLEXTYPE_CHAR   for everything else
 */
static int get_ltype(int letter) {
    switch (letter) {
    case EOF:
        return JLEXTYPE_EOF;
    case '{':
        return JLEXTYPE_BLOCKB;
    case '}':
        return JLEXTYPE_BLOCKE;
    case '"':
        return JLEXTYPE_WORDL;
    case ' ':
    case '\t':
    case '\n':
    case '\r': /* carriage return, so CRLF line endings are skipped like LF */
        return JLEXTYPE_SPACE;
    case ':':
        return JLEXTYPE_SEPAR;
    case ',':
        return JLEXTYPE_COMMA;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '.':
    case 'E':
    case 'e':
    case '+':
    case '-':
        /* Any character that may appear in a numeric literal; the lexer
         * does not validate the literal's shape. */
        return JLEXTYPE_NUM;
    default:
        return JLEXTYPE_CHAR;
    }
}
/*
 * Puts a lexer into its initial state and attaches it to a character source.
 *
 * lexer: lexer object to initialize; every field set here is overwritten.
 * cache: source that jlexer_gettoken() reads characters from via
 *        rcache_getc(). Only the pointer is stored.
 */
void jlexer_init(jlexer_t * lexer, rcache_t * cache) {
    /* No character has been read yet and none is waiting to be re-examined. */
    lexer->letter = EOF;
    lexer->rewind = false;

    /* Start between tokens, writing at the beginning of the token buffer. */
    lexer->tokpos = 0;
    lexer->context = JLEXCONT_UNDEF;

    lexer->cache = cache;
}
/*
 * Reads characters from the lexer's cache until one complete token has been
 * recognized, stores the token text in `token` as a NUL-terminated string,
 * and returns the token code.
 *
 * lexer: lexer previously set up with jlexer_init().
 * token: output buffer for the token text. The interface carries no buffer
 *        size, so writes are NOT bounded here; the caller must supply a
 *        buffer large enough for the longest string or number in the input.
 *
 * Returns:
 *   JLEXTOK_BLOCKB  for '{'            (token text "BLOCK BEGIN")
 *   JLEXTOK_BLOCKE  for '}'            (token text "BLOCK END")
 *   JLEXTOK_SEPAR   for ':'            (token text "IS")
 *   JLEXTOK_NEXT    for ','            (token text "NEXT")
 *   JLEXTOK_WORD    for a quoted string; the text is the content between the
 *                   quotes. Backslash escapes are not interpreted, and a
 *                   string cut short by end of input is still returned.
 *   JLEXTOK_NUMB    for a run of number characters (digits . e E + -)
 *   JLEXTOK_UNKNOW  for any other run of characters, prefixed with "WTF? "
 *   JLEXTOK_END     at end of input, and on every later call (text "EOF")
 *   JLEXTOK_UNDEF   if lexer->context holds a value this function does not
 *                   handle, e.g. an uninitialized lexer (text "UNDEF")
 */
int jlexer_gettoken(jlexer_t * lexer, char* token) {
    for (;;) {
        /* Once end of input has been seen, keep reporting it without
         * reading from the cache again. */
        if (lexer->context == JLEXCONT_END) {
            lexer->rewind = false;
            lexer->tokpos = 0;
            strcpy(token, "EOF");
            return JLEXTOK_END;
        }

        /* `rewind` means the previous character ended a token without
         * belonging to it, so it is examined again instead of reading a
         * new one. */
        if (!lexer->rewind) {
            lexer->letter = rcache_getc(lexer->cache);
        }
        lexer->rewind = false;
        int type = get_ltype(lexer->letter);

        switch (lexer->context) {
        case JLEXCONT_UNDEF:
            /* Between tokens: single-character tokens are returned at once,
             * multi-character tokens switch the context and keep scanning. */
            switch (type) {
            case JLEXTYPE_EOF:
                /* Reported as JLEXTOK_END at the top of the next iteration. */
                lexer->context = JLEXCONT_END;
                break;
            case JLEXTYPE_BLOCKB:
                strcpy(token, "BLOCK BEGIN");
                return JLEXTOK_BLOCKB;
            case JLEXTYPE_BLOCKE:
                strcpy(token, "BLOCK END");
                return JLEXTOK_BLOCKE;
            case JLEXTYPE_SEPAR:
                strcpy(token, "IS");
                return JLEXTOK_SEPAR;
            case JLEXTYPE_COMMA:
                strcpy(token, "NEXT");
                return JLEXTOK_NEXT;
            case JLEXTYPE_WORDL:
                /* Opening quote: it is not part of the token text. */
                lexer->tokpos = 0;
                lexer->context = JLEXCONT_WORD;
                break;
            case JLEXTYPE_NUM:
                lexer->tokpos = 0;
                token[lexer->tokpos++] = lexer->letter;
                lexer->context = JLEXCONT_NUM;
                break;
            case JLEXTYPE_CHAR: {
                /* Unrecognized input is collected behind a marker prefix. */
                const char *prefix = "WTF? ";
                strcpy(token, prefix);
                lexer->tokpos = strlen(prefix);
                token[lexer->tokpos++] = lexer->letter;
                lexer->context = JLEXCONT_UNKNOW;
                break;
            }
            default:
                /* Whitespace between tokens is skipped. */
                break;
            }
            break;

        case JLEXCONT_WORD:
            if (type == JLEXTYPE_WORDL || type == JLEXTYPE_EOF) {
                /* The closing quote is consumed. An EOF is kept pending so
                 * the next call reports JLEXTOK_END without reading past
                 * the end of input. */
                lexer->rewind = (type == JLEXTYPE_EOF);
                token[lexer->tokpos++] = '\0';
                lexer->context = JLEXCONT_UNDEF;
                return JLEXTOK_WORD;
            }
            token[lexer->tokpos++] = lexer->letter;
            break;

        case JLEXCONT_NUM:
            if (type != JLEXTYPE_NUM) {
                /* The terminating character belongs to the next token. */
                token[lexer->tokpos++] = '\0';
                lexer->rewind = true;
                lexer->context = JLEXCONT_UNDEF;
                return JLEXTOK_NUMB;
            }
            token[lexer->tokpos++] = lexer->letter;
            break;

        case JLEXCONT_UNKNOW:
            if (type != JLEXTYPE_CHAR && type != JLEXTYPE_NUM) {
                /* The terminating character belongs to the next token. */
                token[lexer->tokpos++] = '\0';
                lexer->rewind = true;
                lexer->context = JLEXCONT_UNDEF;
                return JLEXTOK_UNKNOW;
            }
            token[lexer->tokpos++] = lexer->letter;
            break;

        default:
            /* Unknown context value: report it instead of looping forever. */
            strcpy(token, "UNDEF");
            return JLEXTOK_UNDEF;
        }
    }
}
/*
 * Counterpart to jlexer_init(). Currently a no-op: the body only marks the
 * parameter as used, and neither the lexer nor lexer->cache is modified.
 */
void jlexer_destroy(jlexer_t * lexer) {
(void)lexer;
}