241 lines
7.5 KiB
C
241 lines
7.5 KiB
C
/*
 * Copyright 2023 Oleg Borodin <borodin@unix7.org>
 */
|
|
|
|
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <rcache.h>
#include <jlexer.h>
|
|
|
|
/*
 * Lexer contexts: what kind of token jlexer_gettoken() is currently
 * in the middle of collecting.
 */
#define JLEXCONT_UNDEF      0x00    /* between tokens */
#define JLEXCONT_WORD       0x01    /* inside a double-quoted string */
#define JLEXCONT_BLOCKB     0x02    /* not referenced in this file */
#define JLEXCONT_BLOCKE     0x03    /* not referenced in this file */
#define JLEXCONT_SEPAR      0x04    /* not referenced in this file */
#define JLEXCONT_NUM        0x05    /* inside a number */
#define JLEXCONT_RAWSTR     0x06    /* inside an unquoted word */
#define JLEXCONT_END        0x99    /* end of input has been seen */

/*
 * Character classes produced by get_ltype().
 */
#define JLEXTYPE_UNDEF      0x00
#define JLEXTYPE_BLOCKB     0x01    /* '{' */
#define JLEXTYPE_BLOCKE     0x02    /* '}' */
#define JLEXTYPE_WORDL      0x03    /* '"' */
#define JLEXTYPE_SPACE      0x04    /* blank, tab, newline, carriage return */
#define JLEXTYPE_SEPAR      0x05    /* ':' */
#define JLEXTYPE_NUM        0x06    /* digit, sign, '.', 'e' or 'E' */
#define JLEXTYPE_COMMA      0x07    /* ',' */
#define JLEXTYPE_CHAR       0x08    /* anything else */
#define JLEXTYPE_ARRB       0x11    /* '[' */
#define JLEXTYPE_ARRE       0x12    /* ']' */
#define JLEXTYPE_EOF        0x99    /* end-of-input marker */

/*
 * Map one input character to its JLEXTYPE_* class.
 *
 * The end-of-input marker is compared as (char)EOF rather than as the int
 * EOF: the argument is a plain char, so on targets where char is unsigned
 * it can never equal -1 and a "case EOF" label would never be taken.
 * NOTE(review): because the marker travels in a char, an input byte with
 * the same value as (char)EOF is indistinguishable from end of input.
 *
 * Carriage return is classed as whitespace together with blank, tab and
 * newline, so CRLF-terminated input does not yield stray tokens.
 */
static int get_ltype(char letter) {
    if (letter == (char)EOF) {
        return JLEXTYPE_EOF;
    }

    switch (letter) {
    case '{':
        return JLEXTYPE_BLOCKB;
    case '}':
        return JLEXTYPE_BLOCKE;
    case '[':
        return JLEXTYPE_ARRB;
    case ']':
        return JLEXTYPE_ARRE;
    case '"':
        return JLEXTYPE_WORDL;
    case ':':
        return JLEXTYPE_SEPAR;
    case ',':
        return JLEXTYPE_COMMA;

    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return JLEXTYPE_SPACE;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '+': case '-': case '.':
    case 'E': case 'e':
        return JLEXTYPE_NUM;

    default:
        return JLEXTYPE_CHAR;
    }
}
|
|
|
|
/*
 * Put a lexer into its starting state, reading from `cache`.
 *
 * The lexer begins between tokens (JLEXCONT_UNDEF) with an empty token
 * position, no pushed-back character and EOF as the placeholder for the
 * current character.
 */
void jlexer_init(jlexer_t *lexer, rcache_t *cache) {
    lexer->rewind = false;
    lexer->letter = EOF;
    lexer->tokpos = 0;
    lexer->context = JLEXCONT_UNDEF;
    lexer->cache = cache;
}
|
|
|
|
/*
 * Report whether a quote arriving after the first `len` characters of
 * `token` is escaped, i.e. whether those characters end in an odd number of
 * backslashes. An even run consists of escaped backslashes only and leaves
 * the quote unescaped.
 */
static bool jlexer_quote_is_escaped(const char *token, size_t len) {
    size_t slashes = 0;

    while (len > 0 && token[len - 1] == '\\') {
        slashes++;
        len--;
    }
    return (slashes % 2) != 0;
}

/*
 * Read characters from the lexer's cache until one complete token has been
 * recognised, store its text in `token` and return its JLEXTOK_* code.
 *
 * lexer  lexer state; context, tokpos, letter and rewind are updated.
 * token  output buffer, always NUL-terminated on return. No capacity is
 *        passed in, so nothing here bounds the writes: the caller must
 *        supply a buffer large enough for the longest token in the input.
 *
 * Structural tokens get a fixed description as their text ("BLOCK BEGIN",
 * "BLOCK END", "ARR BEGIN", "ARR END", "IS", "NEXT"); end of input gives
 * "EOF". Strings, numbers and unquoted words get the characters collected
 * from the input. Inside a string a backslash-escaped quote is stored as a
 * bare quote; every other backslash is kept verbatim.
 *
 * Returns JLEXTOK_BLOCKB, JLEXTOK_BLOCKE, JLEXTOK_ARRB, JLEXTOK_ARRE,
 * JLEXTOK_SEPAR, JLEXTOK_NEXT, JLEXTOK_WORD, JLEXTOK_INTEG, JLEXTOK_FLOAT,
 * JLEXTOK_RAWSTR or JLEXTOK_END; JLEXTOK_UNDEF if lexer->context holds a
 * value this function does not know.
 */
int jlexer_gettoken(jlexer_t *lexer, char *token) {
    for (;;) {
        /* Fetch a new character unless the previous one was pushed back. */
        if (!lexer->rewind) {
            lexer->letter = rcache_getc(lexer->cache);
        }
        lexer->rewind = false;

        const int type = get_ltype(lexer->letter);

        switch (lexer->context) {
        case JLEXCONT_END:
            /* Once the end is reached every further call reports it. */
            lexer->tokpos = 0;
            strcpy(token, "EOF");
            return JLEXTOK_END;

        case JLEXCONT_UNDEF:
            /* Between tokens: emit a structural token or open a new one. */
            switch (type) {
            case JLEXTYPE_EOF:
                /* Re-dispatch the same character in the END context. */
                lexer->context = JLEXCONT_END;
                lexer->rewind = true;
                break;
            case JLEXTYPE_BLOCKB:
                strcpy(token, "BLOCK BEGIN");
                return JLEXTOK_BLOCKB;
            case JLEXTYPE_BLOCKE:
                strcpy(token, "BLOCK END");
                return JLEXTOK_BLOCKE;
            case JLEXTYPE_ARRB:
                strcpy(token, "ARR BEGIN");
                return JLEXTOK_ARRB;
            case JLEXTYPE_ARRE:
                strcpy(token, "ARR END");
                return JLEXTOK_ARRE;
            case JLEXTYPE_SEPAR:
                strcpy(token, "IS");
                return JLEXTOK_SEPAR;
            case JLEXTYPE_COMMA:
                strcpy(token, "NEXT");
                return JLEXTOK_NEXT;
            case JLEXTYPE_WORDL:
                /* The opening quote itself is not part of the token. */
                lexer->tokpos = 0;
                lexer->context = JLEXCONT_WORD;
                break;
            case JLEXTYPE_NUM:
                lexer->tokpos = 0;
                token[lexer->tokpos++] = lexer->letter;
                lexer->context = JLEXCONT_NUM;
                break;
            case JLEXTYPE_CHAR:
                lexer->tokpos = 0;
                token[lexer->tokpos++] = lexer->letter;
                lexer->context = JLEXCONT_RAWSTR;
                break;
            default:
                /* Whitespace between tokens is skipped. */
                break;
            }
            break;

        case JLEXCONT_WORD:
            if (type == JLEXTYPE_EOF) {
                /*
                 * Input ended inside the string: return what was collected
                 * and push the end marker back so the next call reports
                 * the end without reading the cache again.
                 */
                token[lexer->tokpos++] = '\0';
                lexer->rewind = true;
                lexer->context = JLEXCONT_UNDEF;
                return JLEXTOK_WORD;
            }
            if (type == JLEXTYPE_WORDL) {
                if (!jlexer_quote_is_escaped(token, (size_t)lexer->tokpos)) {
                    /* Closing quote. */
                    token[lexer->tokpos++] = '\0';
                    lexer->context = JLEXCONT_UNDEF;
                    return JLEXTOK_WORD;
                }
                /* Escaped quote: the quote stored below replaces the backslash. */
                lexer->tokpos--;
            }
            token[lexer->tokpos++] = lexer->letter;
            break;

        case JLEXCONT_NUM:
            if (type == JLEXTYPE_NUM) {
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            /* Any other character ends the number and is lexed again. */
            token[lexer->tokpos++] = '\0';
            lexer->rewind = true;
            lexer->context = JLEXCONT_UNDEF;
            /* A decimal point or an exponent marker makes it a float. */
            return (strpbrk(token, "Ee.") != NULL) ? JLEXTOK_FLOAT : JLEXTOK_INTEG;

        case JLEXCONT_RAWSTR:
            if (type == JLEXTYPE_CHAR || type == JLEXTYPE_NUM) {
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            /* Any other character ends the word and is lexed again. */
            token[lexer->tokpos++] = '\0';
            lexer->rewind = true;
            lexer->context = JLEXCONT_UNDEF;
            return JLEXTOK_RAWSTR;

        default:
            /* Unknown context: report it instead of consuming input forever. */
            strcpy(token, "UNDEF");
            return JLEXTOK_UNDEF;
        }
    }
}
|
|
|
|
/*
 * Counterpart of jlexer_init(). It performs no work; the argument is only
 * marked as used.
 */
void jlexer_destroy(jlexer_t *lexer) {
    (void) lexer;
}
|