Files
cworker/clib/cllexer.c
2023-08-31 03:26:16 +02:00

255 lines
8.1 KiB
C

/*
* Copyright 2023 Oleg Borodin <borodin@unix7.org>
*/
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <cllexer.h>
/* Lexer contexts: the kind of token the state machine is currently assembling. */
enum {
    CLLEXCONT_UNDEF = 0,    /* between tokens, nothing collected yet */
    CLLEXCONT_KEY   = 1,    /* inside a run of '-' characters */
    CLLEXCONT_WORD  = 2,    /* inside an unquoted word */
    CLLEXCONT_SPACE = 3,    /* inside a run of spaces/tabs */
    CLLEXCONT_SEP   = 4,    /* inside a run of '=' characters */
    CLLEXCONT_END   = 5,    /* end of input has been reached */
    CLLEXCONT_LWORD = 6     /* inside a double-quoted word */
};

/* Character classes assigned to single input characters by get_ltype(). */
enum {
    CLLEXTYPE_UNDEF = 0,    /* unclassified */
    CLLEXTYPE_KEY   = 1,    /* '-' */
    CLLEXTYPE_CHAR  = 2,    /* any character without a special meaning */
    CLLEXTYPE_SEP   = 3,    /* '=' */
    CLLEXTYPE_SPACE = 4,    /* ' ' or '\t' */
    CLLEXTYPE_WORDL = 5,    /* '"', the quoted-word delimiter */
    CLLEXTYPE_SCR   = 6,    /* '\\', the escape character */
    CLLEXTYPE_EOF   = 7     /* the EOF sentinel */
};
/*
 * Classify one input character into a CLLEXTYPE_* class.
 *
 * The parameter is an int rather than a char so that the EOF sentinel is
 * compared at full width: narrowing EOF to char yields 255 on platforms
 * where plain char is unsigned, and that value never matches `case EOF`.
 * NOTE(review): this only helps if the caller's storage for the letter can
 * itself hold EOF distinctly - confirm the field type in cllexer.h.
 *
 * Returns CLLEXTYPE_EOF for EOF, CLLEXTYPE_WORDL for '"', CLLEXTYPE_SCR for
 * a backslash, CLLEXTYPE_KEY for '-', CLLEXTYPE_SPACE for space or tab,
 * CLLEXTYPE_SEP for '=', and CLLEXTYPE_CHAR for every other value.
 */
static int get_ltype(int letter) {
    switch (letter) {
        case EOF:
            return CLLEXTYPE_EOF;
        case '"':
            return CLLEXTYPE_WORDL;
        case '\\':
            return CLLEXTYPE_SCR;
        case '-':
            return CLLEXTYPE_KEY;
        case ' ':
        case '\t':
            return CLLEXTYPE_SPACE;
        case '=':
            return CLLEXTYPE_SEP;
        default:
            break;
    }
    /* Anything without a special meaning is an ordinary word character. */
    return CLLEXTYPE_CHAR;
}
/*
 * Put a lexer into its initial state with no input attached.
 *
 * The input pointer is cleared and its length and read position are zeroed,
 * the state machine is placed between tokens, and the pushback and escape
 * flags are cleared.
 */
void cllexer_init(cllexer_t * lexer) {
    /* No input string is bound yet. */
    lexer->argstr = NULL;
    lexer->arglen = 0;
    lexer->argpos = 0;

    /* State machine starts between tokens with an empty token. */
    lexer->context = CLLEXCONT_UNDEF;
    lexer->tokpos = 0;
    lexer->letter = EOF;

    /* Nothing pushed back, no escape pending. */
    lexer->rewind = false;
    lexer->screen = false;
}
/*
 * Bind the lexer to a new input string and restart scanning from its
 * first character.
 *
 * Only the pointer is stored, not a copy of the string, so the caller must
 * keep argstr valid for as long as tokens are read from this lexer.
 * A NULL argstr is accepted and treated as an empty input (length 0)
 * instead of being passed to strlen().
 */
void cllexer_reset(cllexer_t * lexer, char* argstr) {
    lexer->context = CLLEXCONT_UNDEF;
    lexer->tokpos = 0;
    lexer->letter = EOF;
    lexer->rewind = false;
    lexer->argstr = argstr;
    lexer->arglen = 0;
    if (argstr != NULL) {
        lexer->arglen = strlen(argstr);
    }
    lexer->argpos = 0;
    lexer->screen = false;
}
/*
 * Read the next token from the lexer's input string.
 *
 * The token text is written NUL-terminated into `token` and the token kind
 * is returned:
 *   CLLEXTOK_KEY   - a run of '-' characters;
 *   CLLEXTOK_SEP   - a run of '=' characters;
 *   CLLEXTOK_SPACE - a run of spaces and/or tabs;
 *   CLLEXTOK_WORD  - an unquoted word, or the contents of a double-quoted
 *                    string with the quotes removed;
 *   CLLEXTOK_END   - end of input; the token text is "END" and every
 *                    further call returns CLLEXTOK_END again.
 *
 * A backslash makes the following character an ordinary part of the word
 * (the backslash itself is not stored). An unquoted word ends before the
 * first unescaped '"', '-', '=', space or tab, or at end of input. A quoted
 * word ends at the first unescaped '"' or at end of input. A backslash that
 * is the last character of the input is dropped.
 *
 * No bounds checking is done on `token`. Every stored character comes from
 * one character of the input, so a buffer of strlen(input) + 1 bytes, and
 * not less than 4 bytes for "END", is sufficient.
 *
 * NOTE(review): an input byte that compares equal to EOF (0xFF where plain
 * char is signed) cannot be told apart from end of input here - confirm the
 * inputs cannot contain it.
 */
int cllexer_gettoken(cllexer_t * lexer, char* token) {
    while (true) {
        /* Fetch the next character unless the previous one was pushed back. */
        if (!lexer->rewind) {
            if (lexer->argpos >= lexer->arglen) {
                lexer->letter = EOF;
            } else {
                lexer->letter = lexer->argstr[lexer->argpos++];
            }
        }
        lexer->rewind = false;
        int type = get_ltype(lexer->letter);

        switch (lexer->context) {
            case CLLEXCONT_END: {
                /* Sticky state: keep reporting the end of input. */
                lexer->context = CLLEXCONT_END;
                lexer->tokpos = 0;
                strcpy(token, "END");
                return CLLEXTOK_END;
            }
            case CLLEXCONT_UNDEF: {
                /* Between tokens: the first character selects the token kind. */
                switch (type) {
                    case CLLEXTYPE_WORDL: {
                        /* Opening quote is consumed, not stored. */
                        lexer->tokpos = 0;
                        lexer->context = CLLEXCONT_LWORD;
                        break;
                    }
                    case CLLEXTYPE_EOF: {
                        /* Push EOF back so the END state reports it. */
                        lexer->tokpos = 0;
                        lexer->context = CLLEXCONT_END;
                        lexer->rewind = true;
                        break;
                    }
                    case CLLEXTYPE_KEY: {
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_KEY;
                        break;
                    }
                    case CLLEXTYPE_SEP: {
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_SEP;
                        break;
                    }
                    case CLLEXTYPE_SCR: {
                        /* Word that starts with an escaped character. */
                        lexer->tokpos = 0;
                        lexer->screen = true;
                        lexer->context = CLLEXCONT_WORD;
                        break;
                    }
                    case CLLEXTYPE_CHAR: {
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_WORD;
                        break;
                    }
                    case CLLEXTYPE_SPACE: {
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_SPACE;
                        break;
                    }
                    default: {
                        break;
                    }
                }
                break;
            }
            case CLLEXCONT_KEY: {
                switch (type) {
                    case CLLEXTYPE_KEY:
                        break;
                    case CLLEXTYPE_EOF:
                    default: {
                        /* Any other character ends the run and is re-read. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_KEY;
                    }
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            case CLLEXCONT_WORD: {
                switch (type) {
                    case CLLEXTYPE_SCR: {
                        if (lexer->screen) {
                            /* Escaped backslash: store it literally. */
                            token[lexer->tokpos++] = lexer->letter;
                            lexer->screen = false;
                        } else {
                            /* Start of an escape: remember it, store nothing. */
                            lexer->screen = true;
                        }
                        continue;
                    }
                    case CLLEXTYPE_CHAR:
                        break;
                    case CLLEXTYPE_EOF: {
                        /*
                         * End of input always ends the word. A pending
                         * escape is dropped so the EOF sentinel is never
                         * stored as a character of the token.
                         */
                        lexer->screen = false;
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_WORD;
                    }
                    default: {
                        if (lexer->screen) {
                            /* Escaped special character: store it below. */
                            lexer->screen = false;
                            break;
                        }
                        /* Unescaped special character ends the word. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_WORD;
                    }
                }
                lexer->screen = false;
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            case CLLEXCONT_LWORD: {
                switch (type) {
                    case CLLEXTYPE_SCR:
                        if (lexer->screen) {
                            /* Escaped backslash: store it literally. */
                            token[lexer->tokpos++] = lexer->letter;
                            lexer->screen = false;
                        } else {
                            lexer->screen = true;
                        }
                        continue;
                    case CLLEXTYPE_WORDL:
                        if (lexer->screen) {
                            /* Escaped quote: store it below. */
                            lexer->screen = false;
                            break;
                        }
                        /* Closing quote is consumed, so no pushback. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->screen = false;
                        return CLLEXTOK_WORD;
                    case CLLEXTYPE_EOF:
                        /* Unterminated quote: return what was collected. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        lexer->screen = false;
                        return CLLEXTOK_WORD;
                    default: {
                        break;
                    }
                }
                lexer->screen = false;
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            case CLLEXCONT_SPACE: {
                switch (type) {
                    case CLLEXTYPE_SPACE:
                        break;
                    case CLLEXTYPE_EOF:
                    default: {
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_SPACE;
                    }
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            case CLLEXCONT_SEP: {
                switch (type) {
                    case CLLEXTYPE_SEP:
                        break;
                    case CLLEXTYPE_EOF:
                    default: {
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_SEP;
                    }
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
        }
    }
    /* Not reached: the loop above is only left through a return. */
    strcpy(token, "UNDEF");
    return CLLEXTOK_UNDEF;
}
/*
 * Release a lexer. This is currently a no-op: the function performs no
 * deallocation and does not touch the lexer's fields.
 */
void cllexer_destroy(cllexer_t * lexer) {
/* The cast only marks the parameter as intentionally unused. */
(void)lexer;
}