/*
 * Copyright 2023 Oleg Borodin <borodin@unix7.org>
 */
|
|
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#include <cllexer.h>
|
|
|
|
/*
 * Lexer contexts: the state stored in lexer->context that tells
 * cllexer_gettoken() which kind of token is currently being collected.
 */
enum {
    CLLEXCONT_UNDEF = 0,    /* between tokens, nothing collected yet */
    CLLEXCONT_KEY   = 1,    /* collecting a run of '-' characters */
    CLLEXCONT_WORD  = 2,    /* collecting a bare (unquoted) word */
    CLLEXCONT_SPACE = 3,    /* collecting a run of spaces and tabs */
    CLLEXCONT_SEP   = 4,    /* collecting a run of '=' characters */
    CLLEXCONT_END   = 5,    /* end of input has been reached */
    CLLEXCONT_LWORD = 6     /* collecting a double-quoted word */
};
|
|
|
|
/*
 * Character classes returned by get_ltype().
 */
enum {
    CLLEXTYPE_UNDEF = 0,    /* no class; never returned by get_ltype() */
    CLLEXTYPE_KEY   = 1,    /* '-' */
    CLLEXTYPE_CHAR  = 2,    /* any character without a special meaning */
    CLLEXTYPE_SEP   = 3,    /* '=' */
    CLLEXTYPE_SPACE = 4,    /* ' ' or '\t' */
    CLLEXTYPE_WORDL = 5,    /* '"' */
    CLLEXTYPE_SCR   = 6,    /* '\\' */
    CLLEXTYPE_EOF   = 7     /* EOF */
};

/*
 * Classify one input character.
 *
 * The argument is an int rather than a char: EOF is a negative int, and
 * narrowing it to a char whose signedness is implementation-defined makes
 * the comparison with EOF unreliable (never true where char is unsigned).
 *
 * letter - character value, or EOF for end of input.
 *
 * Returns one of the CLLEXTYPE_* classes; everything without a special
 * meaning is CLLEXTYPE_CHAR.
 */
static int get_ltype(int letter) {
    switch (letter) {
        case EOF:
            return CLLEXTYPE_EOF;
        case '"':
            return CLLEXTYPE_WORDL;
        case '\\':
            return CLLEXTYPE_SCR;
        case '-':
            return CLLEXTYPE_KEY;
        case ' ':
        case '\t':
            return CLLEXTYPE_SPACE;
        case '=':
            return CLLEXTYPE_SEP;
        default:
            return CLLEXTYPE_CHAR;
    }
}
|
|
|
|
/*
 * Put a lexer into its initial state with no argument string attached.
 *
 * With arglen left at 0, cllexer_gettoken() treats the input as exhausted
 * until cllexer_reset() supplies a string.
 *
 * lexer - lexer object to initialise.
 */
void cllexer_init(cllexer_t * lexer) {
    /* Input source: nothing attached yet. */
    lexer->argstr = NULL;
    lexer->arglen = 0;
    lexer->argpos = 0;

    /* Character scanner: no current letter, no push-back, no pending escape. */
    lexer->letter = EOF;
    lexer->rewind = false;
    lexer->screen = false;

    /* Token assembly: between tokens, output position at the start. */
    lexer->context = CLLEXCONT_UNDEF;
    lexer->tokpos = 0;
}
|
|
|
|
/*
 * Attach a new argument string to the lexer and rewind all scanning state.
 *
 * The string is not copied: only the pointer is stored, so it must stay
 * valid for as long as tokens are read from the lexer.
 *
 * lexer  - lexer object to reset.
 * argstr - NUL-terminated string to tokenize; NULL is treated as empty input.
 */
void cllexer_reset(cllexer_t * lexer, char* argstr) {
    lexer->context = CLLEXCONT_UNDEF;
    lexer->tokpos = 0;
    lexer->letter = EOF;
    lexer->rewind = false;
    lexer->screen = false;

    lexer->argstr = argstr;
    /* strlen(NULL) is undefined behaviour; a zero length makes
     * cllexer_gettoken() report end of input without reading argstr. */
    lexer->arglen = (argstr != NULL) ? strlen(argstr) : 0;
    lexer->argpos = 0;
}
|
|
|
|
/*
 * Extract the next token from the argument string attached to the lexer.
 *
 * The lexer is a state machine: lexer->context records which kind of token
 * is being collected and every input character is classified by get_ltype().
 * A character that ends a token is pushed back (lexer->rewind) so that it
 * starts the next one.
 *
 * lexer - lexer previously set up with cllexer_init()/cllexer_reset().
 * token - output buffer that receives the NUL-terminated token text. No
 *         bounds checking is done; the caller must supply a buffer large
 *         enough for the longest possible token plus the terminator.
 *
 * Returns:
 *   CLLEXTOK_KEY   - a run of '-' characters;
 *   CLLEXTOK_SEP   - a run of '=' characters;
 *   CLLEXTOK_SPACE - a run of spaces and tabs;
 *   CLLEXTOK_WORD  - a bare word (ended by an unescaped '-', '=', space, tab
 *                    or '"') or a double-quoted string without its quotes;
 *                    a backslash makes the following character literal;
 *   CLLEXTOK_END   - end of input, token is set to "END"; returned again on
 *                    every later call.
 *
 * A backslash that is the very last input character is dropped.
 */
int cllexer_gettoken(cllexer_t * lexer, char* token) {
    while (true) {
        /* Fetch the next character unless the previous one was pushed back. */
        if (!lexer->rewind) {
            if (lexer->argpos >= lexer->arglen) {
                lexer->letter = EOF;
            } else {
                lexer->letter = lexer->argstr[lexer->argpos++];
            }
        }
        lexer->rewind = false;

        int type = get_ltype(lexer->letter);

        switch (lexer->context) {
            /* Input exhausted: keep reporting END. */
            case CLLEXCONT_END:{
                lexer->context = CLLEXCONT_END;
                lexer->tokpos = 0;
                strcpy(token, "END");
                return CLLEXTOK_END;
            }
            /* Between tokens: the first character selects the next context. */
            case CLLEXCONT_UNDEF:{
                switch (type) {
                    case CLLEXTYPE_WORDL:{
                        /* The opening quote is not part of the token. */
                        lexer->tokpos = 0;
                        lexer->context = CLLEXCONT_LWORD;
                        break;
                    }
                    case CLLEXTYPE_EOF:{
                        lexer->tokpos = 0;
                        lexer->context = CLLEXCONT_END;
                        lexer->rewind = true;
                        break;
                    }
                    case CLLEXTYPE_KEY:{
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_KEY;
                        break;
                    }
                    case CLLEXTYPE_SEP:{
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_SEP;
                        break;
                    }
                    case CLLEXTYPE_SCR: {
                        /* A word that starts with an escape: the backslash
                         * itself is not stored. */
                        lexer->tokpos = 0;
                        lexer->screen = true;
                        lexer->context = CLLEXCONT_WORD;
                        break;
                    }
                    case CLLEXTYPE_CHAR:{
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_WORD;
                        break;
                    }
                    case CLLEXTYPE_SPACE:{
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CLLEXCONT_SPACE;
                        break;
                    }
                    default:
                        break;
                }
                break;
            }
            /* Run of '-' characters. */
            case CLLEXCONT_KEY:{
                switch (type) {
                    case CLLEXTYPE_KEY:
                        break;
                    case CLLEXTYPE_EOF:
                    default:{
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_KEY;
                    }
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            /* Bare word. */
            case CLLEXCONT_WORD:{
                switch (type) {
                    case CLLEXTYPE_SCR: {
                        if (lexer->screen == true) {
                            /* Escaped backslash: store it literally. */
                            token[lexer->tokpos++] = lexer->letter;
                            lexer->screen = false;
                        } else {
                            lexer->screen = true;
                        }
                        continue;
                    }
                    case CLLEXTYPE_CHAR:
                        break;
                    case CLLEXTYPE_EOF: {
                        /* End of input always finishes the word. A pending
                         * escape has nothing to apply to and is dropped
                         * instead of storing EOF as a character. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        lexer->screen = false;
                        return CLLEXTOK_WORD;
                    }
                    default: {
                        if (lexer->screen == true) {
                            /* Escaped delimiter: store it as a plain character. */
                            lexer->screen = false;
                            break;
                        }
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_WORD;
                    }
                }
                lexer->screen = false;
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            /* Double-quoted word. */
            case CLLEXCONT_LWORD:{
                switch (type) {
                    case CLLEXTYPE_SCR:
                        if (lexer->screen == true) {
                            token[lexer->tokpos++] = lexer->letter;
                            lexer->screen = false;
                        } else {
                            lexer->screen = true;
                        }
                        continue;
                    case CLLEXTYPE_WORDL:
                        if (lexer->screen == true) {
                            /* Escaped quote: store it literally. */
                            lexer->screen = false;
                            break;
                        }
                        /* The closing quote is consumed, not pushed back. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->screen = false;
                        return CLLEXTOK_WORD;
                    case CLLEXTYPE_EOF:
                        /* Unterminated quote: return what was collected. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        lexer->screen = false;
                        return CLLEXTOK_WORD;
                    default:{
                        break;
                    }
                }
                lexer->screen = false;
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            /* Run of spaces and tabs. */
            case CLLEXCONT_SPACE:{
                switch (type) {
                    case CLLEXTYPE_SPACE:
                        break;
                    case CLLEXTYPE_EOF:
                    default:{
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_SPACE;
                    }
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
            /* Run of '=' characters. */
            case CLLEXCONT_SEP:{
                switch (type) {
                    case CLLEXTYPE_SEP:
                        break;
                    case CLLEXTYPE_EOF:
                    default:{
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CLLEXCONT_UNDEF;
                        lexer->rewind = true;
                        return CLLEXTOK_SEP;
                    }
                }
                token[lexer->tokpos++] = lexer->letter;
                break;
            }
        }
    }

    /* Not reached: the loop above only exits through a return. */
    strcpy(token, "UNDEF");
    return CLLEXTOK_UNDEF;
}
|
|
|
|
/*
 * Dispose of a lexer.
 *
 * This is a no-op: nothing is released here, and the argument string
 * passed to cllexer_reset() is left untouched.
 *
 * lexer - lexer object; unused.
 */
void cllexer_destroy(cllexer_t * lexer) {
    /* Reference the parameter so it does not count as unused. */
    (void)lexer;
}
|