256 lines
8.2 KiB
C
256 lines
8.2 KiB
C
/*
 * Copyright 2023 Oleg Borodin <borodin@unix7.org>
 */
|
|
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
#include <rcache.h>
|
|
#include <cflexer.h>
|
|
|
|
|
|
/*
 * Lexer contexts: the state stored in lexer->context between letters.
 * The notes describe how cflexer_gettoken() in this file uses each value.
 */
#define CFLEXCONT_COMM      1   /* inside a '#' comment, up to newline or EOF */
#define CFLEXCONT_END       2   /* end of input has been seen */
#define CFLEXCONT_LWORD     3   /* inside a double-quoted word */
#define CFLEXCONT_SEPAR     4   /* not entered anywhere in this file */
#define CFLEXCONT_SPACE     5   /* skipping a run of blanks and tabs */
#define CFLEXCONT_UNDEF     6   /* between tokens; the initial context */
#define CFLEXCONT_UNKNOW    7   /* not entered anywhere in this file */
#define CFLEXCONT_WORD      8   /* inside an unquoted word */
#define CFLEXCONT_NEWLN     9   /* handled by the lexer but never entered in this file */
|
|
|
|
/* Letter classes returned by get_ltype(). */
#define CFLEXTYPE_CHAR      1   /* any letter without a special meaning */
#define CFLEXTYPE_COMM      2   /* '#' */
#define CFLEXTYPE_EOF       3   /* the EOF sentinel */
#define CFLEXTYPE_NEWLN     4   /* '\n' */
#define CFLEXTYPE_SCR       5   /* backslash */
#define CFLEXTYPE_SEPAR     6   /* '=' */
#define CFLEXTYPE_SPACE     7   /* ' ' or '\t' */
#define CFLEXTYPE_UNDEF     8   /* never returned by get_ltype() */
#define CFLEXTYPE_WORDL     9   /* '"' */

/*
 * Classify one input letter into a CFLEXTYPE_* class.
 *
 * The letter is taken as an int rather than a char so that the EOF sentinel
 * stays distinct from every byte value: with a char parameter EOF can never
 * match where char is unsigned, and byte 0xFF is mistaken for EOF where char
 * is signed.  Callers that pass a char still work through the usual implicit
 * conversion.
 *
 * Returns the CFLEXTYPE_* value for the letter; anything without a special
 * meaning is CFLEXTYPE_CHAR.
 */
static int get_ltype(int letter) {
    switch (letter) {
        case EOF:
            return CFLEXTYPE_EOF;
        case ' ':
        case '\t':
            return CFLEXTYPE_SPACE;
        case '=':
            return CFLEXTYPE_SEPAR;
        case '\n':
            return CFLEXTYPE_NEWLN;
        case '#':
            return CFLEXTYPE_COMM;
        case '"':
            return CFLEXTYPE_WORDL;
        case '\\':
            return CFLEXTYPE_SCR;
        default:
            return CFLEXTYPE_CHAR;
    }
}
|
|
|
|
/*
 * Put a lexer into its starting state and attach the cache it reads from.
 *
 * lexer - lexer object to initialise; every field set below is overwritten.
 * cache - input cache stored in lexer->cache; it is only stored here, not read.
 */
void cflexer_init(cflexer_t * lexer, rcache_t * cache) {
    /* Scanner flags: no pending escape, no pushed-back letter. */
    lexer->screen = false;
    lexer->rewind = false;

    /* No letter read yet and nothing collected into a token. */
    lexer->letter = EOF;
    lexer->tokpos = 0;

    /* Start between tokens. */
    lexer->context = CFLEXCONT_UNDEF;

    lexer->cache = cache;
}
|
|
|
|
/*
 * Read letters from lexer->cache until one token is complete.
 *
 * lexer - lexer state set up by cflexer_init().
 * token - caller-supplied buffer that receives the NUL-terminated token text.
 *         No length is passed in and no bounds check is done here, so the
 *         buffer must be large enough for the longest word in the input.
 *
 * Returns:
 *   CFLEXTOK_WORD  - an unquoted or double-quoted word; token holds its text
 *                    with the quotes and escaping backslashes removed.
 *   CFLEXTOK_SEPAR - a '=' letter; token is "=".
 *   CFLEXTOK_NEXT  - a newline; token is "NEWLN".
 *   CFLEXTOK_COMM  - a '#' comment running to newline or EOF; the comment
 *                    text is dropped and token is "COMMENT".  The ending
 *                    newline is pushed back and reported by the next call.
 *   CFLEXTOK_END   - end of input; token is "END".  Once reached, every
 *                    further call returns CFLEXTOK_END again.
 *   CFLEXTOK_UNDEF - lexer->context holds a value this function does not
 *                    handle; token is "UNDEF".
 *
 * Runs of blanks and tabs between tokens are skipped without a token.
 * A backslash makes the next letter literal, both in unquoted and in quoted
 * words.
 */
int cflexer_gettoken(cflexer_t * lexer, char* token) {

    while (true) {
        /* Take a fresh letter unless the previous step pushed one back. */
        if (!lexer->rewind) {
            lexer->letter = rcache_getc(lexer->cache);
        }
        lexer->rewind = false;
        int type = get_ltype(lexer->letter);

        switch (lexer->context) {

            case CFLEXCONT_END: {
                /* Sticky state: keep the letter pushed back and report END. */
                lexer->context = CFLEXCONT_END;
                lexer->tokpos = 0;
                lexer->rewind = true;
                strcpy(token, "END");
                return CFLEXTOK_END;
            }

            case CFLEXCONT_UNDEF: {
                /* Between tokens: the letter decides what starts next. */
                switch (type) {
                    case CFLEXTYPE_EOF: {
                        /* Re-examine the same letter in the END context. */
                        lexer->context = CFLEXCONT_END;
                        lexer->rewind = true;
                        break;
                    }
                    case CFLEXTYPE_CHAR: {
                        lexer->tokpos = 0;
                        token[lexer->tokpos++] = lexer->letter;
                        lexer->context = CFLEXCONT_WORD;
                        break;
                    }
                    case CFLEXTYPE_SEPAR: {
                        strcpy(token, "=");
                        lexer->context = CFLEXCONT_UNDEF;
                        return CFLEXTOK_SEPAR;
                    }
                    case CFLEXTYPE_NEWLN: {
                        lexer->tokpos = 0;
                        strcpy(token, "NEWLN");
                        lexer->context = CFLEXCONT_UNDEF;
                        return CFLEXTOK_NEXT;
                    }
                    case CFLEXTYPE_SPACE: {
                        lexer->tokpos = 0;
                        token[lexer->tokpos] = '\0';
                        lexer->context = CFLEXCONT_SPACE;
                        break;
                    }
                    case CFLEXTYPE_COMM: {
                        lexer->tokpos = 0;
                        token[lexer->tokpos] = '\0';
                        lexer->context = CFLEXCONT_COMM;
                        break;
                    }
                    case CFLEXTYPE_WORDL: {
                        /* Opening quote: it is not part of the word. */
                        lexer->tokpos = 0;
                        token[lexer->tokpos] = '\0';
                        lexer->context = CFLEXCONT_LWORD;
                        break;
                    }
                    case CFLEXTYPE_SCR: {
                        /* A word that starts with an escaped letter. */
                        lexer->tokpos = 0;
                        lexer->screen = true;
                        lexer->context = CFLEXCONT_WORD;
                        break;
                    }
                    default: {
                        break;
                    }
                }
                break;
            }

            case CFLEXCONT_WORD: {
                switch (type) {
                    case CFLEXTYPE_SCR: {
                        if (lexer->screen == true) {
                            /* Escaped backslash: keep one literal backslash. */
                            token[lexer->tokpos++] = lexer->letter;
                            lexer->screen = false;
                        } else {
                            lexer->screen = true;
                        }
                        continue;
                    }
                    case CFLEXTYPE_CHAR:
                        break;
                    case CFLEXTYPE_EOF:
                    default: {
                        /*
                         * An escaped special letter joins the word.  EOF is
                         * excluded: it is not a character, so a trailing
                         * backslash must not store the EOF sentinel in token.
                         */
                        if (lexer->screen == true && type != CFLEXTYPE_EOF) {
                            lexer->screen = false;
                            break;
                        }
                        /* Any other special letter ends the word; push it back. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CFLEXCONT_UNDEF;
                        lexer->rewind = true;
                        lexer->screen = false;
                        return CFLEXTOK_WORD;
                    }
                }
                lexer->screen = false;
                token[lexer->tokpos++] = lexer->letter;
                break;
            }

            case CFLEXCONT_LWORD: {
                switch (type) {
                    case CFLEXTYPE_SCR: {
                        if (lexer->screen == true) {
                            /* Escaped backslash: keep one literal backslash. */
                            token[lexer->tokpos++] = lexer->letter;
                            lexer->screen = false;
                        } else {
                            lexer->screen = true;
                        }
                        continue;
                    }
                    case CFLEXTYPE_WORDL: {
                        /* An escaped quote is part of the word. */
                        if (lexer->screen == true) {
                            lexer->screen = false;
                            break;
                        }
                        /* Closing quote: consumed, not pushed back. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CFLEXCONT_UNDEF;
                        lexer->screen = false;
                        return CFLEXTOK_WORD;
                    }
                    case CFLEXTYPE_EOF: {
                        /* Unterminated quote: return what was collected. */
                        token[lexer->tokpos++] = '\0';
                        lexer->context = CFLEXCONT_UNDEF;
                        lexer->rewind = true;
                        /* Do not leave a pending escape behind. */
                        lexer->screen = false;
                        return CFLEXTOK_WORD;
                    }
                    default: {
                        break;
                    }
                }
                lexer->screen = false;
                token[lexer->tokpos++] = lexer->letter;
                break;
            }

            case CFLEXCONT_SPACE: {
                switch (type) {
                    case CFLEXTYPE_SPACE:
                        break;
                    case CFLEXTYPE_EOF:
                    default: {
                        /* End of the blank run: re-examine this letter. */
                        lexer->rewind = true;
                        lexer->context = CFLEXCONT_UNDEF;
                        continue;
                    }
                }
                break;
            }

            case CFLEXCONT_NEWLN: {
                /* Nothing in this file switches the lexer into this context. */
                switch (type) {
                    case CFLEXTYPE_NEWLN:
                        break;
                    case CFLEXTYPE_EOF:
                    default: {
                        lexer->rewind = true;
                        lexer->context = CFLEXCONT_UNDEF;
                        /* Token text kept exactly as it was (sic). */
                        strcpy(token, "NEWSLN");
                        return CFLEXTOK_NEXT;
                    }
                }
                break;
            }

            case CFLEXCONT_COMM: {
                switch (type) {
                    case CFLEXTYPE_EOF:
                    case CFLEXTYPE_NEWLN:
                        /* Push the terminator back so it is reported next. */
                        lexer->context = CFLEXCONT_UNDEF;
                        lexer->rewind = true;
                        strcpy(token, "COMMENT");
                        return CFLEXTOK_COMM;
                    default: {
                        /* Comment text is skipped. */
                        break;
                    }
                }
                break;
            }

            default: {
                /*
                 * Unhandled context value: report it instead of looping
                 * forever without any state change.
                 */
                strcpy(token, "UNDEF");
                return CFLEXTOK_UNDEF;
            }
        }
    }

    /* Not reached: the loop above only exits through a return. */
    strcpy(token, "UNDEF");
    return CFLEXTOK_UNDEF;
}
|
|
|
|
/*
 * Tear-down counterpart of cflexer_init().
 *
 * Nothing is freed or reset here; the parameter is only referenced so that
 * it does not count as unused.
 */
void cflexer_destroy(cflexer_t * lexer) {
    (void) lexer;
}
|