/*
* Copyright (c) 2024 Ross Cunniff
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdio.h>
#include <ctype.h>
#include "tokmach.h"
#include "unicode.h"
#define TOK_DEBUG 0
// Reset *st to the machine's initial configuration: every member is
// zeroed, then state is set to 1 (the row the machine starts from),
// res to TOK_UNKNOWN, and doAdd to 1.
void OADL_InitTokenState(TokenState *st)
{
    TokenState fresh = {0};

    fresh.state = 1;
    fresh.res = TOK_UNKNOWN;
    fresh.doAdd = 1;

    *st = fresh;
}
// Special character groups.  These negative codes go in the "match" slot
// of a transition pair; OADL_TokenStateMachine() tests the incoming
// character against the group instead of against a literal.
// NOTE: macro names containing '$' rely on the compiler accepting '$' in
// identifiers (a common extension).
#define ID -1 // Name character: letter or digit per OADL_ucCharType(), $ or _
#define AL -2 // Non-numeric name character: letter per OADL_ucCharType(), $ or _
#define DG -3 // Digit 0-9
#define HX -4 // Hex digit 0-9 A-F (either case)
#define $_ -5 // Ignored underscore: matches '_' and clears doAdd
#define IS -6 // Suffix character: B, L or S (either case)
#define FS -7 // Suffix character: D or H (either case)
#define NY -8 // Any character
#define LH -9 // Suffix character: L or H (either case)
#define BN -10 // Binary digit 0 or 1
// Explicit characters used in table.  Letters are written in upper case
// because literals are compared against the upper-cased input character.
#define $0 '0'
#define $L 'L'
#define $X 'X'
#define $E 'E'
#define $P 'P'
#define $B 'B'
#define $S 'S'
#define $d '.' // Dot
#define $q '"' // Quote
#define $a '\'' // Apostrophe
#define $p '+' // Plus
#define $m '-' // Minus
#define $U 'U' // Matching this entry also clears doAdd
// Special states.  FI and EE each expand to a complete (match, next) pair
// that matches any character, so they end a row's transition list.
#define FI NY,0 // Any character -> state 0
#define EE NY,-1 // Any character -> state -1
#define ST -2 // These two are pseudo-states; on entry the machine sets res
#define CH -3 // to TOK_LSTRING (ST) or TOK_LCHARCON (CH) and state to 0
// Actions.  The action of the current state is applied each time a
// character is fed to the machine while it is in that state.
#define INT 1 // res = TOK_INTCON
#define UNS 2 // isUnsigned = 1
#define ISZ 3 // isSized = 1
#define HEX 4 // isHex = 1
#define NAM 5 // res = TOK_NAME
#define FLT 6 // res = TOK_FLOATCON
#define FSZ 7 // res = TOK_FLOATCON; isSized = 1
#define DOT 8 // res = TOK_DOT
#define ELL 9 // res = TOK_DOTDOTDOT
#define HFP 10 // res = TOK_FLOATCON
#define HFS 11 // res = TOK_FLOATCON; isSized = 1
#define BIN 12 // isBinary = 1; isSized = 0
// Maximum number of (match, next) pairs in one state's transition list
#define MAXCH 11
// A token state in the state machine: one row of the tokMach[] table.
typedef struct {
int stateNum; // State number; equals the row's index in tokMach[]
int action; // Action code (INT, UNS, ...) applied while in this state; 0 = none
short transit[MAXCH*2]; // (match, next) pairs tried in order; unused slots are 0
} TokState;
// State transition table, indexed directly by state number.
// Each row is { stateNum, action, { match, next, match, next, ... } }:
//  - action is applied by OADL_TokenStateMachine() when a character arrives
//    while the machine is in that state (0 means no action);
//  - the (match, next) pairs are tried in order, so the first match wins;
//    match is a character-group code (negative) or an upper-case literal,
//    and next is the state to enter;
//  - every real row ends with FI (any character -> state 0) or
//    EE (any character -> state -1).
static TokState tokMach[] = {
{ 0 }, // Not a state
{ 1,0, {$0, 7,$L,24,DG,15,AL,11,$d,23, EE}}, // Start state
{ 2,ISZ,{BN, 3, FI}}, // After "0B": sized so far; a 0/1 makes it binary
{ 3,BIN,{$U, 8,IS,13,BN, 3,$_, 4, FI}}, // Binary digits
{ 4,0, {BN, 3,$_, 4, EE}}, // Underscore within binary digits
{ 5,0, {HX,10, EE}}, // After "0X": a hex digit is required
{ 6,0, {$_, 6,HX,10, EE}}, // Underscore within hex digits
{ 7,INT,{$X, 5,$E,19,$B, 2,FS,18,IS,13,$U, 8,$d,16,DG,15,$_,12, FI}}, // After leading '0'
{ 8,UNS,{IS,13, FI}}, // After 'U'
{ 9,0, {$B,13, FI}}, // After 'S' following hex digits
{10,HEX,{HX,10,$_, 6,$U, 8,$S, 9,$L,13,$P,32,$d,28, FI}}, // Hex digits
{11,NAM,{ID,11, FI}}, // Name characters
{12,0, {$_,12,DG,15, EE}}, // Underscore within decimal digits
{13,ISZ,{ FI}}, // After an integer size suffix
{14,0, {$_,14,DG,17, EE}}, // Underscore within fraction digits
{15,INT,{$E,19,FS,18,$d,16,DG,15,$_,12,$U, 8,IS,13, FI}}, // Decimal digits
{16,FLT,{$E,19,FS,18,DG,17, FI}}, // After '.' following digits
{17,FLT,{$E,19,$_,14,DG,17,FS,18, FI}}, // Fraction digits
{18,FSZ,{ FI}}, // After a float suffix (D or H)
{19,0, {$p,20,$m,20,DG,21, EE}}, // After 'E': sign or digit required
{20,0, {DG,21, EE}}, // After exponent sign: digit required
{21,FLT,{DG,21,$_,22,FS,18, FI}}, // Exponent digits
{22,0, {DG,21,$_,22, EE}}, // Underscore within exponent digits
{23,DOT,{DG,17,$d,26, FI}}, // After leading '.'
{24,NAM,{$q,ST,$a,CH,ID,11, FI}}, // After leading 'L': quote/apostrophe enter ST/CH
{25,ELL,{ FI}}, // After "..."
{26,0, {$d,25, EE}}, // After "..": a third '.' is required
{27,HFS,{ FI}}, // After a hex-float suffix (L or H)
{28,HFP,{LH,27,HX,34,$P,32, FI}}, // After '.' following hex digits
{29,0, {DG,30,$_,29, EE}}, // Underscore within 'P' exponent digits
{30,HFP,{LH,27,DG,30,$_,29, FI}}, // 'P' exponent digits
{31,0, {DG,30, EE}}, // After 'P' exponent sign: digit required
{32,0, {$p,31,$m,31,DG,30, EE}}, // After 'P': sign or digit required
{33,0, {$_,33,HX,34, EE}}, // Underscore within hex fraction digits
{34,HFP,{LH,27,HX,34,$_,33,$P,32, FI}}, // Hex fraction digits
};
// Character-class helpers.
// UM(m) builds a one-bit mask for the character type UC_CHAR_TYPE_<m>;
// UM_LETTER and UM_DIGIT combine the letter (Lu, Ll, Lt, Lo) and number
// (Nd, Nl, No) types.  ISALPHA/ISALNUM test the type reported by
// OADL_ucCharType() against those masks.
#define UM(m) (1 << UC_CHAR_TYPE_##m)
#define UM_LETTER (UM(Lu)|UM(Ll)|UM(Lt)|UM(Lo))
#define UM_DIGIT (UM(Nd)|UM(Nl)|UM(No))
#define ISALPHA(ch) ((1 << OADL_ucCharType(ch)) & UM_LETTER)
#define ISALNUM(ch) ((1 << OADL_ucCharType(ch)) & (UM_LETTER|UM_DIGIT))
// ASCII decimal digit test; the range '0'..'9' is inclusive at both ends.
#define ISDIGIT(ch) (((ch) >= '0') && ((ch) <= '9'))
#define TOUPPER(ch) OADL_ucToUpper(ch)
// ASCII decimal digit test.  Unlike <ctype.h> isdigit(), this is defined
// for every int value, so it is safe for code points above UCHAR_MAX.
static int tokIsDigit(int ch)
{
    return (ch >= '0') && (ch <= '9');
}

// ASCII hexadecimal digit test (0-9, a-f, A-F); defined for every int value.
static int tokIsHexDigit(int ch)
{
    return tokIsDigit(ch)
        || ((ch >= 'a') && (ch <= 'f'))
        || ((ch >= 'A') && (ch <= 'F'));
}

/*
 * Advance the token state machine by one input character.
 *
 * st - machine state, updated in place.
 * ch - the next input character.
 *
 * The action attached to the current state is applied first (updating
 * st->res, st->isUnsigned, st->isSized, st->isBinary or st->isHex).  The
 * state's transition list is then scanned in order and the first entry
 * matching ch selects the new st->state.  Literal entries and the suffix
 * groups are compared against the upper-cased character.
 *
 * st->doAdd is set to 1, except that it is cleared when the matching entry
 * is the ignored underscore ($_) or the 'U' entry.
 *
 * Entering pseudo-state ST or CH sets st->res to TOK_LSTRING or
 * TOK_LCHARCON respectively and leaves st->state at 0.
 *
 * If st->state is not a valid row of tokMach[] (for example -1 after an EE
 * transition), *st is left untouched instead of indexing out of bounds.
 */
void OADL_TokenStateMachine(TokenState *st, int ch)
{
    const int numStates = (int)(sizeof(tokMach) / sizeof(tokMach[0]));
    const int numSlots = (int)(sizeof(tokMach[0].transit)
                               / sizeof(tokMach[0].transit[0]));
    int i, si, done;
    int cu;
    const short *transit;

    si = st->state;
    if ((si < 0) || (si >= numStates)) {
        return;
    }
    cu = TOUPPER(ch);
    transit = tokMach[si].transit;
#if TOK_DEBUG
    printf("ST: %2d; CU: '%c'; ACT: %d", si,
        (cu >= 32) ? cu : '?', tokMach[si].action);
#endif
    // Apply the action of the state we are currently in
    switch (tokMach[si].action) {
    case INT : st->res = TOK_INTCON; break;
    case UNS : st->isUnsigned = 1; break;
    case ISZ : st->isSized = 1; break;
    // "0B" looked like a sized zero until a binary digit followed it
    case BIN : st->isBinary = 1; st->isSized = 0; break;
    case HEX : st->isHex = 1; break;
    case NAM : st->res = TOK_NAME; break;
    case FLT : st->res = TOK_FLOATCON; break;
    case HFP : st->res = TOK_FLOATCON; break;
    case FSZ : st->res = TOK_FLOATCON; st->isSized = 1; break;
    case HFS : st->res = TOK_FLOATCON; st->isSized = 1; break;
    case DOT : st->res = TOK_DOT; break;
    case ELL : st->res = TOK_DOTDOTDOT; break;
    default : break;
    }
    done = 0;
    st->doAdd = 1;
    // Scan (match, next) pairs; a 0 match or the end of the row stops the scan
    for (i = 0; !done && (i + 1 < numSlots) && transit[i]; i += 2) {
        switch (transit[i]) {
        case ID : done = ISALNUM(ch)||(ch=='$')||(ch=='_'); break;
        case AL : done = ISALPHA(ch)||(ch=='$')||(ch=='_'); break;
        case DG : done = tokIsDigit(ch); break;
        case BN : done = (ch=='0')||(ch=='1'); break;
        case IS : done = (cu=='B')||(cu=='L')||(cu=='S'); break;
        case FS : done = (cu=='D')||(cu=='H'); break;
        case LH : done = (cu=='L')||(cu=='H'); break;
        case HX : done = tokIsHexDigit(ch); break;
        case $_ : done = (ch=='_'); if(done) st->doAdd = 0; break;
        case $U : done = (cu=='U'); if(done) st->doAdd = 0; break;
        case NY : done = 1; break;
        default : done = (cu == transit[i]); break;
        }
        if (done) {
            st->state = transit[i+1];
        }
    }
#if TOK_DEBUG
    printf(" => ST: %2d; ADD: %d; i: %d\n", st->state, st->doAdd, i);
#endif
    // Clean-up for LSTRING/LCHAR
    switch (st->state) {
    case ST : st->res = TOK_LSTRING; st->state = 0; break;
    case CH : st->res = TOK_LCHARCON; st->state = 0; break;
    default : break;
    }
}