/* * Copyright (c) 2024 Ross Cunniff * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ #include <stdio.h> #include <ctype.h> #include "tokmach.h" #include "unicode.h" #define TOK_DEBUG 0 void OADL_InitTokenState(TokenState *st) { static const TokenState initState = {0}; *st = initState; st->state = 1; st->res = TOK_UNKNOWN; st->doAdd = 1; } // Special character groups #define ID -1 // name character A-Z 0-9 $ _ #define AL -2 // Non-numeric ID character A-Z $ _ #define DG -3 // Digit 0-9 #define HX -4 // Hex digit 0-9 A-F #define $_ -5 // Ignored underscore #define IS -6 // Suffix character: BLS #define FS -7 // Suffix character: DH #define NY -8 // Any character #define LH -9 // Suffix character: LH #define BN -10 // Binary digit 0 or 1 // Explicit characters used in table #define $0 '0' #define $L 'L' #define $X 'X' #define $E 'E' #define $P 'P' #define $B 'B' #define $S 'S' #define $d '.' // Dot #define $q '"' // Quote #define $a '\'' // Apostrophe #define $p '+' // Plus #define $m '-' // Minus #define $U 'U' // Special states #define FI NY,0 #define EE NY,-1 #define ST -2 // These two are pseudo-states; the machine #define CH -3 // does special work if they are entered // Actions #define INT 1 #define UNS 2 #define ISZ 3 #define HEX 4 #define NAM 5 #define FLT 6 #define FSZ 7 #define DOT 8 #define ELL 9 #define HFP 10 #define HFS 11 #define BIN 12 #define MAXCH 11 // A token state in the state machine typedef struct { int stateNum; int action; short transit[MAXCH*2]; } TokState; static TokState tokMach[] = { { 0 }, // Not a state { 1,0, {$0, 7,$L,24,DG,15,AL,11,$d,23, EE}}, { 2,ISZ,{BN, 3, FI}}, { 3,BIN,{$U, 8,IS,13,BN, 3,$_, 4, FI}}, { 4,0, {BN, 3,$_, 4, EE}}, { 5,0, {HX,10, EE}}, { 6,0, {$_, 6,HX,10, EE}}, { 7,INT,{$X, 5,$E,19,$B, 2,FS,18,IS,13,$U, 8,$d,16,DG,15,$_,12, FI}}, { 8,UNS,{IS,13, FI}}, { 9,0, {$B,13, FI}}, {10,HEX,{HX,10,$_, 6,$U, 8,$S, 9,$L,13,$P,32,$d,28, FI}}, {11,NAM,{ID,11, FI}}, {12,0, {$_,12,DG,15, EE}}, {13,ISZ,{ FI}}, {14,0, {$_,14,DG,17, EE}}, {15,INT,{$E,19,FS,18,$d,16,DG,15,$_,12,$U, 8,IS,13, FI}}, {16,FLT,{$E,19,FS,18,DG,17, FI}}, {17,FLT,{$E,19,$_,14,DG,17,FS,18, FI}}, {18,FSZ,{ FI}}, {19,0, {$p,20,$m,20,DG,21, EE}}, {20,0, {DG,21, EE}}, {21,FLT,{DG,21,$_,22,FS,18, FI}}, {22,0, {DG,21,$_,22, EE}}, {23,DOT,{DG,17,$d,26, FI}}, {24,NAM,{$q,ST,$a,CH,ID,11, FI}}, {25,ELL,{ FI}}, {26,0, {$d,25, EE}}, {27,HFS,{ FI}}, {28,HFP,{LH,27,HX,34,$P,32, FI}}, {29,0, {DG,30,$_,29, EE}}, {30,HFP,{LH,27,DG,30,$_,29, FI}}, {31,0, {DG,30, EE}}, {32,0, {$p,31,$m,31,DG,30, EE}}, {33,0, {$_,33,HX,34, EE}}, {34,HFP,{LH,27,HX,34,$_,33,$P,32, FI}}, }; #define UM(m) (1 << UC_CHAR_TYPE_##m) #define UM_LETTER (UM(Lu)|UM(Ll)|UM(Lt)|UM(Lo)) #define UM_DIGIT (UM(Nd)|UM(Nl)|UM(No)) #define ISALPHA(ch) ((1 << OADL_ucCharType(ch)) & UM_LETTER) #define ISALNUM(ch) ((1 << OADL_ucCharType(ch)) & (UM_LETTER|UM_DIGIT)) #define ISDIGIT(ch) (((ch) > '0') && ((ch) <= '9')) #define TOUPPER(ch) OADL_ucToUpper(ch) void OADL_TokenStateMachine(TokenState *st, int ch) { int i, si, done; int cu; short *transit; si = st->state; cu = TOUPPER(ch); transit = tokMach[si].transit; #if TOK_DEBUG printf("ST: %2d; CU: '%c'; ACT: %d", si, (cu >= 32) ? cu : '?', tokMach[si].action); #endif switch (tokMach[si].action ) { case INT : st->res = TOK_INTCON; break; case UNS : st->isUnsigned = 1; break; case ISZ : st->isSized = 1; break; case BIN : st->isBinary = 1; st->isSized = 0; break; case HEX : st->isHex = 1; break; case NAM : st->res = TOK_NAME; break; case FLT : st->res = TOK_FLOATCON; break; case HFP : st->res = TOK_FLOATCON; break; case FSZ : st->res = TOK_FLOATCON; st->isSized = 1; break; case HFS : st->res = TOK_FLOATCON; st->isSized = 1; break; case DOT : st->res = TOK_DOT; break; case ELL : st->res = TOK_DOTDOTDOT; break; default : break; } done = 0; st->doAdd = 1; for (i = 0; transit[i] && !done; i += 2) { switch (transit[i]) { case ID : done = ISALNUM(ch)||(ch=='$')||(ch=='_'); break; case AL : done = ISALPHA(ch)||(ch=='$')||(ch=='_'); break; case DG : done = isdigit(ch); break; case BN : done = (ch=='0')||(ch=='1'); break; case IS : done = (cu=='B')||(cu=='L')||(cu=='S'); break; case FS : done = (cu=='D')||(cu=='H'); break; case LH : done = (cu=='L')||(cu=='H'); break; case HX : done = isxdigit(ch); break; case $_ : done = (ch=='_'); if(done) st->doAdd = 0; break; case $U : done = (cu=='U'); if(done) st->doAdd = 0; break; case NY : done = 1; break; default : done = (cu == transit[i]); break; } if (done) { st->state = transit[i+1]; } } #if TOK_DEBUG printf(" => ST: %2d; ADD: %d; i: %d\n", st->state, st->doAdd, i); #endif // Clean-up for LSTRING/LCHAR switch (st->state) { case ST : st->res = TOK_LSTRING; st->state = 0; break; case CH : st->res = TOK_LCHARCON; st->state = 0; break; default : break; } }