/*
 * Copyright (c) 2024 Ross Cunniff
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <stdio.h>
#include <ctype.h>
#include "tokmach.h"
#include "unicode.h"

#define TOK_DEBUG 0

// Reset *st to the machine's initial configuration: every field zeroed,
// then state = 1 (the start row of the transition table), res = TOK_UNKNOWN
// and doAdd = 1.
void OADL_InitTokenState(TokenState *st)
{
    TokenState fresh = {0};

    fresh.doAdd = 1;
    fresh.res = TOK_UNKNOWN;
    fresh.state = 1;

    *st = fresh;
}

// Special character groups.  These are negative so they cannot collide
// with the explicit (positive) character codes used in the table.
#define ID     -1      // Name character: ISALNUM (letter or digit), $ or _
#define AL     -2      // Non-numeric name character: ISALPHA (letter), $ or _
#define DG     -3      // Decimal digit 0-9
#define HX     -4      // Hex digit 0-9 A-F (either case)
#define $_     -5      // Underscore; matching it clears doAdd
#define IS     -6      // Suffix character: B, L or S (either case)
#define FS     -7      // Suffix character: D or H (either case)
#define NY     -8      // Any character (always matches)
#define LH     -9      // Suffix character: L or H (either case)
#define BN     -10     // Binary digit 0 or 1

// Explicit characters used in table.  They are compared against the
// upper-cased input character, so letters match in either case.
#define $0      '0'
#define $L      'L'
#define $X      'X'
#define $E      'E'
#define $P      'P'
#define $B      'B'
#define $S      'S'
#define $d      '.'     // Dot
#define $q      '"'     // Quote
#define $a      '\''    // Apostrophe
#define $p      '+'     // Plus
#define $m      '-'     // Minus
#define $U      'U'     // Handled specially: matching it clears doAdd

// Special states.  FI and EE each expand to a complete (match, next-state)
// pair and are used as the last entry of a table row.
#define FI      NY,0    // Any other character: go to state 0
#define EE      NY,-1   // Any other character: go to state -1
#define ST      -2      // These two are pseudo-states; the machine
#define CH      -3      // does special work if they are entered
                        // (ST: res = TOK_LSTRING, CH: res = TOK_LCHARCON,
                        // and the state becomes 0 in both cases)

// Actions (values of TokState.action; 0 means no action)
#define INT 1           // res = TOK_INTCON
#define UNS 2           // isUnsigned = 1
#define ISZ 3           // isSized = 1
#define HEX 4           // isHex = 1
#define NAM 5           // res = TOK_NAME
#define FLT 6           // res = TOK_FLOATCON
#define FSZ 7           // res = TOK_FLOATCON, isSized = 1
#define DOT 8           // res = TOK_DOT
#define ELL 9           // res = TOK_DOTDOTDOT
#define HFP 10          // res = TOK_FLOATCON
#define HFS 11          // res = TOK_FLOATCON, isSized = 1
#define BIN 12          // isBinary = 1, isSized = 0

#define MAXCH   11      // Maximum (match, next-state) pairs in one table row

// A token state in the state machine (one row of the tokMach table)
typedef struct {
    int stateNum;               // State number; equals the row's index in tokMach
    int action;                 // Action code (INT, UNS, ...) applied when a
                                // character is processed in this state; 0 = none
    short transit[MAXCH*2];     // Flattened (match, next-state) pairs, tried in
                                // order; unused trailing slots are zero
} TokState;

// Transition table, indexed by state number (row N describes state N; row 0
// is a placeholder with no action and no transitions).  Each row is
//     { stateNum, action, { match, next, match, next, ... } }
// where `match` is a character-group code (ID, AL, DG, ...) or an explicit
// character, and `next` is the state entered when the input matches.
// Pairs are tried left to right and the first match wins, so a specific
// character must precede any group that also contains it (e.g. $B before
// IS in state 7).  Every real row ends with FI (anything else -> state 0)
// or EE (anything else -> state -1).
//
// Rough map of the states, derived from the transitions below:
//    1         start (set by OADL_InitTokenState)
//    7         after a leading '0'
//    2-4       binary digits after "0B"
//    5,6,10    hex digits after "0X"
//    12,15     decimal digits
//    14,16,17  fraction digits after '.'
//    19-22     exponent after 'E'
//    8,13,18   suffixes ('U', IS suffix, FS suffix)
//    9         after 'S' following hex digits; only 'B' continues to 13
//    11,24     names (24 = after a leading 'L', which may also begin
//              L"..." via ST or L'...' via CH)
//    23,26,25  ".", "..", "..."
//    28-34     hex floating point (fraction and 'P' exponent)
//    27        LH suffix of a hex float
static TokState tokMach[] = {
    { 0 }, // Not a state
    { 1,0,  {$0, 7,$L,24,DG,15,AL,11,$d,23,                               EE}},
    { 2,ISZ,{BN, 3,                                                       FI}},
    { 3,BIN,{$U, 8,IS,13,BN, 3,$_, 4,                                     FI}},
    { 4,0,  {BN, 3,$_, 4,                                                 EE}},
    { 5,0,  {HX,10,                                                       EE}},
    { 6,0,  {$_, 6,HX,10,                                                 EE}},
    { 7,INT,{$X, 5,$E,19,$B, 2,FS,18,IS,13,$U, 8,$d,16,DG,15,$_,12,       FI}},
    { 8,UNS,{IS,13,                                                       FI}},
    { 9,0,  {$B,13,                                                       FI}},
    {10,HEX,{HX,10,$_, 6,$U, 8,$S, 9,$L,13,$P,32,$d,28,                   FI}},
    {11,NAM,{ID,11,                                                       FI}},
    {12,0,  {$_,12,DG,15,                                                 EE}},
    {13,ISZ,{                                                             FI}},
    {14,0,  {$_,14,DG,17,                                                 EE}},
    {15,INT,{$E,19,FS,18,$d,16,DG,15,$_,12,$U, 8,IS,13,                   FI}},
    {16,FLT,{$E,19,FS,18,DG,17,                                           FI}},
    {17,FLT,{$E,19,$_,14,DG,17,FS,18,                                     FI}},
    {18,FSZ,{                                                             FI}},
    {19,0,  {$p,20,$m,20,DG,21,                                           EE}},
    {20,0,  {DG,21,                                                       EE}},
    {21,FLT,{DG,21,$_,22,FS,18,                                           FI}},
    {22,0,  {DG,21,$_,22,                                                 EE}},
    {23,DOT,{DG,17,$d,26,                                                 FI}},
    {24,NAM,{$q,ST,$a,CH,ID,11,                                           FI}},
    {25,ELL,{                                                             FI}},
    {26,0,  {$d,25,                                                       EE}},
    {27,HFS,{                                                             FI}},
    {28,HFP,{LH,27,HX,34,$P,32,                                           FI}},
    {29,0,  {DG,30,$_,29,                                                 EE}},
    {30,HFP,{LH,27,DG,30,$_,29,                                           FI}},
    {31,0,  {DG,30,                                                       EE}},
    {32,0,  {$p,31,$m,31,DG,30,                                           EE}},
    {33,0,  {$_,33,HX,34,                                                 EE}},
    {34,HFP,{LH,27,HX,34,$_,33,$P,32,                                     FI}},
};

// Character classification helpers.
// UM(m) builds a one-bit mask from the UC_CHAR_TYPE_<m> constant; ISALPHA
// and ISALNUM test the value returned by OADL_ucCharType() against those
// masks (presumably Unicode general categories -- see unicode.h).
#define UM(m)           (1 << UC_CHAR_TYPE_##m)
#define UM_LETTER       (UM(Lu)|UM(Ll)|UM(Lt)|UM(Lo))
#define UM_DIGIT        (UM(Nd)|UM(Nl)|UM(No))
#define ISALPHA(ch)     ((1 << OADL_ucCharType(ch)) & UM_LETTER)
#define ISALNUM(ch)     ((1 << OADL_ucCharType(ch)) & (UM_LETTER|UM_DIGIT))
// ASCII decimal digit: '0' through '9', both ends inclusive
#define ISDIGIT(ch)     (((ch) >= '0') && ((ch) <= '9'))
#define TOUPPER(ch)     OADL_ucToUpper(ch)

// ASCII decimal digit test.  Unlike isdigit(), this is well defined for
// every int value, including code points above UCHAR_MAX.
static int tokIsDigit(int ch)
{
    return (ch >= '0') && (ch <= '9');
}

// ASCII hexadecimal digit test (0-9, A-F, a-f); well defined for every
// int value, unlike isxdigit().
static int tokIsHexDigit(int ch)
{
    return tokIsDigit(ch)
        || ((ch >= 'A') && (ch <= 'F'))
        || ((ch >= 'a') && (ch <= 'f'));
}

/*
 * Advance the token state machine by one input character.
 *
 * st - machine state; st->state selects the current row of tokMach
 * ch - the next input character (may be a code point outside the
 *      unsigned char range; it is upper-cased with TOUPPER for the
 *      case-insensitive comparisons)
 *
 * The action of the CURRENT state is applied first (updating st->res and
 * the isUnsigned/isSized/isBinary/isHex flags), so a state's action only
 * takes effect once a following character is fed in.  The row's
 * transitions are then tried in order and the first match supplies the
 * new st->state.  st->doAdd is set to 1, or to 0 when the matching
 * transition is an underscore or 'U' (presumably telling the caller not
 * to append ch to the token text -- confirm against the caller).
 *
 * On return st->state is a positive state number, 0 (via FI or one of the
 * ST/CH pseudo-states, which also set st->res to TOK_LSTRING or
 * TOK_LCHARCON) or -1 (via EE).  If st->state is not a valid row index on
 * entry (e.g. it is already -1) the call returns without modifying *st.
 */
void OADL_TokenStateMachine(TokenState *st, int ch)
{
    int i, si, done;
    int cu;
    short *transit;

    si = st->state;
    // States such as -1 have no row in tokMach; indexing the table with
    // them would read outside the array.
    if ((si < 0) || (si >= (int)(sizeof(tokMach) / sizeof(tokMach[0])))) {
        return;
    }
    cu = TOUPPER(ch);
    transit = tokMach[si].transit;

#if TOK_DEBUG
    printf("ST: %2d; CU: '%c'; ACT: %d", si,
                (cu >= 32) ? cu : '?', tokMach[si].action);
#endif

    // Apply the action attached to the state we are leaving
    switch (tokMach[si].action ) {
    case INT : st->res = TOK_INTCON;                            break;
    case UNS : st->isUnsigned = 1;                              break;
    case ISZ : st->isSized = 1;                                 break;
    case BIN : st->isBinary = 1; st->isSized = 0;               break;
    case HEX : st->isHex = 1;                                   break;
    case NAM : st->res = TOK_NAME;                              break;
    case FLT : st->res = TOK_FLOATCON;                          break;
    case HFP : st->res = TOK_FLOATCON;                          break;
    case FSZ : st->res = TOK_FLOATCON; st->isSized = 1;         break;
    case HFS : st->res = TOK_FLOATCON; st->isSized = 1;         break;
    case DOT : st->res = TOK_DOT;                               break;
    case ELL : st->res = TOK_DOTDOTDOT;                         break;
    default :                                                   break;
    }

    // Walk the (match, next-state) pairs; the first match wins.  Real rows
    // always end with an NY entry, so the zero terminator is only reached
    // for the placeholder row 0.
    done = 0;
    st->doAdd = 1;
    for (i = 0; transit[i] && !done; i += 2) {
        switch (transit[i]) {
        case ID : done = ISALNUM(ch)||(ch=='$')||(ch=='_');              break;
        case AL : done = ISALPHA(ch)||(ch=='$')||(ch=='_');              break;
        case DG : done = tokIsDigit(ch);                                 break;
        case BN : done = (ch=='0')||(ch=='1');                           break;
        case IS : done = (cu=='B')||(cu=='L')||(cu=='S');                break;
        case FS : done = (cu=='D')||(cu=='H');                           break;
        case LH : done = (cu=='L')||(cu=='H');                           break;
        case HX : done = tokIsHexDigit(ch);                              break;
        case $_ : done = (ch=='_');              if(done) st->doAdd = 0; break;
        case $U : done = (cu=='U');              if(done) st->doAdd = 0; break;
        case NY : done = 1;                                              break;
        default : done = (cu == transit[i]);                             break;
        }
        if (done) {
            st->state = transit[i+1];
        }
    }
#if TOK_DEBUG
    printf(" => ST: %2d; ADD: %d; i: %d\n", st->state, st->doAdd, i);
#endif

    // Clean-up for LSTRING/LCHAR
    switch (st->state) {
    case ST : st->res = TOK_LSTRING;  st->state = 0; break;
    case CH : st->res = TOK_LCHARCON; st->state = 0; break;
    default :                                      break;
    }
}