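// OADL program that generates the lookup tables for fast fp16 <-> fp32
// conversion and prints them as C array definitions.
// From "Fast Half Float Conversions" by Jeroen van der Zee:
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf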

// Convert the mantissa field of a denormalized fp16 value into the bit
// pattern (exponent and mantissa fields, sign bit clear) of the equivalent
// normalized fp32 value.
//
//   i       - fp16 mantissa field; main() calls this with 1..1023
//   returns - the fp32 bit pattern as a Uint
//
// If the shifted mantissa is zero there is no leading 1 bit to normalize,
// so zero is returned instead of looping forever.
proc convertmantissa(i)
{
    var m = Uint(i)<<13;        // Align with the fp32 mantissa (low bits zero padded)
    if (!m) return m;           // No bit set: the loop below would never terminate
    var e = 0;
    while (!(m&0x00800000U)) {  // Until the leading 1 reaches bit 23
        e -= 0x00800000U;       // Decrement the exponent field (1<<23)
        m <<= 1;
    }
    m &= ~0x00800000U;          // Drop the leading 1; it is implicit in fp32
    e += 0x38800000U;           // Apply the exponent bias, (127-14)<<23
    return m | e;
}

// Fill the fp32 -> fp16 lookup tables. Both tables have 512 entries and are
// indexed by the top nine bits of an fp32 value: bit 8 (0x100) is the sign,
// bits 0..7 are the biased exponent.
//
//   basetable  - receives the fp16 sign and exponent bits for each index
//   shifttable - receives the right shift to apply to the fp32 mantissa
proc generatetables(basetable, shifttable)
{
    for (var idx = 0; idx < 256; idx++) {
        var negidx = idx | 0x100;       // Same exponent, sign bit set
        var unbiased = idx - 127;

        // Default: Infinity and NaN's stay Infinity and NaN's
        var base = 0x7C00;
        var shift = 13;

        if (unbiased < -24) {
            // Very small numbers map to zero
            base = 0x0000;
            shift = 24;
        }
        else if (unbiased < -14) {
            // Small numbers map to denorms
            base = 0x0400 >> (-unbiased-14);
            shift = -unbiased-1;
        }
        else if (unbiased <= 15) {
            // Normal numbers just lose precision
            base = (unbiased+15) << 10;
        }
        else if (unbiased < 128) {
            // Large numbers map to Infinity
            shift = 24;
        }

        // The negative entry differs from the positive one only in the
        // fp16 sign bit.
        basetable[idx] = base;
        basetable[negidx] = base | 0x8000;
        shifttable[idx] = shift;
        shifttable[negidx] = shift;
    }
}

// Program entry point: builds the five conversion tables, then writes each
// one to the output as a C array definition.
proc main()
{
    // --- Build the fp16 -> fp32 tables ---

    // Mantissa table: entry 0 is zero, entries 1..1023 are normalized
    // denormals, entries 1024..2047 are the mantissas of normal numbers.
    var mantissatable = new PackUint(2048);
    mantissatable[0] = 0;
    for (var k = 1; k < 1024; k++) mantissatable[k] = convertmantissa(k);
    for (var k = 0; k < 1024; k++) mantissatable[k+1024] = 0x38000000U + (k<<13);

    // Exponent table: the upper half (32..63) mirrors the lower half with
    // the fp32 sign bit set.
    var exponenttable = new PackUint(64);
    exponenttable[0] = 0;
    exponenttable[32] = 0x80000000U;
    for (var k = 1; k <= 30; k++) {
        exponenttable[k] = k << 23;
        exponenttable[k+32] = 0x80000000U + (k<<23);
    }
    exponenttable[31] = 0x47800000U;
    exponenttable[63] = 0xC7800000U;

    // Offset table: 1024 everywhere except entries 0 and 32, which are 0.
    var offsettable = new PackUshort(64);
    for (var k = 0; k < 64; k++) offsettable[k] = 1024;
    offsettable[0] = 0;
    offsettable[32] = 0;

    // --- Build the fp32 -> fp16 tables ---
    var basetable = new PackUshort(512);
    var shifttable = new PackUbyte(512);
    generatetables(basetable, shifttable);

    // --- Emit everything as C source ---
    "uint32_t mantissatable[2048] = {\n";
    print("4X,6('0x',Z8.8,:,','),/", mantissatable);
    "};\n";

    "uint32_t exponenttable[64] = {\n";
    print("4X,6('0x',Z8.8,:,','),/", exponenttable);
    "};\n";

    "uint16_t offsettable[64] = {\n";
    print("4X,10('0x',Z4.4,:,','),/", offsettable);
    "};\n";

    "uint16_t basetable[512] = {\n";
    print("4X,10('0x',Z4.4,:,','),/", basetable);
    "};\n";

    "uint8_t shifttable[512] = {\n";
    print("4X,15('0x',Z2.2,:,','),/", shifttable);
    "};\n";
}