// OADL to generate fast fp16 conversion
// From http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
//
// main() prints five C array definitions (mantissatable, exponenttable,
// offsettable, basetable, shifttable) holding the lookup tables described in
// the paper referenced above.

// Compute one entry of the lower half of mantissatable.
//
//   i  - table index; main() calls this for 1..1023 only.
//
// Returns the 32-bit pattern formed by shifting i left by 13, shifting it
// further left until bit 23 is set (subtracting 0x00800000 from the exponent
// accumulator for every extra shift), clearing bit 23, and OR-ing in the
// accumulator plus 0x38800000.
//
// The loop never terminates for i == 0, because no bit ever reaches
// position 23; main() stores entry 0 itself.
proc convertmantissa(i)
{
    var frac = Uint(i) << 13;
    var expo = 0;

    // Shift until bit 23 is set, lowering the exponent once per shift.
    while (!(frac & 0x00800000U)) {
        frac <<= 1;
        expo -= 0x00800000U;
    }

    // Drop bit 23 and combine with the rebased exponent.
    return (frac & ~0x00800000U) | (expo + 0x38800000U);
}

// Fill the 512-entry base and shift tables.
//
//   basetable  - indexable with 0..511; receives 16-bit base values
//   shifttable - indexable with 0..511; receives shift counts
//
// Indices 0..255 hold the entries written without the 0x8000 bit; index
// (n | 0x100) holds the same entry with 0x8000 OR-ed into the base value.
// Both halves share the same shift count.
proc generatetables(basetable, shifttable)
{
    for (var expbits = 0; expbits < 256; expbits++) {
        var unbiased = expbits - 127;

        // Defaults cover the final case (unbiased == 128):
        // Infinity and NaN's stay Infinity and NaN's.
        var magnitude = 0x7C00;
        var shift = 13;

        if (unbiased < -24) {
            // Very small numbers map to zero
            magnitude = 0x0000;
            shift = 24;
        }
        else if (unbiased < -14) {
            // Small numbers map to denorms
            magnitude = (0x0400 >> (-unbiased - 14));
            shift = -unbiased - 1;
        }
        else if (unbiased <= 15) {
            // Normal numbers just lose precision
            magnitude = ((unbiased + 15) << 10);
            shift = 13;
        }
        else if (unbiased < 128) {
            // Large numbers map to Infinity
            magnitude = 0x7C00;
            shift = 24;
        }

        var mirrored = expbits | 0x100;
        basetable[expbits] = magnitude;
        basetable[mirrored] = magnitude | 0x8000;
        shifttable[expbits] = shift;
        shifttable[mirrored] = shift;
    }
}

// Build every table and print each one as a C array definition.
// The bare string statements supply the text surrounding each table;
// print() formats the table contents as hexadecimal literals.
proc main()
{
    // Mantissa table: entry 0 is zero, 1..1023 come from convertmantissa(),
    // and 1024..2047 are 0x38000000 plus the index offset shifted left by 13.
    var mantissas = new PackUint(2048);
    mantissas[0] = 0;
    for (var n = 1; n <= 2047; n++) {
        if (n <= 1023) {
            mantissas[n] = convertmantissa(n);
        }
        else {
            mantissas[n] = 0x38000000U + ((n - 1024) << 13);
        }
    }
    "uint32_t mantissatable[2048] = {\n";
    print("4X,6('0x',Z8.8,:,','),/", mantissas);
    "};\n";

    // Exponent table: entries 32..63 repeat entries 0..31 with bit 31 set.
    var exponents = new PackUint(64);
    exponents[0] = 0;
    exponents[32] = 0x80000000U;
    for (var k = 1; k <= 30; k++) {
        exponents[k] = k << 23;
        exponents[k + 32] = 0x80000000U + (k << 23);
    }
    exponents[31] = 0x47800000U;
    exponents[63] = 0xC7800000U;
    "uint32_t exponenttable[64] = {\n";
    print("4X,6('0x',Z8.8,:,','),/", exponents);
    "};\n";

    // Offset table: 1024 everywhere except entries 0 and 32, which are 0.
    var offsets = new PackUshort(64);
    offsets[0] = 0;
    offsets[32] = 0;
    for (var k = 1; k <= 31; k++) {
        offsets[k] = 1024;
        offsets[k + 32] = 1024;
    }
    "uint16_t offsettable[64] = {\n";
    print("4X,10('0x',Z4.4,:,','),/", offsets);
    "};\n";

    // Base and shift tables are filled together by generatetables().
    var bases = new PackUshort(512);
    var shifts = new PackUbyte(512);
    generatetables(bases, shifts);
    "uint16_t basetable[512] = {\n";
    print("4X,10('0x',Z4.4,:,','),/", bases);
    "};\n";
    "uint8_t shifttable[512] = {\n";
    print("4X,15('0x',Z2.2,:,','),/", shifts);
    "};\n";
}