// OADL program that generates the lookup tables (emitted as C arrays) for fast fp16 conversion
// From http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
// Build one mantissatable entry from the index i.
//
// The value Uint(i)<<13 is shifted left one bit at a time until bit
// 0x00800000 becomes set; each shift also subtracts 0x00800000 from an
// exponent accumulator. That bit is then cleared, 0x38800000 is added to
// the accumulator, and the two parts are OR'ed together as the result.
//
// NOTE: for i == 0 the tested bit never becomes set, so the loop would not
// terminate. main() stores entry 0 itself and only calls this for 1..1023.
proc convertmantissa(i)
{
    var leadbit = 0x00800000U;
    var mant = Uint(i) << 13;
    var expo = 0;

    // Normalize: move the highest set bit of mant up to the leadbit position.
    while (!(mant & leadbit)) {
        mant <<= 1;
        expo -= leadbit;
    }

    expo += 0x38800000U;    // constant offset applied after normalizing
    mant &= ~leadbit;       // drop the bit the loop was looking for
    return mant | expo;
}
// Fill basetable and shifttable, writing indices 0..511 of each.
//
// For every idx in 0..255 a (half, shift) pair is chosen from the value
// expo = idx - 127. The pair is stored at idx, and again at idx|0x100 with
// 0x8000 OR'ed into the base value (presumably the sign bit of the 16-bit
// result -- confirm against the referenced paper).
//
//   expo < -24         very small numbers map to zero    base 0x0000, shift 24
//   -24 <= expo < -14  small numbers map to denorms      base 0x0400>>(-expo-14), shift -expo-1
//   -14 <= expo <= 15  normal numbers lose precision     base (expo+15)<<10, shift 13
//   15 < expo < 128    large numbers map to Infinity     base 0x7C00, shift 24
//   expo == 128        Infinity and NaN's stay as is     base 0x7C00, shift 13
proc generatetables(basetable, shifttable)
{
    for (var idx = 0; idx < 256; idx++) {
        var expo = idx - 127;
        var mirror = idx | 0x100;

        // Defaults cover the "very small numbers map to zero" case.
        var half = 0x0000;
        var shift = 24;

        if (expo >= -24) {
            if (expo < -14) {
                // Small numbers map to denorms
                half = 0x0400 >> (-expo - 14);
                shift = -expo - 1;
            }
            else if (expo <= 15) {
                // Normal numbers just lose precision
                half = (expo + 15) << 10;
                shift = 13;
            }
            else {
                // Large numbers map to Infinity (shift stays 24);
                // Infinity and NaN's stay Infinity and NaN's (shift 13).
                half = 0x7C00;
                if (expo >= 128) {
                    shift = 13;
                }
            }
        }

        basetable[idx] = half;
        basetable[mirror] = half | 0x8000;
        shifttable[idx] = shift;
        shifttable[mirror] = shift;
    }
}
// Build the five lookup tables and emit each one as a C array definition.
// Each table is filled first, then written out as: a header line, the
// contents via print() with a hexadecimal format, and a closing "};" line.
proc main()
{
    // mantissatable: entry 0 is zero, 1..1023 come from convertmantissa(),
    // and 1024..2047 are 0x38000000 plus the offset within that half << 13.
    var mantissatable = new PackUint(2048);
    mantissatable[0] = 0;
    for (var m = 1; m < 1024; m++) {
        mantissatable[m] = convertmantissa(m);
    }
    for (var n = 0; n < 1024; n++) {
        mantissatable[n + 1024] = 0x38000000U + (n << 13);
    }
    "uint32_t mantissatable[2048] = {\n";
    print("4X,6('0x',Z8.8,:,','),/", mantissatable);
    "};\n";

    // exponenttable: entries 32..63 repeat entries 0..31 with 0x80000000
    // added; the last entry of each half (31 and 63) is a fixed constant.
    var exponenttable = new PackUint(64);
    exponenttable[0] = 0;
    exponenttable[32] = 0x80000000U;
    for (var k = 1; k < 31; k++) {
        exponenttable[k] = k << 23;
        exponenttable[k + 32] = 0x80000000U + (k << 23);
    }
    exponenttable[31] = 0x47800000U;
    exponenttable[63] = 0xC7800000U;
    "uint32_t exponenttable[64] = {\n";
    print("4X,6('0x',Z8.8,:,','),/", exponenttable);
    "};\n";

    // offsettable: 1024 everywhere except entries 0 and 32, which are 0.
    var offsettable = new PackUshort(64);
    for (var j = 0; j < 64; j++) {
        offsettable[j] = 1024;
    }
    offsettable[0] = 0;
    offsettable[32] = 0;
    "uint16_t offsettable[64] = {\n";
    print("4X,10('0x',Z4.4,:,','),/", offsettable);
    "};\n";

    // basetable / shifttable: both are filled by generatetables().
    var basetable = new PackUshort(512);
    var shifttable = new PackUbyte(512);
    generatetables(basetable, shifttable);
    "uint16_t basetable[512] = {\n";
    print("4X,10('0x',Z4.4,:,','),/", basetable);
    "};\n";
    "uint8_t shifttable[512] = {\n";
    print("4X,15('0x',Z2.2,:,','),/", shifttable);
    "};\n";
}