/*
 * Copyright (c) 2024 Ross Cunniff
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* OADL intrinsics.  This file should be compiled with:
 *
 *      ../bin/oadlc intrinsic.oad -o intrinsic.oax
 *
 * and then translated to C byte initializers with:
 *
 *      ../bin/cvtintr intrinsic.oax >intrinsic.c
 *
 * Rules for templates:
 *      Leaf-node procedures (except for intrinsics like
 *      length, array, min, etc. and embedded proc
 *      constants - see $sort for example)
 *
 *      No references to global variables, objects, classes, etc.
 *
 *      No string or array constants
 */

using namespace oadl;

// Helper p;rocs
proc RECURSE_BEGIN()
{
    if (intrpush() > MAX_RANK) {
        intrexit();
        throw ShapeCheck;
    }
}
#define RECURSE_END() {
    intrpop();
}
#define RECURSE_RETURN(val) {
    intrpop();
    return val
}
#define RECURSE_THROW(val) {
    intrexit();
    throw val;
}

/*************************************************************************
 * Monadic vector intrinsics
 *************************************************************************/
#define MONAD(name, op) {
    proc name(a)
    {
        var res;
        RECURSE_BEGIN();
        if (??a == Enclosure) {
            a = a.disclose();
            res = (op a).enclose();
        }
        else if (isarray(a)) {
            res = array(shape(a));
            forall(a#[i]) {
                res#[i] = op a#[i];
            }
            res = pack(res);
        }
        else {
            res = op a;
        }
        RECURSE_RETURN(res);
    }
}

#define POSTFIX(name, op) {
    proc name(a)
    {
        var res;
        RECURSE_BEGIN();
        if (??a == Enclosure) {
            res = a.disclose();
            res op;
        }
        else if (isarray(a)) {
            res = array(shape(a));
            forall(a#[i]) {
                res#[i] = a#[i];
                res#[i] op;
            }
            res = pack(res);
        }
        else {
            res = a;
            res op;
        }
        RECURSE_RETURN(res);
    }
}

/*************************************************************************
 * Dyadic vector intrinsics
 *************************************************************************/
#define CKVEC(arr,shp,bit) {
    if (typeof(arr) == Enclosure) {
        mke |= bit;
        arr = disclose(arr);
    }
    else if (isarray(arr)) {
        shp = shape(arr);
        amsk |= bit;
    }
}

// Promoted dyad
#define PDYAD(name, op) {
    proc name(a, b)
    {
        // Normalize
        var sa, sb;
        var amsk = 0, mke = 0;
        CKVEC(a,sa,1)
        CKVEC(b,sb,2)

        if ((amsk == 3) && arrcmp(sa,sb)) { RECURSE_THROW(ShapeCheck); }

        RECURSE_BEGIN();

        if (!amsk) {
            if (mke)      { RECURSE_RETURN(enclose(a op b)); }
            else          { RECURSE_RETURN(a op b); }
        }

        // Do type promotion
        // Use the promoted type as the result type
        var ta = (amsk & 1) ? arrbase(a) : typeof(a);
        var tb = (amsk & 2) ? arrbase(b) : typeof(b);
        var tr = Array;
        if (isnumeric(ta) && isnumeric(tb) && !mke) {
            tr = packtype(promote(ta,tb));
        }
        var res = (amsk & 1) ? packed(tr,sa) : packed(tr,sb);

        switch (amsk) {
        //se 0 :                    res     = a     op  b;
        case 1 :  forall(res#[i]) { res#[i] = a#[i] op  b;     }
        case 2 :  forall(res#[i]) { res#[i] = a     op  b#[i]; }
        case 3 :  forall(res#[i]) { res#[i] = a#[i] op  b#[i]; }
        }

        if (mke) forall(res#[i]) { res#[i] = enclose(res#[i]); }
        RECURSE_RETURN(pack(res));
    }
}

// Non-promoted dyad
#define NPDYAD(name, op) {
    proc name(a, b)
    {
        // Normalize
        var sa, sb;
        var amsk = 0, mke = 0;
        CKVEC(a,sa,1)
        CKVEC(b,sb,2)

        if ((amsk == 3) && arrcmp(sa,sb)) { RECURSE_THROW(ShapeCheck); }

        RECURSE_BEGIN();

        if (!amsk) {
            if (mke)      { RECURSE_RETURN(enclose(a op b)); }
            else          { RECURSE_RETURN(a op b); }
        }

        // Result is guaranteed non-packable
        var res = (amsk & 1) ? array(sa) : array(sb);

        switch (amsk) {
        //se 0 :                    res     = a     op  b;
        case 1 :  forall(res#[i]) { res#[i] = a#[i] op  b;     }
        case 2 :  forall(res#[i]) { res#[i] = a     op  b#[i]; }
        case 3 :  forall(res#[i]) { res#[i] = a#[i] op  b#[i]; }
        }

        if (mke) forall(res#[i]) { res#[i] = enclose(res#[i]); }
        RECURSE_RETURN(pack(res));
    }
}

// Typed dyad
#define TDYAD(name, op, typ) {
    proc name(a, b)
    {
        // Normalize
        var sa, sb;
        var amsk = 0, mke = 0;
        CKVEC(a,sa,1)
        CKVEC(b,sb,2)

        if ((amsk == 3) && arrcmp(sa,sb)) { RECURSE_THROW(ShapeCheck); }

        RECURSE_BEGIN();

        if (!amsk) {
            if (mke)      { RECURSE_RETURN(enclose(a op b)); }
            else          { RECURSE_RETURN(a op b); }
        }

        // "typ" is the type we need (e.g. Bool for <)
        var tr = mke ? Array : typ;
        var res = (amsk & 1) ? packed(tr,sa) : packed(tr,sb);

        switch (amsk) {
        //se 0 :                    res     = a     op  b;
        case 1 :  forall(res#[i]) { res#[i] = a#[i] op  b;     }
        case 2 :  forall(res#[i]) { res#[i] = a     op  b#[i]; }
        case 3 :  forall(res#[i]) { res#[i] = a#[i] op  b#[i]; }
        }

        if (mke) forall(res#[i]) { res#[i] = enclose(res#[i]); }
        RECURSE_RETURN(pack(res));
    }
}

/*************************************************************************
 * One-argument intrinsic operators
 *************************************************************************/
#define INTR1(name, intr) {
    proc name(a)
    {
        var res;

        RECURSE_BEGIN();
        if (isarray(a)) {
            res = array(shape(a));
            forall (res#[i]) { res#[i] = intr(a#[i]); }
            res = pack(res);
        }
        else if (??a == Enclosure) {
            res = intr(a.disclose()).enclose();
        }
        else {
            res = intr(a);
        }
        RECURSE_RETURN(res);
    }
}

/*************************************************************************
 * Two-argument intrinsic operators
 *************************************************************************/
#define INTR2(name, intr) {
    proc name(a, b)
    {
        // Normalize
        var sa, sb;
        var amsk = 0, mke = 0;
        CKVEC(a,sa,1)
        CKVEC(b,sb,2)

        if ((amsk == 3) && arrcmp(sa,sb)) { RECURSE_THROW(ShapeCheck); }

        RECURSE_BEGIN();

        if (!amsk) {
            if (mke)      { RECURSE_RETURN(enclose(intr(a,b))); }
            else          { RECURSE_RETURN(intr(a, b)); }
        }

        // Do type promotion
        var ta = (amsk & 1) ? arrbase(a) : typeof(a);
        var tb = (amsk & 2) ? arrbase(b) : typeof(b);
        var tr = Array;
        if (isnumeric(ta) && isnumeric(tb) && !mke) {
            tr = packtype(promote(ta,tb));
        }
        var res = (amsk & 1) ? packed(tr,sa) : packed(tr,sb);

        switch (amsk) {
        //se 0 :                    res     = intr(a,     b);
        case 1 :  forall(res#[i]) { res#[i] = intr(a#[i], b);     }
        case 2 :  forall(res#[i]) { res#[i] = intr(a,     b#[i]); }
        case 3 :  forall(res#[i]) { res#[i] = intr(a#[i], b#[i]); }
        }

        if (mke) forall(res#[i]) { res#[i] = enclose(res#[i]); }
        RECURSE_RETURN(res);
    }
}

/*************************************************************************
 * Three-argument intrinsic operators
 *************************************************************************/
#define INTR3(name, intr) {
    proc name(a, b, c)
    {
        // Normalize
        var sa, sb, sc;
        var amsk = 0, mke = 0;
        CKVEC(a,sa,1)
        CKVEC(b,sb,2)
        CKVEC(c,sc,4)

        if ((amsk == 7) && (arrcmp(sa,sb) || arrcmp(sa,sc))) {
            RECURSE_THROW(ShapeCheck);
        }

        RECURSE_BEGIN();
        if (!amsk) {
            if (mke)      { RECURSE_RETURN(enclose(intr(a,b,c))); }
            else          { RECURSE_RETURN(intr(a, b, c)); }
        }

        // Do type promotion
        var ta = (amsk & 1) ? arrbase(a) : typeof(a);
        var tb = (amsk & 2) ? arrbase(b) : typeof(b);
        var tc = (amsk & 4) ? arrbase(c) : typeof(c);
        var tr = Array;
        if (isnumeric(ta) && isnumeric(tb) && isnumeric(tc) && !mke) {
            tr = packtype(promote(promote(ta,tb),tc));
        }
        var res = (amsk & 1) ? packed(tr,sa)
                             : ((amsk & 2) ? packed(tr,sb) : packed(tr,sc));

        switch (amsk) {
        //se 0 :                    res    =  intr(a,     b,     c);
        case 1 :  forall(res#[i]) { res#[i] = intr(a#[i], b,     c); }
        case 2 :  forall(res#[i]) { res#[i] = intr(a,     b#[i], c); }
        case 3 :  forall(res#[i]) { res#[i] = intr(a#[i], b#[i], c); }
        case 4 :  forall(res#[i]) { res#[i] = intr(a,     b,     c#[i]); }
        case 5 :  forall(res#[i]) { res#[i] = intr(a#[i], b,     c#[i]); }
        case 6 :  forall(res#[i]) { res#[i] = intr(a,     b#[i], c#[i]); }
        case 7 :  forall(res#[i]) { res#[i] = intr(a#[i], b#[i], c#[i]); }
        }

        if (mke) forall(res#[i]) { res#[i] = enclose(res#[i]); }

        RECURSE_RETURN(res);
    }
}

/*************************************************************************
 * Four-argument intrinsic operators
 *************************************************************************/
#define INTR4(name, intr) {
    proc name(a, b, c, d)
    {
        // Normalize
        var sa, sb, sc, sd;
        var amsk = 0, mke = 0;
        CKVEC(a,sa,1)
        CKVEC(b,sb,2)
        CKVEC(c,sc,4)
        CKVEC(d,sd,8)

        if ((amsk == 15) && (arrcmp(sa,sb) || arrcmp(sa,sc) || arrcmp(sa,sd))) {
            RECURSE_THROW(ShapeCheck);
        }

        RECURSE_BEGIN();
        if (!amsk) {
            if (mke)      { RECURSE_RETURN(enclose(intr(a,b,c,d))); }
            else          { RECURSE_RETURN(intr(a, b, c, d)); }
        }

        // Do type promotion
        var ta = (amsk & 1) ? arrbase(a) : typeof(a);
        var tb = (amsk & 2) ? arrbase(b) : typeof(b);
        var tc = (amsk & 4) ? arrbase(c) : typeof(c);
        var td = (amsk & 8) ? arrbase(d) : typeof(d);
        var tr = Array;
        if (isnumeric(ta) && isnumeric(tb) && isnumeric(tc)
            && isnumeric(td) && !mke)
        {
            tr = packtype(promote(promote(promote(ta,tb),tc),td));
        }
        var res = (amsk & 1) ? packed(tr,sa)
                             : ((amsk & 2) ? packed(tr,sb)
                                           : ((amsk & 4) ? packed(tr,sc)
                                                         : packed(tr,sd)));

        switch (amsk) {
        //se 0 :                   res     = intr(a,     b,     c,     d);
        case 1 :  forall(res#[i]) {res#[i] = intr(a#[i], b,     c,     d); }
        case 2 :  forall(res#[i]) {res#[i] = intr(a,     b#[i], c,     d); }
        case 3 :  forall(res#[i]) {res#[i] = intr(a#[i], b#[i], c,     d); }
        case 4 :  forall(res#[i]) {res#[i] = intr(a,     b,     c#[i], d); }
        case 5 :  forall(res#[i]) {res#[i] = intr(a#[i], b,     c#[i], d); }
        case 6 :  forall(res#[i]) {res#[i] = intr(a,     b#[i], c#[i], d); }
        case 7 :  forall(res#[i]) {res#[i] = intr(a#[i], b#[i], c#[i], d); }
        case 8 :  forall(res#[i]) {res#[i] = intr(a,     b,     c,     d#[i]);}
        case 9 :  forall(res#[i]) {res#[i] = intr(a#[i], b,     c,     d#[i]);}
        case 10 : forall(res#[i]) {res#[i] = intr(a,     b#[i], c,     d#[i]);}
        case 11 : forall(res#[i]) {res#[i] = intr(a#[i], b#[i], c,     d#[i]);}
        case 12 : forall(res#[i]) {res#[i] = intr(a,     b,     c#[i], d#[i]);}
        case 13 : forall(res#[i]) {res#[i] = intr(a#[i], b,     c#[i], d#[i]);}
        case 14 : forall(res#[i]) {res#[i] = intr(a,     b#[i], c#[i], d#[i]);}
        case 15 : forall(res#[i]) {res#[i] = intr(a#[i], b#[i], c#[i], d#[i]);}
        }

        if (mke) forall(res#[i]) { res#[i] = enclose(res#[i]); }
        RECURSE_RETURN(res);
    }
}


PDYAD($vadd, +)
PDYAD($vsub, -)
PDYAD($vmul, *)
PDYAD($vpow, **)
PDYAD($vdiv, /)
PDYAD($vmod, %)
PDYAD($vand, &)
PDYAD($vor, |)
PDYAD($vxor, ^)
PDYAD($vlshift, <<)
PDYAD($vrshift, >>)
TDYAD($vlt, <, Bool)
TDYAD($vle, <=, Bool)
TDYAD($veq, #=, Bool)
TDYAD($vge, >=, Bool)
TDYAD($vgt, >, Bool)
TDYAD($vne, !=, Bool)
NPDYAD($vcvt, =>)

MONAD($vneg, -)
MONAD($vnot, ~)
POSTFIX($vinc, ++)
POSTFIX($vdec, --)

INTR1($vabs, abs)
INTR1($vsignum, signum)

INTR2($vfix2flt, fix2flt)
INTR2($vflt2fix, flt2fix)
INTR2($vmin, min)
INTR2($vmax, max)

INTR3($vclamp, clamp)
INTR3($vlerp, lerp)

INTR4($vsatadd, satadd)
INTR4($vsatsub, satsub)

proc $vindex(a)
{
    if (!a.isarray()) throw oadl::TypeCheck;
    var ixs = oadl::argvec()[1:];       // ixs is the list of indexes
    var ixn = ixs.length();
    if (a.rank() < ixn) throw oadl::ShapeCheck; // Must match
    var atyp = typeof(a);
    var dshp = 0 .iterate(); // Start with empty shape
    var dlen = dshp; // And empty partition info
    var dstride = dshp; // And emtpy stride concatenation
    var res;

    // Append iteration arrays as needed
    var arank = a.rank(), ashp = a.shape();
    for (var i = ixn; i < arank; i++) {
        ixs = ixs ## { a.shape()[i].iterate() };
        ixn++;
    }

    // Build iteration structures.  dshp is the ultimate shape of
    // the assigned area. dlen is an array of partition info - an
    // integer for each index indicating how many dimensions that
    // index comprises. dstride is a concatenated array of strides.
    forall (ixs[i]) {
        var ixi = Int(ixs[i]);
        ixs[i] = ixi;
        if (ixi.isarray()) {
            dshp = dshp ## ixi.shape();
            dlen = dlen ## ixi.rank();
            dstride = dstride ## ixi.stride();
        }
        else {
            dlen = dlen ## 0;
        }
    }
    if (dshp.length()) {
        // Build the array result
        res = new atyp(dshp);
    }
    else {
        res = new atyp(1); // Gotta have something to iterate
    }

    // Save the strides - we will use them to create offsets
    var str = a.stride();

    // Iterate through the destination and create source indexes
    var idx = 0 .reshape(dshp.length());
    forall (res#[roffs]) {
        // Compose the index
        var offs = 0, tidx = idx, tstride = dstride;
        forall (ixs[i]) {
            var ixi = ixs[i], dli = dlen[i], aIdx;
            if (dli) {
                // Vector - look up appropriate index
                var currIdx = tidx[:dli-1];
                var currStr = tstride[:dli-1];
                aIdx = ixi#[(currIdx*currStr).reduce(`+)];
                // Treat tidx and tstride as pointers
                tidx = tidx[dli:];
                tstride = tstride[dli:];
            }
            else {
                // Scalar - index arg *is* the index
                aIdx = ixi;
            }
            aIdx = Int(aIdx);
            offs += aIdx*str[i];
        }
        res#[roffs] = a#[offs];
        idx = idx.increment(dshp);
    }

    // Strip artificial array enclosure if necessary
    return dshp.length() ? res : res[0];
}

proc $vsetsubr(a)
{
    if (!a.isarray()) throw oadl::TypeCheck;
    var ixn = oadl::nargs()-2;
    if (ixn & 1) throw oadl::ArgCheck;
    var nidx = ixn / 2;
    var idx = new Array(nidx+1);
    var ixs = oadl::argvec()[1:ixn];  // ixs is the list of indexes
    var v = oadl::arg(ixn+1);
    if (a.rank() != nidx) throw oadl::ShapeCheck; // Must match
    var ashp = a.shape();

    // Convert list of beg,end pairs to list of [beg:end] iterators
    for (var i = 0; i < nidx; i++) {
        var beg = ixs[2*i];
        var end = ixs[2*i+1];

        // Clamp to array
        if ((beg == nil) || (beg < 0)) beg = 0;
        if ((end == nil) || (end >= ashp[i])) end = ashp[i] - 1;

        // Force indices to ints
        beg = Int(beg);
        end = Int(end);

        // It's illegal to assign an empty subr
        if (beg > end) throw oadl::RangeCheck;

        // Create either a scalar or a subr
        idx[i] = (beg == end) ? beg : [beg : end];
    }
    idx[nidx] = v;
    a.`[=]#(idx);
}

proc $vsetindex(a)
{
    if (!a.isarray()) throw oadl::TypeCheck;
    var ixn = oadl::nargs()-2;
    var ixs = oadl::argvec()[1:ixn];  // ixs is the list of indexes
    var v = oadl::arg(ixn+1);
    var arnk = a.rank();
    if (arnk < ixn) throw oadl::ShapeCheck; // Must match
    var atyp = typeof(a);
    var ashp = a.shape();
    var dshp = 0 .iterate(); // Start with empty shape
    var dlen = dshp; // And empty partition info
    var dstride = dshp; // And empty stride concatenation

    if (arnk > ixn) {
        // Make up indices for the last dimensions
        ixs = ixs.reshape(arnk);
        for (var i = ixn; i < arnk; i++) {
            ixs[i] = [0:ashp[i]-1];
        }
    }

    // Build iteration structures.  dshp is the ultimate shape of
    // the assigned area. dlen is an array of partition info - an
    // integer for each index indicating how many dimensions that
    // index comprises. dstride is a concatenated array of strides.
    forall (ixs[i]) {
        var ixi = Int(ixs[i]);
        ixs[i] = ixi;
        if (ixi.isarray()) {
            dshp = dshp ## ixi.shape();
            dlen = dlen ## ixi.rank();
            dstride = dstride ## ixi.stride();
        }
        else {
            dlen = dlen ## 0;
        }
    }
    if (v.isarray()) {
        var len = dshp.length(), alleq;
        if (len) {
            // The shape of the assignee area and the value must match.
            alleq = (len == v.rank()) && (dshp #= v.shape()).reduce(`&);
            if (!alleq) throw oadl::ShapeCheck;
        }
        else {
            // We can assign an array to a slot but
            // we need something to iterate
            v = {v};
        }
    }
    else if (dshp.length()) {
        // A scalar matches an array destination.
        v = v.reshape(dshp);
    }
    else {
        v = {v}; // Gotta have something to iterate
    }

    // Save the strides - we will use them to create offsets
    var str = a.stride();

    // Iterate through the source and create destination indexes.
    var idx = 0 .reshape(dshp.length());
    forall (v#[roffs]) {
        // Compose the index
        var offs = 0, tidx = idx, tstride = dstride;
        forall (ixs[i]) {
            var ixi = ixs[i], dli = dlen[i], aIdx;
            if (dli) {
                // Vector - look up appropriate index
                var currIdx = tidx[:dli-1], currStr = tstride[:dli-1];
                aIdx = ixi#[(currIdx*currStr).reduce(`+)];
                // Treat tidx and tstride as pointers
                tidx = tidx[dli:];
                tstride = tstride[dli:];
            }
            else {
                // Scalar - index arg *is* the index
                aIdx = ixi;
            }
            if ((aIdx < 0) || (aIdx >= ashp[i])) throw oadl::RangeCheck;
            offs += aIdx*str[i];
        }
        a#[offs] = v#[roffs];
        idx = idx.increment(dshp);
    }
}

proc $vindexhash(a,idx)
{
    idx = Int(idx);
    var res = a.reshape(idx.shape());

    forall (res#[i]) {
        res#[i] = a#[idx#[i]];
    }
    return res;
}

proc $vsetindexhash(a,idx,val)
{
    idx = Int(idx);
    if (val.isarray()) {
        // Just need to match in length
        if (idx.sizeof() != val.sizeof()) throw oadl::ShapeCheck;
        forall (idx#[i]) {
            a#[idx#[i]] = val#[i];
        }
    }
    else {
        forall (idx#[i]) {
            a#[idx#[i]] = val;
        }
    }
}

// Used for min/max/concat
proc $vaccumargs()
{
    var i, n, prc, res;

    n = nargs() - 1;    // The number of items does not include the prc
    prc = arg(n);      // The proc to apply is the last arg
    res = arg(0);      // This assumes n >= 2
    RECURSE_BEGIN();
    for (i = 1; i < n; i++) {
        res = prc(res,arg(i));
    }
    RECURSE_RETURN(res);
}

proc redIdent(op, src)
{
    var base = src.arrbase();

    switch (op) {
    case `|, `^, `+, `-, `% :   return (base == Array) ? 0 : base(0);
    case `*, `/, `** :          return (base == Array) ? 1 : base(1);
    case `&, `min :             return (base == Array) ? 1 : base.maxval();
    case `max :                 return (base == Array) ? 0 : base.minval();
    default :                   return nil;
    }
}

proc $vreduce()
{
    if (nargs() > 3) throw ArgCheck;
    var src = arg(0);
    var srcShape = src.shape();
    var srcRank = src.rank();
    var op = arg(1);
    var ax = (nargs() == 3) ? arg(2) : -1;
    if (ax < 0) ax = srcRank + ax;
    var dstRank = srcRank-1;
    var dstShape = dstRank ? (srcShape[:ax-1] ## srcShape[ax+1:]) : [1];
    var srcStride = src.stride();
    var ssax = srcStride[ax];
    var st = src.parent.parent;
    var dst = dstRank ? (new st(dstShape)) : {redIdent(op, src)};
    var dstIdx = dstRank ? (0).reshape(dstRank) : [0];
    var dstLen = src.sizeof() ? dst.sizeof() : 0;
    var dstTail = dstRank ? ax : ax+1;
    var so;
    var n = srcShape[ax];

    for (var dstOffs = 0; dstOffs < dstLen; dstOffs++) {
        so  = (srcStride[:ax-1]*dstIdx[:ax-1]).reduce(`+);
        so += (srcStride[ax+1:]*dstIdx[dstTail:]).reduce(`+);
        so += ssax*(n - 1);
        var v = src#[so]; so -= ssax;
        if (typeof(op) == Public) {
            for (var i = n-2; i >= 0; i--, so -= ssax) {
                var lhs = src#[so];
                v = lhs.(op)(v);
            }
        }
        else {
            // Proc or extern maybe
            for (var i = n-2; i >= 0; i--, so -= ssax) {
                var lhs = src#[so];
                v = op(lhs,v);
            }
        }
        dst#[dstOffs] = v;
        dstIdx = dstIdx.increment(dstShape);
    }
    return dstRank ? dst : dst[0];
}

proc $vaccum()
{
    var na = nargs();
    if (na > 3) throw ArgCheck;
    var src = arg(0);
    var srcShape = src.shape();
    var srcRank = src.rank();
    var op = arg(1);
    var ax = (na == 3) ? arg(2) : -1;
    if (ax < 0) ax = srcRank + ax;
    var dstShape = srcShape[:ax-1] ## srcShape[ax+1:];
    var dstRank = srcRank-1;
    var srcStride = src.stride();
    var dst = @src;
    var dstIdx = (0).reshape(dstRank);
    var offs;
    var axisLen = srcShape[ax];
    var axisStride = srcStride[ax];
    var typ = typeof(src);
    var optim = true;
    var alt = op;

    switch (op) {
    case `% :
        optim = false;
    case `** :
        optim = false;
    case `/ :
        optim = (typ == PackFloat) || (typ == PackDouble);
        alt = `*;
    case `- :
        alt = `+;
    }
    if (typeof(op) != Public) {
        optim = false;
    }

    if (optim) {
        do {
            offs  = (srcStride[:ax-1]*dstIdx[:ax-1]).reduce(`+);
            offs += (srcStride[ax+1:]*dstIdx[ax:]).reduce(`+);
            var prev = src#[offs];
            dst#[offs] = prev;

            for (var i = 1; i < axisLen; i++) {
                offs += axisStride;
                var val = src#[offs];
                prev = (i & 1) ? prev.(op)(val) : prev.(alt)(val);
                dst#[offs] = prev;
            }
            dstIdx = dstIdx.increment(dstShape);
        } while (dstIdx);
    }
    else {
        var row = src.reshape(axisLen);
        do {
            offs  = (srcStride[:ax-1]*dstIdx[:ax-1]).reduce(`+);
            offs += (srcStride[ax+1:]*dstIdx[ax:]).reduce(`+);

            // Get a row from the source
            for (var i = 0; i < axisLen; i++) {
                row[i] = src#[offs];
                offs += axisStride;
            }
            // offs is now pointing one after the axis. This is on purpose.

            // Execute scan in the row
            for (var i = axisLen-1; i >= 0; i--) {
                var val = row[i];
                offs -= axisStride;
                if (typeof(op) == Public) {
                    for (var j = i-1; j >= 0; j--) {
                        val = row[j].(op)(val);
                    }
                }
                else {
                    // Proc or extern
                    for (var j = i-1; j >= 0; j--) {
                        val = op(row[j],val);
                    }
                }
                dst#[offs] = val;
            }
            dstIdx = dstIdx.increment(dstShape);
        } while (dstIdx);
    }
    return dst;
}

proc $venclose(arr, axis)
{
    if (nargs() > 2) throw ArgCheck;
    // Check for null axis specification
    if (nargs() < 2) return {arr};

    var shp = arr.shape();
    var typ = typeof(arr);
    var rnk = arr.rank();
    var ax = axis;
    var aLen;

    // Normalize axis
    if (!ax.isarray()) ax = [ax];

    // Adjust relative axis
    forall (ax#[i]) if (ax#[i] < 0) ax#[i] += rnk;
    aLen = ax.length();

    // Error check - must be a vector
    if (ax.rank() != 1) throw ShapeCheck;

    // Error check - each axis must be present only once
    // and must be in range
    forall (ax[i]) {
        if (ax[i] >= rnk) throw RangeCheck;
        for (var j = i+1; j < aLen; j++) {
            if (ax[i] == ax[j]) throw ShapeCheck;
        }
    }

    // Build the new outer shape
    var newShp = [];
    var newAxis = [];
    var scalar = true;
    forall(shp[i]) {
        var found = false;
        forall(ax[j]) if (ax[j] == i) found = true;
        if (!found) {
            newShp = newShp ## shp[i];
            newAxis = newAxis ## i;
            scalar = false;
        }
    }
    forall (ax[i]) newAxis = newAxis ## ax[i];

    var inner = shp[ax];
    var srcStride = arr.stride();

    // Create the destination array
    var result = new Array(scalar ? 1 : newShp);
    forall (result#[i]) result#[i] = new typ(inner);

    var outStr = result.stride();
    var inStr = result#[0].stride();

    var outIdx = (0).reshape(outStr.length());
    var idxSrc = new PackInt(newAxis.length());

    do {
        var iOut;
        iOut = (outStr*outIdx).reduce(`+);
        var curr = result#[iOut];
        var inIdx = (0).reshape(inner.length());
        do {
            var idxDst = scalar ? inIdx : (outIdx ## inIdx);
            forall (idxSrc[i]) idxSrc[newAxis[i]] = idxDst[i];
            var iSrc, iIn;
            iSrc = (srcStride*idxSrc).reduce(`+);
            iIn = (inStr*inIdx).reduce(`+);
            curr#[iIn] = arr#[iSrc];
            inIdx = inIdx.increment(inner);
        } while (inIdx);
        outIdx = scalar ? nil : outIdx.increment(newShp);
    } while (outIdx);

    forall (result#[i]) result#[i] = result#[i].enclose();
    return result;
}

proc $vdisclose(arr)
{
    if (arr.isarray()) {
        return foreach(arr#[i]) {arr#[i].disclose()};
    }
    else {
        return arr.disclose();
    }
}

proc $vouter(arr1,op,arr2)
{
    if (nargs() != 3) throw ArgCheck;
    if (!(isarray(arr1) && isarray(arr2))) {
        return (op ?= Public) ? arr1.(op)(arr2) : op(arr1,arr2);
    }

    var result;
    var typ1 = typeof(arr1); if (typ1 == List) typ1 = Array;
    var typ2 = typeof(arr2); if (typ2 == List) typ2 = Array;
    var rtyp;
    var shp1 = arr1.shape();
    var shp2 = arr2.shape();
    var sz2 = arr2.sizeof();
    var typ = nil;

    switch (op) {
    case `<, `<=, `>, `>=, `#=, `!= :
        rtyp = ((typ1 == Array) || (typ2 == Array)) ? Array : PackBool;
    case `== :
        op = `#=;
        rtyp = ((typ1 == Array) || (typ2 == Array)) ? Array : PackBool;
    default :
        rtyp = (typ1 == typ2) ? typ1 : Array;
        if (typeof(op) != Public) rtyp = Array;
    }

    result = new rtyp(shp1##shp2);
    if (typeof(op) == Public) {
        forall (result#[i]) {
            var i1 = i / sz2;
            var i2 = i % sz2;
            result#[i] = (arr1#[i1]).(op)(arr2#[i2]);
        }
    }
    else {
        forall (result#[i]) {
            var i1 = i / sz2;
            var i2 = i % sz2;
            result#[i] = op(arr1#[i1], arr2#[i2]);
        }
    }

    if (rtyp == Array) {
        result = result.pack();
    }

    return result;
}

proc $vinner(arr1,op1,op2,arr2)
{
    if (nargs() != 4) throw ArgCheck;
    var ca = arr1.enclose(-1);
    var cb = arr2.enclose(0);
    var out = ca.outer(op2,cb);
    var result = new Array(out.shape());
    forall (out#[i]) result#[i] = (out#[i]).disclose().reduce(op1);
    result = result.pack();
    if (result.sizeof() == 1) result = result#[0];
    return result;
}

proc $vincrement(idx,shp)
{
    if (nargs() != 2) throw ArgCheck;
    if (idx.rank() != 1) throw ShapeCheck;
    if (shp.rank() != 1) throw ShapeCheck;
    var len = idx.length();
    if (len != shp.length()) throw RangeCheck;
    var result = new PackInt(len);

    forall (result[i]) result[i] = idx[i];

    // Assume fully rolled up
    for (var i = len-1; i >= 0; i--) {
        // Increment the index
        result[i]++;

        // Are we done rolling up the odometer?
        if (shp[i] > result[i]) return result;

        // Roll back for next trip around the loop
        result[i] = 0;
    }

    // Rolled up all the way
    return nil;
}

proc $vravel()
{
    var arr = arg(0);
    var axis = nil;
    var shp = arr.shape();
    var rnk = arr.rank();

    if (nargs() > 2)      axis = argvec()[1:];
    else if (nargs() > 1) axis = arg(1);

    if (axis.isarray()) {
        var prod = 1;
        var alen = axis.length();
        if ((alen > rnk) || (axis.rank() != 1)) throw ShapeCheck;
        forall (axis[i]) {
            if (!axis[i].isinteger()) throw TypeCheck;
            if ((axis[i] < 0) || (axis[i] >= rnk)) throw RangeCheck;
            if ((i > 0) && (axis[i] != (axis[i-1])+1)) throw ShapeCheck;
            prod *= shp[axis[i]];
        }
        shp = shp[0:axis[0]-1] ## prod ## shp[axis[alen-1]+1:];
    }
    else if (axis.isfloat()) {
        var i0;
        if ((axis <= -1.) || (axis >= rnk)) throw RangeCheck;
        i0 = (axis < 0.) ? Int(axis-1.) : Int(axis);
        if (i0 == axis) throw RangeCheck;
        shp = shp[:i0] ## 1 ## shp[i0+1:];
    }
    else if (axis.isinteger()) {
        if ((axis < 0) || (axis >= rnk)) throw RangeCheck;
        shp = arr.sizeof();
    }
    else if (axis == nil) {
        shp = arr.sizeof();
    }
    else {
        throw TypeCheck;
    }

    return arr.reshape(shp);
}

proc $vunique(arr)
{
    var res, tmp, n;

    if (!arr.isarray()) arr = {arr};

    // Put all items of arr into a dictionary
    tmp = new Dict(arr.sizeof());
    forall (arr#[i]) tmp[arr#[i]] = true;

    // Extract items of the dictionary into result
    res = new (arr.parent.parent)(arr.sizeof());
    n = 0;
    forall (tmp[i]) {
        if (tmp[i] != nil) {
            res[n] = i;
            n++;
        }
    }

    // Return sub-result
    return res[:n-1];
}

proc $vunion(lhs,rhs)
{
    var res, tmp, n;
    var ptype;

    if (!lhs.isarray()) lhs = {lhs};
    if (!rhs.isarray()) rhs = {rhs};

    ptype = lhs.parent.parent;
    if (rhs.parent.parent != ptype) ptype = Array;

    // Put all items of arr into a dictionary
    n = lhs.sizeof() + rhs.sizeof();
    tmp = new Dict(n);
    forall (lhs#[i]) tmp[lhs#[i]] = true;
    forall (rhs#[i]) tmp[rhs#[i]] = true;

    // Extract items of the dictionary into result
    res = new (ptype)(n);
    n = 0;
    forall (tmp[i]) {
        if (tmp[i] != nil) {
            res[n] = i;
            n++;
        }
    }

    // Return sub-result
    return res[:n-1];
}

proc $vintersect(lhs,rhs)
{
    var res, tmp, n;
    var ptype;

    if (!lhs.isarray()) lhs = {lhs};
    if (!rhs.isarray()) rhs = {rhs};

    ptype = lhs.parent.parent;

    // Put all items of arr into a dictionary
    n = lhs.sizeof();
    tmp = new Dict(n);
    forall (lhs#[i]) tmp[lhs#[i]] = 0;
    forall (rhs#[i]) {
        var x = rhs#[i];
        if (tmp[x] != nil) {
            tmp[x] = 1;
        }
    }

    // Extract items of the dictionary into result
    res = new (ptype)(n);
    n = 0;
    forall (tmp[i]) {
        if (tmp[i] == 1) {
            res[n] = i;
            n++;
        }
    }

    // Return sub-result
    return res[:n-1];
}

proc $vwithout(lhs,rhs)
{
    var res, tmp, n;
    var ptype;

    if (!lhs.isarray()) lhs = {lhs};
    if (!rhs.isarray()) rhs = {rhs};
    if (!lhs.length()) return {};

    ptype = lhs.parent.parent;

    // Put all items of arr into a dictionary
    n = lhs.sizeof();
    tmp = new Dict(n);
    forall (lhs#[i]) tmp[lhs#[i]] = true;
    forall (rhs#[i]) {
        var x = rhs#[i];
        if (tmp[x] != nil) {
            tmp[x] = nil;
        }
    }

    // Extract items of the dictionary into result
    res = new (ptype)(n);
    n = 0;
    forall (lhs#[i]) {
        var x = lhs#[i];
        if (tmp[x] != nil) {
            res[n] = x;
            n++;
        }
    }

    // Return sub-result
    return res[:n-1];
}

proc $vmember(lhs,rhs)
{
    var res, tmp, n;

    if (!lhs.isarray()) lhs = {lhs};
    if (!rhs.isarray()) rhs = {rhs};

    // Put all items of rhs into a dictionary
    n = rhs.sizeof();
    tmp = new Dict(n);
    forall (rhs#[i]) tmp[rhs#[i]] = true;

    // Extract items of the dictionary into result
    res = new PackBool(lhs.shape());
    forall (lhs#[i]) {
        res#[i] = (tmp[lhs#[i]] != nil);
    }

    return res;
}

proc $sort()
{
    const qsort = proc(arr, cmp, lo, hi) {
        const partition_cmp = proc(cmp, arr, lo, hi) {
            const med3 = proc(cmp, a, b, c) {
                if (cmp(a, b) < 0) {                  // ? a ? b ?
                    if (cmp(b, c) < 0)      return b; // a b c
                    else if (cmp(a, c) < 0) return c; // a c b
                    else                    return a; // c a b
                }
                else {
                    if (cmp(b, c) > 0)      return b; // c b a
                    else if (cmp(a, c) > 0) return c; // b c a
                    else                    return a; // b a c
                }
            };
            var i = lo, j = hi, mid = lo + (hi - lo) / 2;
            var pivot = med3(cmp, arr[lo], arr[mid], arr[hi]);
            // Walk the array from the outside in, swapping
            // items less than the pivot with those greater than
            // the pivot. Original Hoare algorithm.
            while (true) {
                while (cmp(arr[i], pivot) < 0) i++;
                while (cmp(arr[j], pivot) > 0) j--;
                if (i >= j) return j;
                var tmp = arr[i]; arr[i] = arr[j]; arr[j] = tmp;
                i++; j--;
            }
        };
        const partition_pub = proc(pub, arr, lo, hi) {
            const med3 = proc(pub, a, b, c) {
                if (a.(pub)(b) < 0) {                  // ? a ? b ?
                    if (b.(pub)(c) < 0)      return b; // a b c
                    else if (a.(pub)(c) < 0) return c; // a c b
                    else                     return a; // c a b
                }
                else {
                    if (b.(pub)(c) > 0)      return b; // c b a
                    else if (a.(pub)(c) > 0) return c; // b c a
                    else                     return a; // b a c
                }
            };
            var i = lo, j = hi, mid = lo + (hi - lo) / 2;
            var pivot = med3(pub, arr[lo], arr[mid], arr[hi]);
            // Walk the array from the outside in, swapping
            // items less than the pivot with those greater than
            // the pivot. Original Hoare algorithm.
            while (true) {
                while (arr[i].(pub)(pivot) < 0) i++;
                while (arr[j].(pub)(pivot) > 0) j--;
                if (i >= j) return j;
                var tmp = arr[i]; arr[i] = arr[j]; arr[j] = tmp;
                i++; j--;
            }
        };
        const partition_op = proc(op1, op2, arr, lo, hi) {
            const med3 = proc(op1, op2, a, b, c) {
                if (a.(op1)(b)) {                  // ? a ? b ?
                    if (b.(op1)(c))      return b; // a b c
                    else if (a.(op1)(c)) return c; // a c b
                    else                 return a; // c a b
                }
                else {
                    if (b.(op2)(c))      return b; // c b a
                    else if (a.(op2)(c)) return c; // b c a
                    else                 return a; // b a c
                }
            };
            var i = lo, j = hi, mid = lo + (hi - lo) / 2;
            var pivot = med3(op1, op2, arr[lo], arr[mid], arr[hi]);
            while (true) {
                while (arr[i].(op1)(pivot)) i++;
                while (arr[j].(op2)(pivot)) j--;
                if (i >= j) return j;
                var tmp = arr[i]; arr[i] = arr[j]; arr[j] = tmp;
                i++; j--;
            }
        };
        var part;

        // Use a loop instead of tail recursion (see below)
        while (lo < hi) {

            switch (cmp) {
            case `< :
                part = partition_op(`<, `>, arr, lo, hi);
            case `> :
                part = partition_op(`>, `<, arr, lo, hi);
            default :
                if (cmp.parent == Public) {
                    part = partition_pub(cmp, arr, lo, hi);
                }
                else {
                    part = partition_cmp(cmp, arr, lo, hi);
                }
            }

            var loNum = part - lo;
            var hiNum = hi - part - 1;

            // Elimninate the tail recursion of the bigger partition
            if (loNum < hiNum) {
                (proc)(arr, cmp, lo, part);
                lo = part + 1;
            }
            else {
                (proc)(arr, cmp, part + 1, hi);
                hi = part;
            }
        }
    };

    var arr = arg(0);
    var cmp;

    switch (nargs()) {
    case 1 :    // arr.sort()
        cmp = `<;
    case 2 :    // arr.sort(cmp)
        cmp = arg(1);
    default :
        throw ArgCheck;
    }

    // Make a copy of the array since it gets overwritten
    arr = @arr;

    qsort(arr, cmp, 0, arr.length() - 1);

    return arr;
}

proc doTakeDrop(isTake, arr, count, axis)
{
    var axLen, countLen;
    var aShp = arr.shape();
    var aRnk = arr.rank();
    var aTyp = arr.parent.parent; // Need the non-size-qualified type
    var range = new Array(2*aRnk);
    var dstShp = new PackInt(aRnk);

    // Check for valid count
    var countArr = count.isarray();
    if (countArr) {
        // Must be a vector if it is an array
        if (count.rank() != 1) throw ShapeCheck;
        countLen = count.length();
    }

    // Create axis
    var ax = (axis != nil) ? axis : (countArr ? (countLen.iterate()) : 0);

    // Check for valid axis. Rely on OADL checks for RangeChecks on axis elems
    var axArr = ax.isarray();
    if (axArr) {
        // Must be a vector if it is an array
        if (ax.rank() != 1) throw ShapeCheck;
        // If we pass a vector count, shape must match
        axLen = ax.length();
        if (countArr && (countLen != axLen)) throw ShapeCheck;
    }

    // Compose "range" array
    var n = axArr ? axLen : (countArr ? countLen : 1);
    for (var i = 0; i < n; i++) {
        var ix = axArr ? ax[i] : ax;
        if (ix < 0) ix += aShp[i];
        if (range[2*ix] != nil) throw ShapeCheck;
        range[2*ix] = countArr ? count[i] : count;
    }

    var empty = false;
    for (var i = 0; i < aRnk; i++) {
        var x = range[2*i];
        if (x == nil) {
            // Not specified - accept entire dimension
            range[2*i  ] = 0;
            range[2*i+1] = aShp[i] - 1;
        }
        else if (x < 0) {
            if (isTake) {
                // "Take the last -x elements"
                range[2*i  ] = aShp[i] + x;
                range[2*i+1] = range[2*i] - x - 1;
            }
            else {
                // "Drop the last -x elements"
                range[2*i  ] = 0;
                range[2*i+1] = x + aShp[i] - 1;
            }
        }
        else {
            if (isTake) {
                // "Take the first x elements"
                range[2*i  ] = 0;
                range[2*i+1] = x - 1;
            }
            else {
                // "Drop the first x elements"
                range[2*i  ] = x;
                range[2*i+1] = aShp[i] - 1;
            }
        }
        // Check for empty result
        dstShp[i] = range[2*i+1] - range[2*i] + 1;
        if (dstShp[i] <= 0) empty = true;
    }
    if (empty) return new aTyp(dstShp);

    // Actually perform the subrange
    var srcRange = new Array(aRnk);
    var dstRange = new Array(aRnk+1);
    var overtake = false;
    var res;

    for (var i = 0; i < aRnk; i++) {
        var srcLower = range[2*i];
        var srcUpper = range[2*i+1];
        var dstLower = 0;

        if (srcLower < 0) {
            overtake = true;
            dstLower = -srcLower;
            srcLower = 0;
        }
        if (srcUpper >= aShp[i]) {
            overtake = true;
            srcUpper = aShp[i] - 1;
        }
        srcRange[i] = [srcLower:srcUpper];
        dstRange[i] = [dstLower:dstLower+srcUpper-srcLower];
    }

    arr = arr.`[]#(srcRange);
    if (overtake) {
        res = new aTyp(dstShp);
        dstRange[n] = arr;
        res.`[=]#(dstRange);
    }
    else {
        res = arr;
    }
    return res;
}

proc $vtake(arr, count)
{
    var na = nargs();
    if (na > 3) throw ArgCheck;

    return doTakeDrop(true, arr, count, (na==3)?arg(2):nil);
}

proc $vdrop(arr, count)
{
    var na = nargs();
    if (na > 3) throw ArgCheck;

    return doTakeDrop(false, arr, count, (na==3)?arg(2):nil);
}

proc $vreplicate(arr, repCounts, axArg)
{
    var ax = (nargs() > 2) ? axArg : -1;
    if (!arr.isarray()) arr = [arr];
    if (!repCounts.isarray()) repCounts = [repCounts];
    var numReps = repCounts.length();
    var shp = arr.shape();
    var newShp = @shp;
    var rnk = arr.rank();
    var strd = arr.stride();
    var newAxis, dst;
    var outerCount, innerCount;
    var srcIdx = 0, dstIdx = 0;
    var axLen;
    var srcScalar, repScalar;
    var expandNeg = false;
    var numNegRep = 0;

    // Uggh - type promotion bytes us
    var savePromote = TypePromote;
    TypePromote = Null;

    try {
        // Validate the axis
        if (ax.isarray()) throw ShapeCheck;
        if (ax < 0) ax += rnk;
        // We will rely on OADL itself to check that ax is now in range

        // Check for scalar src and repeat
        axLen = shp[ax];
        srcScalar = (axLen == 1);
        repScalar = (numReps == 1);

        // Check for simple vector of repeat counts
        if (repCounts.rank() > 1) throw ShapeCheck;
        if (repCounts.length() == 1) {
            // Replicate it to match array axis
            repCounts = repCounts.reshape(axLen);
            numReps = axLen;
        }

        // Calculate the size of the new axis and allocate the dst array
        newAxis = 0;
        forall (repCounts[i]) {
            if (!repCounts[i].isinteger()) throw TypeCheck;
            if (repCounts[i] < 0) numNegRep++;
            newAxis += repCounts[i].abs();
        }

        // If not a "scalar" repeat, has to be at least same length as
        // axis shape
        if ((numReps < axLen) && !repScalar) throw ShapeCheck;

        // If not a "scalar" axis, non-negative items must match axis length
        if ((numReps > axLen) && !srcScalar) {
            if ((numReps-numNegRep) != axLen) throw ShapeCheck;
            // Otherwise, we are expanding negative entries
            expandNeg = true;
        }

        newShp[ax] = newAxis;
        dst = new (arr.parent.parent)(newShp);

        outerCount = (ax > 0) ? (arr.sizeof() / strd[ax-1]) : 1;
        innerCount = strd[ax];

        while (outerCount) {
            outerCount--;

            forall (repCounts[i]) {
                var repCnt = repCounts[i];
                var srcIdxSave = srcIdx;

                if (repCnt < 0) {
                    // If we are expanding negatives, don't skip inner
                    // count.
                    if (!expandNeg) srcIdx += innerCount;
                    // Skipping the dst relies upon "new" appropriately
                    // initializing to the type proto
                    dstIdx += innerCount * -repCnt;
                }
                else if (repCnt == 0) {
                    // Skip this entire chunk
                    srcIdx += innerCount;
                }
                else {
                    // We will iterate over the src innerCnt "repCnt" times
                    for (var j = 0; j < repCnt; j++) {
                        srcIdx = srcIdxSave;

                        for (var k = 0; k < innerCount; k++) {
                            dst#[dstIdx] = arr#[srcIdx];
                            srcIdx++;
                            dstIdx++;
                        }
                    }
                }

                if (srcScalar) srcIdx = srcIdxSave;
            }
            if (srcScalar) srcIdx += innerCount;
        }

        TypePromote = savePromote;
        return dst;
    }
    catch (err) {
        TypePromote = savePromote;
        throw err;
    }
}

proc $arrcmp(a1,a2)
{
    if (!a1.isarray()) a1 = [a1];
    if (!a2.isarray()) a2 = [a2];
    var shp1 = a1.shape(),    shp2 = a2.shape();
    var rnk1 = a1.rank(),     rnk2 = a2.rank();
    var len1 = shp1[rnk1-1],  len2 = shp2[rnk2-1];
    var len = len1.min(len2), sz = sz1.min(sz2);
    var sz1, sz2;
    var i, i1, i2, incr;
    var result;
    var savePromote = TypePromote;
    TypePromote = Null;

    RECURSE_BEGIN();

    try {
        // Compare the last dimensions of the two arrays, accumulating
        // the subarray size as we go
        result = 0;
        sz1 = 1; sz2 = 1;
        i1 = rnk1-1; i2 = rnk2-1;
        while ((i1 >= 0) && (i2 >= 0)) {
            var n1 = shp1[i1], n2 = shp2[i2];
            sz1 *= n1; sz2 *= n2;
            if (n1 > n2) result = 1; // More in a1 than a2; a1 > a2 if sub =
            if (n1 < n2) result = -1; // Fewer in a1 than a2; a1 < a2 if sub =
            if (result) break;
            i1--; i2--;
        }
        if (result == 0) {
            // Check for extra higher dimensions
            if (i1 >= 0) result = 1;  // More in a1 than a2; a1 > a2 if sub =
            if (i2 >= 0) result = -1; // Fewer in a1 than a2; a1 < a2 if sub =
        }

        i1 = 0; i2 = 0;
        sz = sz1.min(sz2);
        incr = (sz1 < sz2) ? len1 : len2;
        for (i = 0; i < sz; i += incr) {
            for (var j = 0; j < len; j++) {
                var x1 = a1#[i1+j];
                var x2 = a2#[i2+j];
                if (x1.isarray() || x2.isarray()) {
                    var cmp = x1.arrcmp(x2);
                    if (cmp != 0) { result = cmp; throw nil; }
                }
                else {
                    try {
                        if (x1 > x2) { result = 1; throw nil; }
                        if (x1 < x2) { result = -1; throw nil; }
                    } catch (e) {
                        // Unordered. Return nil if not equal.
                        if (x1 != x2) { result = nil; throw nil; }
                    }
                }
            }
            if (len1 > len2) { result = 1; throw nil; }
            if (len1 < len2) { result = -1; throw nil; }
            i1 += len1;
            i2 += len2;
        }

    }
    catch (e, f, l)  {
        TypePromote = savePromote;
        if (e != nil) { RECURSE_THROW(e); }
    }

    // We have pre-calculated the result if the subarrays are equal
    TypePromote = savePromote;
    RECURSE_RETURN(result);
}

proc $arreqv(a1,a2)
{
    return a1.arrcmp(a2) == 0;
}

proc $arrneqv(a1,a2)
{
    return a1.arrcmp(a2) != 0;
}

proc $vpos(arr, items)
{
    var result;

    if (items.isstring() || !items.isarray()) {
        result = -1;
        forall (arr[j]) {
            if (items == arr[j]) {
                result = j;
                break;
            }
        }
    }
    else {
        result = (-1).reshape(items.shape());
        forall (items#[i]) {
            forall (arr[j]) {
                if (items#[i] == arr[j]) {
                    result#[i] = j;
                    break;
                }
            }
        }
    }
    return result;
}

proc $vloopinit(base, expr, numIdx, typ)
{
    var shp, rnk, res;

    if (typ) {
        // #[]
        shp = [expr.sizeof()];
        rnk = 1;
    }
    else {
        shp = expr.shape();
        rnk = expr.rank();
    }
    if (numIdx > rnk) throw TypeCheck;

    // Unlike non-intrinsic loopinit, we save the shape in the first slot
    // in the form of an array type. This automagically gets picked up
    // in the LOOPINCR opcode.
    setlocal(base, Array[shp]);

    res = false;
    for (var i = 0; i < numIdx; i++) {
        var offs = base+i+1;
        if (!shp[i]) res = true;
        setlocal(offs, 0);
        setlocal(offs+numIdx, 0);
    }
    return res;
}

// For minval, maxval, possibly others
proc $vapply(arr, op)
{
    if (typeof(op) == Public) {
        return foreach(arr#[i]) {arr#[i].(op)()};
    }
    else {
        return foreach(arr#[i]) {op(arr#[i])};
    }
}

proc $vlaminate(lhs, rhs, ax)
{
    var axis = (oadl::nargs() > 2) ? ax : -1;
    var lamin = (axis != Int(axis));
    var larr = lhs.isarray(), rarr = rhs.isarray();

    if (!(larr || rarr)) {
        // Neither is an array. Just concat them.
        if ((axis < -1) || (axis > 0)) throw oadl::RangeCheck;
        return lhs ## rhs;
    }

    var lshp = larr && lhs.shape(), rshp = rarr && rhs.shape();
    var lrnk = larr && lhs.rank(), rrnk = rarr && rhs.rank();
    var rnk;

    // Figure out significand rank
    rnk = (larr && rarr) ? lrnk.max(rrnk) : (larr ? lrnk : rrnk);

    // Standard axis interpretation for negatives, adjusted for
    // laminate semantics
    if (axis <= -1) axis += rnk;
    if (lamin) {
        if ((axis <= -1) || (axis >= (rnk+1))) throw oadl::RangeCheck;
        axis = Int(axis + 1);
    }
    else {
        if ((axis < 0) || (axis >= rnk)) throw oadl::RangeCheck;
        axis = Int(axis);
    }

    if (larr && rarr) {
        // Both arrays
        if (lamin) {
            // Shape must match exactly
            if (lshp != rshp) throw oadl::ShapeCheck;
        }
        else {
            // Allow one or the other to be one less in rank,
            // substituting a one for the axis
            if ((lrnk+1) == rrnk) {
                lshp = rshp[:axis-1] ## 1 ## rshp[axis+1:];
                lhs = lhs.reshape(lshp);
            }
            else if (lrnk == (rrnk+1)) {
                rshp = lshp[:axis-1] ## 1 ## lshp[axis+1:];
                rhs = rhs.reshape(rshp);
            }
            else if (lrnk != rrnk) {
                throw oadl::ShapeCheck;
            }

            // Check shape compatibility
            forall (lshp[i]) {
                if ((i != axis) && (lshp[i] != rshp[i])) throw oadl::ShapeCheck;
            }
        }
    }
    else if (!larr) {
        // Left scalar
        lshp = @rshp;
        if (!lamin) lshp[axis] = 1;
        lhs = lhs.reshape(lshp);
    }
    else {
        // Right scalar
        rshp = @lshp;
        if (!lamin) rshp[axis] = 1;
        rhs = rhs.reshape(rshp);
    }

    // Calculate destination type
    var ltyp = ?? lhs, rtyp = ?? rhs, typ = Array;
    if (ltyp == rtyp) {
        // Both packed or both arrays, and they match
        typ = ltyp;
    }
    else {
        var l0 = lhs#[0], r0 = rhs#[0];
        if (l0.isnumeric() && r0.isnumeric()) {
            // All numeric - do the promotion
            typ = l0.promote(r0).packtype();
        }
        else if (l0.ischar() && r0.ischar()) {
            // All character - since they are not equal,
            // it must be wchar
            typ = PackWideChar;
        }
    }

    // Don't want calculator type promotion to interfere here
    var savePromote = oadl::TypePromote;
    oadl::TypePromote = Null;

    // Allocate the dest array
    var shp = lamin
                ? ((lshp[:axis-1] ## 2 ## lshp[axis:]))
                : ((lshp[:axis-1] ## (lshp[axis]+rshp[axis]) ## lshp[axis+1:]));
    var res = new typ(shp);

    // Iterate over dest, copying elements of lhs or rhs
    var idx = (0).reshape(res.rank());
    if (lamin) {
        forall (res#[i]) {
            var tidx = idx[:axis-1] ## idx[axis+1:];
            res#[i] = idx[axis] ? rhs.`[]#(tidx) : lhs.`[]#(tidx);
            idx = idx.increment(shp);
        }
    }
    else {
        // roffs is zeros except for at the axis position
        var lax = lshp[axis];
        var roffs = idx[:axis-1] ## lax ## idx[axis+1:];
        forall (res#[i]) {
            res#[i] = (idx[axis] >= lax) ? rhs.`[]#(idx-roffs) : lhs.`[]#(idx);
            idx = idx.increment(shp);
        }
    }

    // Restore type promotion
    oadl::TypePromote = savePromote;
    return res;
}

proc $vencode(lhs,rhs)
{
    const doenc = proc(res, roffs, rstrd, base, boffs, bstrd, len, val) {
        for (var i = len-1; i >= 0; i--) {
            var bi = base#[boffs+i*bstrd];
            var bdiv, bmod;
            if (bi) {
                bmod = val % bi;
                bdiv = (val - bmod) / bi;
            }
            else {
                bmod = val;
                bdiv = 0;
            }
            val = bdiv;
            res#[roffs+i*rstrd] = bmod;
        }
    };
    var larr = lhs.isarray(), rarr = rhs.isarray();
    if (!larr) lhs = [lhs];
    if (!rarr) rhs = [rhs];
    var lshp = lhs.shape(), rshp = rhs.shape();
    var lsiz = lhs.sizeof(), rsiz = rhs.sizeof();

    // Figure out result type
    var ltyp = lhs.arrbase(), rtyp = rhs.arrbase();
    var typ = ltyp.promote(rtyp);
    typ = (typ.isnumeric()) ? typ.packtype() : Array;

    // Allocate result array
    var shp = rarr ? (lshp ## rshp) : lshp;
    var res = new typ(shp);

    // Do the encode
    var len = lshp[0];
    var dcnt = res.sizeof() / len;
    var lcnt = lsiz / len;
    for (var ilhs = 0; ilhs < lcnt; ilhs++) {
        forall (rhs#[irhs]) {
            doenc(res, ilhs*rsiz + irhs, dcnt,
                  lhs, ilhs,             lcnt,
                  len, rhs#[irhs]);
        }
    }
    return (larr || rarr) ? res : res#[0];
}

proc $vdecode(lhs,rhs)
{
    const dodec = proc(base, boffs, arr, aoffs, astrd, len) {
        var res = 0;
        for (var i = 0; i < len; i++) {
            res = res*base#[boffs+i] + arr#[aoffs+astrd*i];
        }
        return res;
    };
    var larr = lhs.isarray(), rarr = rhs.isarray();
    var lshp, rshp, lrnk, rrnk;
    var llen, len;

    if (larr) {
        lshp = lhs.shape(); lrnk = lhs.rank(); llen = lshp[lrnk-1];
        if (rarr) {
            rshp = rhs.shape(); rrnk = rhs.rank();
            len = rshp[0];
        }
        else {
            rhs = rhs.reshape(llen);
            rshp = rhs.shape(); rrnk = rhs.rank();
            len = llen;
        }
    }
    else if (rarr) {
        rshp = rhs.shape(); rrnk = rhs.rank();
        len = rshp[0];
        lhs = lhs.reshape(len);
        lshp = lhs.shape(); lrnk = lhs.rank();
        llen =  len;
    }
    else {
        // Both scalars TBD
        lhs = lhs.reshape(1); rhs = rhs.reshape(1);
        lshp = lhs.shape(); lrnk = lhs.rank();
        rshp = rhs.shape(); rrnk = rhs.rank();
        llen = 1; len = 1;
    }

    // Extract and check the rank
    if (llen != len) throw oadl::ShapeCheck;

    // Figure out result type
    var ltyp = lhs.arrbase(), rtyp = rhs.arrbase();
    var typ = ltyp.promote(rtyp);
    typ = (typ.isnumeric()) ? typ.packtype() : Array;

    // Allocate the result array
    var shp = lshp[:lrnk-2] ## rshp[1:];
    var rnk = shp.length();
    var res = rnk ? new typ(shp) : new typ(1);

    var lsiz = lhs.sizeof() / len;
    var rsiz = rhs.sizeof() / len;

    forall (res#[i]) {
        res#[i] = dodec(lhs, (i%lsiz)*len, rhs, (i%rsiz), rsiz, len);
    }
    return rnk ? res : res#[0];
}

// These four implement more complicated type conversion assignments
// (for example, heterogeneous arrays, overloaded conversion operator, etc.)
// Note that the setl/setg/setprop variants do a subsequent typecheck
// after the conversion; this handles the case where an overloaded
// operator returns something other than what was expected, since the
// setlocal/etc. calls don't do any type checking

// For OP_SETL_TC
proc $tcsetl(typ, offs, val)
{
    val = val => typ;
    typecheck(typ, val);
    return setlocal(offs, val);
}

// For OP_SETG_TC
proc $tcsetg(typ, offs, val)
{
    val = val => typ;
    typecheck(typ, val);
    return setglobal(offs, val);
}

// For OP_SETP_TC
proc $tcsetprop(typ, offs, val)
{
    val = val => typ;
    typecheck(typ, val);
    return setprop(offs, val);
}

// For OP_SETPUB (tc)
proc $tcsetpub(obj, prop, val, typ)
{
    val = val => typ;
    typecheck(typ, val);
    obj.(prop) = val;
}

// Routine to dump an object to a file - for #list / #edit
proc $doprint(f, named, val, lev, semi)
{
    const printer = proc(f, val, lev) {
        const printrow = proc(f,val,lch,rch,prc,lev) {
            f.say(lch);
            forall (val[i]) {
                if (i) f.say(',');
                prc(f,val[i],lev);
            }
            f.say(rch);
        };
        const printchar = proc(f,val,pre,del) {
            if (pre) f.say(pre);
            if (del) f.say(del);
            switch (val) {
            case '\"', '\'', '\\' :
                f.say('\\',val);
            default :
                f.say(val);
            }
            if (del) f.say(del);
        };
        const printstr = proc(f,val,pre,del,prc) {
            if (pre) f.say(pre);
            f.say(del);
            forall (val[i]) prc(f,val[i],nil,nil);
            f.say(del);
        };
        const indent = proc(f, n) {
            for (var i = 0; i < n; i++) f.say(' ');
        };

        switch (typeof(val)) {
        case Half :       f.say(val, 'h');
        case Double :     f.say(val, 'd');
        case Byte :       f.say(val, 'b');
        case Ubyte :      f.say(val, 'u','b');
        case Short :      f.say(val, 's');
        case Ushort :     f.say(val, 'u','s');
        case Uint :       f.say(val, 'u');
        case Long :       f.say(val, 'l');
        case Ulong :      f.say(val, 'u','l');
        case Char :       printchar(f,val,nil,'\'');
        case WideChar :   printchar(f,val,'L','\'');
        case String :     printstr(f,val,nil,'"',printchar);
        case WideString : printstr(f,val,'L','"',printchar);
        case List :       printrow(f,val,'{','}',(proc),lev);
        case Dict :
            f.say('<','<','<');
            var n = 0;
            forall (val[i]) {
                if (n) f.say(',');
                (proc)(f,i); f.say(','); (proc)(f,val[i],lev);
                n++;
            }
            f.say('>','>','>');
        case Array :
            if (val.rank() == 2) {
                f.say('[');
                forall (val[i]) {
                    if (i) {
                        f.say(',','\n');
                        indent(f, lev);
                    }
                    printrow(f,val[i],'[',']',(proc),lev);
                }
                f.say(']');
            }
            else {
                // Rank must be greater than 2
                f.say('[');
                forall (val[i]) {
                    if (i) {
                        f.say(',','\n');
                        indent(f, lev);
                    }
                    (proc)(f,val[i],lev+1);
                }
                f.say(']');
            }
        case Public :
            // Need to print "public::" prefix
            f.say('p','u','b','l','i','c',':',':',val);
        default :
            if (val.isarray()) {
                // Packed array
                if (val.rank() == 1) {
                    printrow(f,val,'[',']',(proc),lev);
                }
                else {
                    f.say('[');
                    forall (val[i]) {
                        if (i) {
                            f.say(',','\n');
                            indent(f, lev+1);
                        }
                        (proc)(f,val[i],lev+1);
                    }
                    f.say(']');
                }
                return;
            }
            else {
                // Else default print is OK
                f.say(val);
            }
        }
    };

    RECURSE_BEGIN();

    if (named && (typeof(val) == Object)) {
        f.say('{','\n');
        forall (val.(i)) {
            if (i != public::parent) {
                var nam = oadl::pubname(i);
                var x = val.(i);
                f.say(' ',' ',' ',' ', nam, ' ','=',' ');
                printer(f,x,7+nam.length());
                f.say('\n');
            }
        }
        f.say('}','\n');
    }
    else {
        printer(f,val,lev);
        if (semi) f.say(semi);
        f.say('\n');
    }

    RECURSE_END();
}