Submission 4da994da...

ChallengeInteger sorting
Submitterwicketh.eth
Submitted at2018-29-29
Gas used133839
pragma solidity ^0.4.23;

contract Sort {
    
    function () external payable { assembly {
    
    // function sort(uint[] input) external payable returns(uint[]) { assembly {
        
        // @author Remco Bloemen <[email protected]>
        
        // No  unrepeated sorted input
        // Yes repeated sorted input
        // No  unrepeated reverse sorted input
        // Yes repeated reverse sorted input
        // Yes vector of all the same input
        // max(input size) = 299
        
        mstore(0x40, 0) // Set all memory to zero
        
        let temp1
        let temp2
        let addr1
        let addr2
        let scale
        let i
        
        // Special case for zero or one value
        jumpi(trivial, lt(calldatasize, 0x84))
        
        // Radix sort
        // Size of bucket table:
        //                                      Competition
        //  60 * 32 = 1920    gas: 337644        208971
        //  80 * 32 = 2560    gas: 323985        194330
        // 100 * 32 = 3200    gas: 321639        193251
        // 110 * 32 = 3520    gas: 318570        190720
        // 120 * 32 = 3840    gas: 315349        186302
        // 130 * 32 = 4160    gas: 314771        188363
        // 140 * 32 = 4480    gas: 314657        186329
        // 160 * 32 = 5120    gas: 317469        189141
        // 30 + 256 * 2 = 542
        // 64
        
        ////////////////////////////////////////////////////
        // First pass
        ////////////////////////////////////////////////////
        // * find upper bound to values
        // * check for sorted input
        // * check for reverse sorted input
        temp1 := calldataload(0x44)
        scale := temp1
        i := 0x64
    l1_equal: // All entries up untill i are the same
        temp2 := calldataload(i)
        i := add(i, 32)
        jumpi(l1_equal, and(lt(i, calldatasize), eq(temp1, temp2)))
        jumpi(trivial, eq(i, calldatasize))
    l1_neq:
        jumpi(l1_reverse, gt(temp1, temp2))
        scale := temp2
    l1_forward:
        temp1 := temp2
        temp2 := calldataload(i)
        scale := or(scale, temp2)
        i := add(i, 32)
        jumpi(l1_forward, and(lt(i, calldatasize), lt(temp1, add(temp2, 1))))
        jumpi(trivial, eq(i, calldatasize))
        jump(l1)
    l1_reverse:
        temp1 := temp2
        temp2 := calldataload(i)
        scale := or(scale, temp2)
        i := add(i, 32)
        jumpi(l1_reverse, and(lt(i, calldatasize), lt(temp2, add(temp1, 1))))
        jumpi(reverse, eq(i, calldatasize))
    l1:
        temp2 := calldataload(i)
        scale := or(scale, temp2)
        i := add(i, 32)
        jumpi(l1, lt(i, calldatasize))
        
        // Compute scaling factor (twice what it should be, we mask)
        scale := div(add(scale, 255), 256)
        
        ////////////////////////////////////////////////////
        // Second pass: count buckets
        ////////////////////////////////////////////////////
        0x44
    l2:
        // temp1 := sub(510, and(div(calldataload(i), scale), 0xFFFFFE))
        scale dup2 calldataload div 0xfffffe and 254 sub
        // mstore8(add(temp1, 31), add(mload(temp1), 1))
        dup1 mload 1 add swap1 31 add mstore8
        // i := add(i, 32)
        32 add
        // jumpi(l2, lt(i, calldatasize))
        dup1 calldatasize gt l2 jumpi
        pop
        
        ////////////////////////////////////////////////////
        // Bucket pass: compute running sum of the buckets
        ////////////////////////////////////////////////////
        
        0x0001000000000000000000000000000000000000000000000000000000000000
        0x0001000100010001000100010001000100010001000100010001000100010001
        0x120
        0x100 mload 32 mul add dup2 mul dup1 0x100 mstore dup3 swap1 div
        0xe0 mload 32 mul add dup2 mul dup1 0xe0 mstore dup3 swap1 div
        0xc0 mload 32 mul add dup2 mul dup1 0xc0 mstore dup3 swap1 div
        0xa0 mload 32 mul add dup2 mul dup1 0xa0 mstore dup3 swap1 div
        0x80 mload 32 mul add dup2 mul dup1 0x80 mstore dup3 swap1 div
        0x60 mload 32 mul add dup2 mul dup1 0x60 mstore dup3 swap1 div
        0x40 mload 32 mul add dup2 mul dup1 0x40 mstore dup3 swap1 div
        0x20 mload 32 mul add dup2 mul dup1 0x20 mstore dup3 swap1 div
        0x0 mload 32 mul add dup2 mul dup1 0x0 mstore dup3 swap1 div
        pop pop pop
        
        if 0 { // DEBUG: Dump the buckets to output
        i := 0x00
        ldebug:
        mstore(add(mul(i, 16), 0x1000), and(mload(i), 0xFFFF))
        i := add(i, 2)
        jumpi(ldebug, lt(i, 256))
        mstore(sub(0x1000, 0x40), 0x20)
        mstore(sub(0x1000, 0x20), 256)
        return(sub(0x1000, 0x40), add(64, mul(256, 32)))
        }

        ////////////////////////////////////////////////////
        // Third pass: move to buckets
        ////////////////////////////////////////////////////
        temp1 := 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0000
        i := 0x44
    l4: {
        let value  := calldataload(i)
        let bucket := sub(254, and(div(value, scale), 0xFFFFFE))
        let bval   := mload(bucket)
        let addr   := sub(and(bval, 0xFFFF), 32)
        mstore(bucket, or(and(bval, temp1), addr))
        mstore(addr, value)
        i := add(i, 32)
        }
        jumpi(l4, lt(i, calldatasize))
        
        ////////////////////////////////////////////////////
        // Fourth pass (buckets): sort buckets
        ////////////////////////////////////////////////////
        
        // At this point unsorted groups have max 5 elements
        
        i := 256
        addr1 := 0x120
    l5:
        i := sub(i, 2)
        jumpi(last, slt(i, 0))
        addr2 := addr1
        addr1 := and(mload(i), 0xFFFF)
        temp1 := sub(addr1, addr2)
        jumpi(l5, lt(temp1, 64))
        jumpi(sort2, eq(temp1, 64))
        jumpi(sort3, eq(temp1, 96))
        jumpi(sort4, eq(temp1, 128))
        jumpi(sort5, eq(temp1, 160))
        jumpi(sort6, eq(temp1, 192))
        jumpi(sort7, eq(temp1, 224))
        jumpi(sort8, eq(temp1, 256))
        jump(explode)
        jump(l5)
        
    last:
        addr2 := addr1
        addr1 := add(sub(calldatasize, 0x44), 0x120)
        jumpi(done, eq(addr1, addr2))
        temp1 := sub(addr1, addr2)
        jumpi(done, eq(temp1, 32))
        jumpi(sort2, eq(temp1, 64))
        jumpi(sort3, eq(temp1, 96))
        jumpi(sort4, eq(temp1, 128))
        jumpi(sort5, eq(temp1, 160))
        jumpi(sort6, eq(temp1, 192))
        jumpi(sort7, eq(temp1, 224))
        jumpi(sort8, eq(temp1, 256))
        jump(explode)
        jump(l5)
        
    done:
        mstore(sub(0x120, 0x40), 0x20)
        mstore(sub(0x120, 0x20), calldataload(0x24))
        return(sub(0x120, 0x40), sub(calldatasize, 4))
        
    sort2: {
            let a := mload(addr2)
            let b := mload(add(addr2, 32))
            jumpi(end, gt(b, a))
            mstore(addr2, b)
            mstore(add(addr2, 32), a)
        end:
        }
        jump(l5)
    
    sort3: {
            let a := mload(addr2)
            let b := mload(add(addr2, 32))
            let c := mload(add(addr2, 64))
            jumpi(case1, gt(b, a))
            a b =: a =: b
        case1:
            jumpi(case3, gt(c, b))
            b c =: b =: c
            jumpi(case3, gt(b, a))
            a b =: a =: b
        case3:
            mstore(addr2, a)
            mstore(add(addr2, 32), b)
            mstore(add(addr2, 64), c)
        }
        jump(l5)
    
    sort4: // [[1 2][3 4][1 3][2 4][2 3]]
        addr2 96 add mload
        addr2 64 add mload
        addr2 32 add mload
        addr2 mload
        dup1 dup3 lt skip_4_1 jumpi swap1 skip_4_1:
        dup3 dup5 lt skip_4_2 jumpi swap2 swap3 swap2 skip_4_2:
        dup1 dup4 lt skip_4_3 jumpi swap2 skip_4_3:
        dup2 dup5 lt skip_4_4 jumpi swap1 swap3 swap1 skip_4_4:
        dup2 dup4 lt skip_4_5 jumpi swap1 swap2 swap1 skip_4_5:
        addr2 96 add mstore
        addr2 64 add mstore
        addr2 32 add mstore
        addr2 mstore
        jump(l5)

    sort5: // [[1 2][3 4][1 3][2 5][1 2][3 4][2 3][4 5][3 4]]
        addr2 128 add mload
        addr2 96 add mload
        addr2 64 add mload
        addr2 32 add mload
        addr2 mload
        dup1 dup3 lt skip_5_1 jumpi swap1 skip_5_1:
        dup3 dup5 lt skip_5_2 jumpi swap2 swap3 swap2 skip_5_2:
        dup1 dup4 lt skip_5_3 jumpi swap2 skip_5_3:
        dup2 dup6 lt skip_5_4 jumpi swap1 swap4 swap1 skip_5_4:
        dup1 dup3 lt skip_5_5 jumpi swap1 skip_5_5:
        dup3 dup5 lt skip_5_6 jumpi swap2 swap3 swap2 skip_5_6:
        dup2 dup4 lt skip_5_7 jumpi swap1 swap2 swap1 skip_5_7:
        dup4 dup6 lt skip_5_8 jumpi swap3 swap4 swap3 skip_5_8:
        dup3 dup5 lt skip_5_9 jumpi swap2 swap3 swap2 skip_5_9:
        addr2 128 add mstore
        addr2 96 add mstore
        addr2 64 add mstore
        addr2 32 add mstore
        addr2 mstore
        jump(l5)
        
    sort6: // [[1 2][3 4][5 6][1 3][2 5][4 6][1 2][3 4][5 6][2 3][4 5][3 4]]
        addr2 160 add mload
        addr2 128 add mload
        addr2 96 add mload
        addr2 64 add mload
        addr2 32 add mload
        addr2 mload
        dup1 dup3 lt skip_6_1 jumpi swap1 skip_6_1:
        dup3 dup5 lt skip_6_2 jumpi swap2 swap3 swap2 skip_6_2:
        dup5 dup7 lt skip_6_3 jumpi swap4 swap5 swap4 skip_6_3:
        dup1 dup4 lt skip_6_4 jumpi swap2 skip_6_4:
        dup2 dup6 lt skip_6_5 jumpi swap1 swap4 swap1 skip_6_5:
        dup4 dup7 lt skip_6_6 jumpi swap3 swap5 swap3 skip_6_6:
        dup1 dup3 lt skip_6_7 jumpi swap1 skip_6_7:
        dup3 dup5 lt skip_6_8 jumpi swap2 swap3 swap2 skip_6_8:
        dup5 dup7 lt skip_6_9 jumpi swap4 swap5 swap4 skip_6_9:
        dup2 dup4 lt skip_6_10 jumpi swap1 swap2 swap1 skip_6_10:
        dup4 dup6 lt skip_6_11 jumpi swap3 swap4 swap3 skip_6_11:
        dup3 dup5 lt skip_6_12 jumpi swap2 swap3 swap2 skip_6_12:
        addr2 160 add mstore
        addr2 128 add mstore
        addr2 96 add mstore
        addr2 64 add mstore
        addr2 32 add mstore
        addr2 mstore
        jump(l5)
        
    sort7: // [[1 2][0 2][0 1][3 4][5 6][3 5][4 6][4 5][0 4][0 3][1 5][2 6][2 5][1 3][2 4][2 3]]
        addr2 192 add mload
        addr2 160 add mload
        addr2 128 add mload
        addr2 96 add mload
        addr2 64 add mload
        addr2 32 add mload
        addr2 mload
        dup2 dup4 lt skip_7_1 jumpi swap1 swap2 swap1 skip_7_1:
        dup1 dup4 lt skip_7_2 jumpi swap2 skip_7_2:
        dup1 dup3 lt skip_7_3 jumpi swap1 skip_7_3:
        dup4 dup6 lt skip_7_4 jumpi swap3 swap4 swap3 skip_7_4:
        dup6 dup8 lt skip_7_5 jumpi swap5 swap6 swap5 skip_7_5:
        dup4 dup7 lt skip_7_6 jumpi swap3 swap5 swap3 skip_7_6:
        dup5 dup8 lt skip_7_7 jumpi swap4 swap6 swap4 skip_7_7:
        dup5 dup7 lt skip_7_8 jumpi swap4 swap5 swap4 skip_7_8:
        dup1 dup6 lt skip_7_9 jumpi swap4 skip_7_9:
        dup1 dup5 lt skip_7_10 jumpi swap3 skip_7_10:
        dup2 dup7 lt skip_7_11 jumpi swap1 swap5 swap1 skip_7_11:
        dup3 dup8 lt skip_7_12 jumpi swap2 swap6 swap2 skip_7_12:
        dup3 dup7 lt skip_7_13 jumpi swap2 swap5 swap2 skip_7_13:
        dup2 dup5 lt skip_7_14 jumpi swap1 swap3 swap1 skip_7_14:
        dup3 dup6 lt skip_7_15 jumpi swap2 swap4 swap2 skip_7_15:
        dup3 dup5 lt skip_7_16 jumpi swap2 swap3 swap2 skip_7_16:
        addr2 192 add mstore
        addr2 160 add mstore
        addr2 128 add mstore
        addr2 96 add mstore
        addr2 64 add mstore
        addr2 32 add mstore
        addr2 mstore
        jump(l5)
        
    sort8:
        addr2 224 add mload
        addr2 192 add mload
        addr2 160 add mload
        addr2 128 add mload
        addr2 96 add mload
        addr2 64 add mload
        addr2 32 add mload
        addr2 mload
        dup1 dup3 lt skip_8_1 jumpi swap1 skip_8_1:
        dup3 dup5 lt skip_8_2 jumpi swap2 swap3 swap2 skip_8_2:
        dup1 dup4 lt skip_8_3 jumpi swap2 skip_8_3:
        dup2 dup5 lt skip_8_4 jumpi swap1 swap3 swap1 skip_8_4:
        dup2 dup4 lt skip_8_5 jumpi swap1 swap2 swap1 skip_8_5:
        dup5 dup7 lt skip_8_6 jumpi swap4 swap5 swap4 skip_8_6:
        dup7 dup9 lt skip_8_7 jumpi swap6 swap7 swap6 skip_8_7:
        dup5 dup8 lt skip_8_8 jumpi swap4 swap6 swap4 skip_8_8:
        dup6 dup9 lt skip_8_9 jumpi swap5 swap7 swap5 skip_8_9:
        dup6 dup8 lt skip_8_10 jumpi swap5 swap6 swap5 skip_8_10:
        dup1 dup6 lt skip_8_11 jumpi swap4 skip_8_11:
        dup2 dup7 lt skip_8_12 jumpi swap1 swap5 swap1 skip_8_12:
        dup2 dup6 lt skip_8_13 jumpi swap1 swap4 swap1 skip_8_13:
        dup3 dup8 lt skip_8_14 jumpi swap2 swap6 swap2 skip_8_14:
        dup4 dup9 lt skip_8_15 jumpi swap3 swap7 swap3 skip_8_15:
        dup4 dup8 lt skip_8_16 jumpi swap3 swap6 swap3 skip_8_16:
        dup3 dup6 lt skip_8_17 jumpi swap2 swap4 swap2 skip_8_17:
        dup4 dup7 lt skip_8_18 jumpi swap3 swap5 swap3 skip_8_18:
        dup4 dup6 lt skip_8_19 jumpi swap3 swap4 swap3 skip_8_19:
        addr2 224 add mstore
        addr2 192 add mstore
        addr2 160 add mstore
        addr2 128 add mstore
        addr2 96 add mstore
        addr2 64 add mstore
        addr2 32 add mstore
        addr2 mstore
        jump(l5)
        
        
        ////////////////////////////////////////////////////
        ////////////////////////////////////////////////////
        ////////////////////////////////////////////////////
        
    trivial:
        calldatacopy(0, 4, calldatasize)
        return(0, sub(calldatasize, 4))
        
    reverse:
        calldatacopy(0, 4, 0x40)
        i := 0x44
        temp1 := sub(calldatasize, 36)
        
    lr:
        mstore(temp1, calldataload(i))
        i := add(i, 32)
        temp1 := sub(temp1, 32)
        jumpi(lr, lt(i, calldatasize))
        
        return(0, sub(calldatasize, 4))
        
    explode:
        selfdestruct(0)
    }}
}