Submission 967fc091...

Challenge: Integer sorting
Submitted at: 2018-31-28
Gas used: 178500
/**
 * This file is part of the 1st Solidity Gas Golfing Contest.
 * This work is licensed under Creative Commons Attribution ShareAlike 3.0.
 */

pragma solidity ^0.4.23;

contract Sort {
    /**
     * @dev Sorts a list of integers in ascending order.
     * The input list may be of any length.
     * @param input The list of integers to sort.
     * @return The sorted list.
     */

    // It's a sorting algorithm! I've gone for quicksort because mergesort consumes
    // 2x memory and memory is expensive! I tried thinking of ways to implement an
    // efficient counting/radix sort, but considering the numbers being sorted can
    // be up to 256 bits, none of the ideas I came up with were remotely competitive with
    // quicksort.

    // This thing uses the Hoare partition scheme. I also test for worst-case
    // 'perfect' runs of sorted or reverse-sorted data. A bit cheeky, as in the
    // general case the overheads would probably not be worth the cost, but hey,
    // this is code golf!

    // When the partition sizes get down to 8 items or less I switch to an insertion
    // sort algorithm. I believe optimized quicksorts usually switch to insertion sort
    // at around 32/64 values, but I went with 8 because that's the maximum number of
    // values that can be sorted with a pure stack-based implementation, which is super fast.
    // I also use a lookup table to convert partition size to a jump destination for the
    // relevant insertion sort, which is probably why Remix cheerfully crashes whenever I try
    // to debug a transaction :/

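    // For the pivot, we can get a pseudorandom index from the amount of available gas:
    // reduce it modulo the difference between the first and last partition indices, mask off
    // the 5 least significant bits (so the offset is a whole number of 32-byte words) and add
    // the result to the first partition index to get a memory index to load the pivot from.
    // The variants that first multiply the gas by a large prime (via mulmod) are left
    // commented out next to the active pivot computation in the partition code.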
    function sort(uint[] input) external view returns(uint[]) {
        assembly {
            calldatacopy(0x100, 0x04, calldatasize)
            if eq(calldatasize, 0x44) {
                return(0x00, 0x40)
            mstore(0x00, finished_insertion_sort)
            mstore(0x20, sort_two)
            mstore(0x40, sort_three)
            mstore(0x60, sort_four)
            mstore(0x80, sort_five)
            mstore(0xa0, sort_six)
            mstore(0xc0, sort_seven)
            mstore(0xe0, sort_eight)
            let it := 0x160
            let size := add(0x100, sub(calldatasize, 0x24))
            let end := add(size, 0x20)
            let prev := mload(0x140)
                jumpi(check_if_perfect_skip, gt(prev, mload(it)))
                prev := mload(it)
                it := add(it, 0x20)
                jumpi(check_if_perfect_start, lt(it, end))
                return(0x100, add(end, 0x40)) // hey it's perfect!
                it := 0x160
                prev := mload(0x140)
                jumpi(check_if_reverse_skip, lt(prev, mload(it)))
                prev := mload(it)
                it := add(it, 0x20)
                jumpi(check_if_reverse_start, lt(it, end))
                it := size
                end := msize
                for {} gt(it, 0x120) {} {
                    mstore(msize, mload(it))
                    it := sub(it, 0x20)
                mstore(sub(end, 0x20), mload(0x120))
                mstore(sub(end, 0x40), 0x20)
                return(sub(end, 0x40), add(size, 0x60)) // hey it's reversed!

            // One run of recursion will add the indices of the next set of partition start/end points onto the stack.
            // We start by adding 0 onto the stack; at the start of our routine we check whether the next stack item is 0
            // to evaluate whether to proceed.
            0x140 size           // p_high p_low 0

                0xe0 dup3 dup3 sub gt asm_partition jumpi

                dup2 swap1 sub mload jump // p_low
                // dup1 dup3 lt asm_partition jumpi
                dup1 asm_evaluate_stack jumpi
                asm_finish_sort jump
    // partition
            dup2 dup2 sub gas mod 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0 and dup3 add
            // dup2 dup2 sub 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 gas mulmod 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0 and dup3 add
            // 0x20 0x20 dup4 dup4 sub div 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 gas mulmod mul dup3 add
            // dup2
            mload               // p p_high p_low

            0x20 dup4 sub       // i p p_high p_low
            dup3 0x20 add       // j i p p_high p_low
            asm_decrease_j jump

                                // j i p p_high p_low
                dup1 mload      // vj j i p p_high p_low
                dup3 mload      // vi vj j i p p_high p_low
                dup3 mstore     // vj j i p p_high p_low
                dup3 mstore     // j i p p_high p_low

                0x20 swap1 sub  // j i p p_high p_low
                dup3 dup2 mload gt asm_decrease_j jumpi
            swap1               // i j p p_high p_low
                0x20 add
                dup3 dup2 mload lt asm_increase_i jumpi
            dup1 dup3 lt asm_partition_swap jumpi

            swap2 pop pop       // p' p_high p_low
            dup1 0x20 add       // p'' p' p_high p_low
            swap3 swap1         // p' p_low p_high p''

            asm_evaluate_stack jump

            0x04 calldatasize sub 0x100 return
            pop pop pop
            // hoareQuickSort(0x40, size)
            // return(0x00, add(end, 0x40))

                dup1 0x20 add mload // a
                dup2 mload          // b a
                dup2 dup2 lt        // 0 1 p
                two_0_1_skip jumpi
            two_0_1_skip:               // (max) (min) p
                dup3 mstore
                dup2 0x20 add mstore

                dup1 0x40 add       // p2
                dup2 0x20 add       // p1 p2 p
                dup2 mload          // 2 p1 p2 p
                dup2 mload          // 1 2 p1 p2 p
                dup5 mload          // 0 1 2 p1 p2 p

                dup3 dup3 lt three_1_2_skip jumpi
                swap2               // 2 1 0
                swap1               // 1 2 0
                swap2               // 0 2 1
            three_1_2_skip:         // 0 1 2
                dup3 dup2 lt three_0_2_skip jumpi
            three_0_2_skip:         // 0 1 2
                dup2 dup2 lt three_0_1_skip jumpi
            three_0_1_skip:         // 0 1 2 p1 p2 p
            dup6 mstore             // 1 2 p1 p2 p
            swap3 mstore            // p1 1 p
            mstore                  // p


                dup1 0x60 add
                dup2 0x40 add
                dup3 0x20 add
                dup3 mload          // 3 p1 p2 p3 p
                dup3 mload
                dup3 mload          // 1 2 3 p1 p2 p3 p
                dup7 mload          // 0 1 2 3 p1 p2 p3

            dup2 dup2 lt four_0_1_skip jumpi
            four_0_1_skip:          // 0 1 2 3 p
            dup4 dup4 lt four_2_3_skip jumpi
            swap3                   // 3 1 2 0
            swap2                   // 2 1 3 0
            swap3                   // 0 1 3 2
            four_2_3_skip:          // 0 1 2 3 p
            dup3 dup2 lt four_0_2_skip jumpi
            four_0_2_skip:          // 0 1 2 3 p
            dup4 dup3 lt four_1_3_skip jumpi
            swap3 swap1 swap3
            four_1_3_skip:          // 0 1 2 3 p
            dup3 dup3 lt four_1_2_skip jumpi
            swap1 swap2 swap1
            four_1_2_skip:          // 0 1 2 3 p1 p2 p3 p
            dup8 mstore             // 1 2 3 p1 p2 p3 p
            swap4 mstore            // 3 p1 1 p3 p
            swap2 swap1 mstore
            swap1 mstore
                dup1 0x80 add
                dup2 0x60 add
                dup3 0x40 add
                dup4 0x20 add       // p1 p2 p3 p4 p
                dup4 mload          // 4 p1 p2 p3 p4 p
                dup4 mload
                dup4 mload
                dup4 mload
                dup9 mload          // 0 1 2 3 4 p1 p2 p3 p4 p
            dup2 dup2 lt five_0_1_skip jumpi
            five_0_1_skip:          // 0 1 2 3 4
            dup5 dup5 lt five_3_4_skip jumpi
            swap3 swap4 swap3
            five_3_4_skip:          // 0 1 2 3 4
            dup5 dup4 lt five_2_4_skip jumpi
            swap2 swap4 swap2
            five_2_4_skip:          // 0 1 2 3 4
            dup4 dup4 lt five_2_3_skip jumpi
            swap2 swap3 swap2
            five_2_3_skip:          // 0 1 2 3 4
            dup5 dup3 lt five_1_4_skip jumpi
            swap1 swap4 swap1
            five_1_4_skip:          // 0 1 2 3 4
            dup4 dup2 lt five_0_3_skip jumpi
            five_0_3_skip:          // 0 1 2 3 4
            dup3 dup2 lt five_0_2_skip jumpi
            dup4 dup3 lt five_1_3_skip jumpi
            swap1 swap3 swap1
            five_1_3_skip:          // 0 1 2 3 4
            dup3 dup3 lt five_1_2_skip jumpi
            swap1 swap2 swap1
            five_1_2_skip:          // 0 1 2 3 4 p1 p2 p3 p4 p
            dup10 mstore            // 1 2 3 4 p1 p2 p3 p4 p
            swap5 mstore            // 3 4 p1 1 p3 p4 p
            swap5 mstore            // p1 1 p3 3 p
            mstore                  // p3 3 p
            mstore                  // p
                dup1 0xa0 add
                dup2 0x80 add
                dup3 0x60 add
                dup4 0x40 add
                dup5 0x20 add       // p1 p2 p3 p4 p
                dup5 mload          // 4 p1 p2 p3 p4 p
                dup5 mload
                dup5 mload
                dup5 mload
                dup5 mload
                dup11 mload          // 0 1 2 3 4 5 p1 p2 p3 p4 p5 p
                dup3 dup3 lt six_1_2_skip jumpi
                swap1 swap2 swap1
                dup6 dup6 lt six_4_5_skip jumpi
                swap4 swap5 swap4
                dup3 dup2 lt six_0_2_skip jumpi
                dup6 dup5 lt six_3_5_skip jumpi
                swap3 swap5 swap3
                dup2 dup2 lt six_0_1_skip jumpi
                dup5 dup5 lt six_3_4_skip jumpi
                swap3 swap4 swap3
                dup6 dup4 lt six_2_5_skip jumpi
                swap2 swap5 swap2
                dup4 dup2 lt six_0_3_skip jumpi
                dup5 dup3 lt six_1_4_skip jumpi
                swap1 swap4 swap1
                dup5 dup4 lt six_2_4_skip jumpi
                swap2 swap4 swap2
                dup4 dup3 lt six_1_3_skip jumpi
                swap1 swap3 swap1
                dup4 dup4 lt six_2_3_skip jumpi
                swap2 swap3 swap2
                six_2_3_skip:           // 0 1 2 3 4 5 p1 p2 p3 p4 p5 p
                dup12 mstore            // 1 2 3 4 5 p1 p2 p3 p4 p5 p
                swap6 mstore            // 3 4 5 p1 1 p3 p4 p5 p
                swap6 mstore            // 5 p1 1 p3 3 p5 p
                swap2 swap1 mstore      // 5 p3 3 p5 p
                swap2 swap1 mstore      // 5 p5 p
                swap1 mstore
                dup1 0xc0 add
                dup2 0xa0 add
                dup3 0x80 add
                dup4 0x60 add
                dup5 0x40 add
                dup6 0x20 add       // p1 p2 p3 p4 p5 p6 p
                dup6 mload          // 4 p1 p2 p3 p4 p
                dup6 mload
                dup6 mload
                dup6 mload
                dup6 mload
                dup6 mload
                dup13 mload          // 0 1 2 3 4 5 6 p1 p2 p3 p4 p5 p6 p
                dup3 dup3 lt seven_1_2_skip jumpi
                swap1 swap2 swap1
                dup5 dup5 lt seven_3_4_skip jumpi
                swap3 swap4 swap3
                dup7 dup7 lt seven_5_6_skip jumpi
                swap5 swap6 swap5
                dup3 dup2 lt seven_0_2_skip jumpi
                dup6 dup5 lt seven_3_5_skip jumpi
                swap3 swap5 swap3
                dup7 dup6 lt seven_4_6_skip jumpi
                swap4 swap6 swap4
                dup2 dup2 lt seven_0_1_skip jumpi
                dup6 dup6 lt seven_4_5_skip jumpi
                swap4 swap5 swap4
                dup7 dup4 lt seven_2_6_skip jumpi
                swap2 swap6 swap2
                dup5 dup2 lt seven_0_4_skip jumpi
                dup6 dup3 lt seven_1_5_skip jumpi
                swap1 swap5 swap1
                dup4 dup2 lt seven_0_3_skip jumpi
                dup6 dup4 lt seven_2_5_skip jumpi
                swap2 swap5 swap2
                dup4 dup3 lt seven_1_3_skip jumpi
                swap1 swap3 swap1
                dup5 dup4 lt seven_2_4_skip jumpi
                swap2 swap4 swap2
                dup4 dup4 lt seven_2_3_skip jumpi
                swap2 swap3 swap2
                seven_2_3_skip:         // 0 1 2 3 4 5 6 p1 p2 p3 p4 p5 p6 p
                dup14 mstore            // 1 2 3 4 5 6 p1 p2 p3 p4 p5 p6 p
                swap7 mstore            // 3 4 5 6 p1 1 p3 p4 p5 p6 p
                swap7 mstore            // 5 6 p1 1 p3 3 p5 p6 p
                swap7 mstore            // p1 1 p3 3 p5 5 p
                dup1 0xe0 add
                dup2 0xc0 add
                dup3 0xa0 add
                dup4 0x80 add
                dup5 0x60 add
                dup6 0x40 add
                dup7 0x20 add       // p1 p2 p3 p4 p5 p6 p
                dup7 mload          // 4 p1 p2 p3 p4 p
                dup7 mload
                dup7 mload
                dup7 mload
                dup7 mload
                dup7 mload
                dup7 mload
                dup15 mload          // 0 1 2 3 4 5 6 7 p1 p2 p3 p4 p5 p6 p7 p
                dup2 dup2 lt eight_0_1_skip jumpi
                dup4 dup4 lt eight_2_3_skip jumpi
                swap2 swap3 swap2
                dup6 dup6 lt eight_4_5_skip jumpi
                swap4 swap5 swap4
                dup8 dup8 lt eight_6_7_skip jumpi
                swap6 swap7 swap6
                dup3 dup2 lt eight_0_2_skip jumpi
                dup4 dup3 lt eight_1_3_skip jumpi
                swap1 swap3 swap1
                dup7 dup6 lt eight_4_6_skip jumpi
                swap4 swap6 swap4
                dup8 dup7 lt eight_5_7_skip jumpi
                swap5 swap7 swap5
                dup3 dup3 lt eight_1_2_skip jumpi
                swap1 swap2 swap1
                dup7 dup7 lt eight_5_6_skip jumpi
                swap5 swap6 swap5
                dup5 dup2 lt eight_0_4_skip jumpi
                dup8 dup5 lt eight_3_7_skip jumpi
                swap7 swap3 swap7
                dup6 dup3 lt eight_1_5_skip jumpi
                swap1 swap5 swap1
                dup7 dup4 lt eight_2_6_skip jumpi
                swap2 swap6 swap2
                dup5 dup3 lt eight_1_4_skip jumpi
                swap1 swap4 swap1
                dup7 dup5 lt eight_3_6_skip jumpi
                swap3 swap6 swap3
                dup5 dup4 lt eight_2_4_skip jumpi
                swap2 swap4 swap2
                dup6 dup5 lt eight_3_5_skip jumpi
                swap3 swap5 swap3
                dup5 dup5 lt eight_3_4_skip jumpi
                swap3 swap4 swap3
                eight_3_4_skip:         // 0 1 2 3 4 5 6 7 p1 p2 p3 p4 p5 p6 p7 p
                dup16 mstore            // 1 2 3 4 5 6 7 p1 p2 p3 p4 p5 p6 p7 p
                swap8 mstore            // 3 4 5 6 7 p1 1 p3 p4 p5 p6 p7 p
                swap8 mstore            // 5 6 7 p1 1 p3 3 p5 p6 p7 p
                swap8 mstore            // 7 p1 1 p3 3 p5 5 p7 p
                swap2 swap1 mstore      // 7 p3 3 p5 5 p7 p
                swap2 swap1 mstore      // 7 p5 5 p7 p
                swap2 swap1 mstore      // 7 p7 p
                swap1 mstore