# Submission 8298112c...

Challenge Remove duplicate elements zac.creditmint.eth 2018-53-11 196379
``````/**
* This file is part of the 1st Solidity Gas Golfing Contest.
*
*/

pragma solidity ^0.4.23;

contract Unique {
/**
* @dev Removes all but the first occurrence of each element from a list of
*      integers, preserving the order of original elements, and returns the list.
*
* The input list may be of any length.
*
* @return The input list, with any duplicate elements removed.
*/

// bloom filters! For when 'eeeh, probably' is good enough...

// probability of a hash collision = (1-e^(-kn/m))^k
// assuming n tops out at around 256 (although...)
// then m = 32*256 bits and k=4 gives probability p of 0.002%
// the value of k seems to be the overriding gas guzzler despite my best efforts
// setting k=1 and having a thwacking huge bitfield (768 machine words)
// seems to be the most gas-efficient way of getting an acceptably low p-value
function uniquify(uint[]) public view returns(uint[]) {

assembly {
// exit if no data
0x44 calldatasize eq no_data jumpi

// hash table: every 8 bit number
// converts to a unique 256 bit word with a
// single bit set
1 0x0 mstore
2 0x20 mstore
4 0x40 mstore
8 0x60 mstore
16 0x80 mstore
32 0xa0 mstore
64 0xc0 mstore
128 0xe0 mstore

// memory map:
// 0x0 - 0x100: hash table map
// 0x100 - 0x520: bloom filter storage
// 0x520 - end: input data (and result)

0x44 calldatasize sub
0x44
0x6120
calldatacopy

// variable declarations
// e = end of calldata
// a = memory pointer for next output word
// n = memory pointer for next input word
// h = hashed entry
// i_a = memory index to access bloom filter limb for first hash
// i_b = memory index to access bloom filter limb for second hash
// l_a = first bloom filter limb
// l_b = second bloom filter limb
// b_a = pseudo random bitsequence to compare first limb against
// b_b = pseudo random bitsequence to compare first limb against
// s = byte size of return array (minus dynamic array formatting)
// stack
0x6120                       // a e
0x6120                       // n a e

// fall into loop, we know there's at least one entry
loop_start:
// h = keccak256(n, 0x20)
0x20 dup2 keccak256     // h n a e

// get two bloom filter indices
// i_a = add(0x100, and(h, 0x7fe0))
0x5fe0 dup2 and 0x100 add            // i_a  h n a e
dup1 mload                          // l_a i_a h n a e
// get pseudorandom 8-bit value and map to bit sequence
dup3 3 byte mload                  // b_a l_a i_a h n a e

//  and(b_a,l_a)
dup2 dup2 and iszero
add_to_set jumpi                    // b_a l_a i_a h n a e

pop pop pop pop                     // n a e
0x20 add                            // n' a e
dup3 dup2 lt loop_start jumpi

loop_end:                               // n a e
pop                                     // a e
0x20 0x60e0 mstore
0x6120 swap1 sub                         // s e
// mstore(500, div(s, 0x20))
0x20 dup2 div 0x6100 mstore

// fiddling around with jumps makes the compiler complain about unbalanced stack issues
// here's your balanced stack! problem solved...
0 0 0

dup5 mload dup7 mstore             // b_a l_a i_a h n a e

// mstore(i_a, or(b_a, l_a))
or swap1 mstore                     // h n a e
pop                                 // n a e (I should optimize this out...)
swap1 0x20 add swap1                // n' a' e