Thursday, June 2, 2016

Comparing Radix, Merge, Selection and Bubble Sort

Implementation of Radix Sort:

bucket = {}

# creating bucket for each digits
def reset_bucket():
    global bucket
    for i in xrange(10): bucket[i] = []
# to handle variable length items`
def radix_group_numbers(numbers):
    joined = []
    max_length = max(len(str(x)) for x in numbers)
    bkt = {}
    for i in xrange(max_length + 1):
        bkt[i] = []
    for item in numbers:
        bkt[len(str(item))].append(item)
    for k, v in bkt.items():
        if v: joined.extend(radix_sort(v))
    return joined

def put_in_buckets(numbers, index):  # assuming that the numbers are of equal lenght
    global bucket
    try:
        for item in numbers:
            c = str(item)[index]
            if c is '0':
                bucket[0].append(item)
            elif c is '1':
                bucket[1].append(item)
            elif c is '2':
                bucket[2].append(item)
            elif c is '3':
                bucket[3].append(item)
            elif c is '4':
                bucket[4].append(item)
            elif c is '5':
                bucket[5].append(item)
            elif c is '6':
                bucket[6].append(item)
            elif c is '7':
                bucket[7].append(item)
            elif c is '8':
                bucket[8].append(item)
            elif c is '9':
                bucket[9].append(item)
    except IndexError:
        print 'error occured'
        print numbers, index

def radix_sort(lst):
    global bucket
    max_length = max(len(str(x)) for x in lst)
    for i in xrange(max_length):
        reset_bucket()
        put_in_buckets(lst, max_length - i - 1)
        lst = []
        for key, value in bucket.iteritems():
            if value is not None:
                for item in value:
                    lst.append(item)
        bucket.clear()
    return lst

Implementation of Merge Sort:
# merge two lists
class stack:
    def __init__(self, lst=None):
        if lst is None:
            self.items = []
        else:
            self.items = lst

    def pop(self):
        return self.items.pop()
    def peek(self):
        return self.items[len(self.items) - 1]
    def __str__(self):
        return str(self.items)
    def empty(self):
        return len(self.items) is 0
    def reverse(self):
        l = []
        while self.empty() is False:
            l.append(self.items.pop())
        return l

def merge(l1, l2):  # takes two sorted lists and merges them while keeping the order
    ll = []  # long list consists of l1 + l2
    l1.reverse()
    l2.reverse()
    st1 = stack(l1)
    st2 = stack(l2)
    while st1.empty() is False and st2.empty() is False:
        i1 = st1.peek()
        i2 = st2.peek()
        if i1 <= i2:
            ll.append(i1)
            l1.pop()
        else:
            ll.append(i2)
            l2.pop()
    if st1.empty() is True and st2.empty() is False:
        ll = ll + st2.reverse()
    elif st2.empty() is True and st1.empty() is False:
        ll = ll + st1.reverse()

    return ll


def merge_sort(lst):
    if len(lst) is 1:
        return lst
    else:
        mid = len(lst) // 2
        left = merge_sort(lst[:mid])
        right = merge_sort(lst[mid:])
        return merge(left, right)
Implementation of Selection Sort:
from utils import get_numbers_list

def selection_sort(unsorted_lst):
    for k in xrange(len(unsorted_lst) - 1):  # at max this much iteration required to fully sort

        current_minimum = unsorted_lst[k]  # first elem as the current minimum
        current_minimum_idx = k

        for i in xrange(k, len(unsorted_lst), 1):
            if current_minimum > unsorted_lst[i]:
                current_minimum = unsorted_lst[i]
                current_minimum_idx = i
        # swap the current minimum with the kth element
        tmp = unsorted_lst[k]
        unsorted_lst[k] = current_minimum
        unsorted_lst[current_minimum_idx] = tmp
    return unsorted_lst

Bubble Sort:
from utils import get_numbers_list

def bubble_sort(lst):
    max_iter = len(lst)
    counter = 0
    for m in xrange(max_iter):
        for i in xrange(max_iter - 1 - counter):  # -counter to execute faster
            if lst[i] > lst[i + 1]:  # should swap
                tmp = lst[i + 1]
                lst[i + 1] = lst[i]
                lst[i] = tmp
        counter += 1
    return lst

Now let's generate some sample data:
import sys
import random

# n is the number of samples generated
def generate_samples(n):
    lst = []
    while(n > 0):
        lst.append(random.randint(0, 1000000)) #random integer between 0 and a million
        n -= 1
    f = open("data.txt", "w+")
    for item in lst:
        f.write(str(item) + " ")
    f.close()
    print 'samples written at data.txt...'

def get_numbers_list(fname):
    num_arr = []
    f = open(fname, "r")
    for line in f:
        num_arr.append(list(int(x) for x in line.strip().split(" ")))
    return num_arr[0]
Then we measure the time required for their executions:
import time

from utils import generate_samples, get_numbers_list
from bubble_sort import bubble_sort
from merge_sort import merge_sort
from radix_sort import radix_sort, radix_group_numbers
from selection_sort import selection_sort

#first we need to generate a hugh amount of data

generate_samples(10000)

lst = get_numbers_list('data.txt')

start_time = time.time()
radix_group_numbers(lst)
end_time = time.time()

print 'radix sort took', end_time - start_time, 'seconds'

start_time = time.time()
merge_sort(lst)
end_time = time.time()

print 'merge sort took', end_time - start_time, 'seconds'

start_time = time.time()
bubble_sort(lst)
end_time = time.time()

print 'bubble sort took', end_time - start_time, 'seconds'

start_time = time.time()
selection_sort(lst)
end_time = time.time()

print 'selection sort took', end_time - start_time, 'seconds'
Sample outputs: for 1000 data points:
radix sort took 0.00797295570374 seconds
merge sort took 0.0257380008698 seconds
bubble sort took 0.0774970054626 seconds
selection sort took 0.0248939990997 seconds
for 10000 data points:
radix sort took 0.0430598258972 seconds
merge sort took 0.187104940414 seconds
bubble sort took 8.21462607384 seconds
selection sort took 2.59621596336 seconds