Anik's Tech Tutorials: Comparing Radix, Merge, Selection and Bubble Sort

Implementation of Radix Sort:

bucket = {}

# creating bucket for each digits
def reset_bucket():
    global bucket
    for i in xrange(10): bucket[i] = []
# to handle variable length items`
def radix_group_numbers(numbers):
    joined = []
    max_length = max(len(str(x)) for x in numbers)
    bkt = {}
    for i in xrange(max_length + 1):
        bkt[i] = []
    for item in numbers:
        bkt[len(str(item))].append(item)
    for k, v in bkt.items():
        if v: joined.extend(radix_sort(v))
    return joined

def put_in_buckets(numbers, index):  # assuming that the numbers are of equal lenght
    global bucket
    try:
        for item in numbers:
            c = str(item)[index]
            if c is '0':
                bucket[0].append(item)
            elif c is '1':
                bucket[1].append(item)
            elif c is '2':
                bucket[2].append(item)
            elif c is '3':
                bucket[3].append(item)
            elif c is '4':
                bucket[4].append(item)
            elif c is '5':
                bucket[5].append(item)
            elif c is '6':
                bucket[6].append(item)
            elif c is '7':
                bucket[7].append(item)
            elif c is '8':
                bucket[8].append(item)
            elif c is '9':
                bucket[9].append(item)
    except IndexError:
        print 'error occured'
        print numbers, index

def radix_sort(lst):
    global bucket
    max_length = max(len(str(x)) for x in lst)
    for i in xrange(max_length):
        reset_bucket()
        put_in_buckets(lst, max_length - i - 1)
        lst = []
        for key, value in bucket.iteritems():
            if value is not None:
                for item in value:
                    lst.append(item)
        bucket.clear()
    return lst

Implementation of Merge Sort:

# merge two lists
class stack:
    def __init__(self, lst=None):
        if lst is None:
            self.items = []
        else:
            self.items = lst

    def pop(self):
        return self.items.pop()
    def peek(self):
        return self.items[len(self.items) - 1]
    def __str__(self):
        return str(self.items)
    def empty(self):
        return len(self.items) is 0
    def reverse(self):
        l = []
        while self.empty() is False:
            l.append(self.items.pop())
        return l

def merge(l1, l2):  # takes two sorted lists and merges them while keeping the order
    ll = []  # long list consists of l1 + l2
    l1.reverse()
    l2.reverse()
    st1 = stack(l1)
    st2 = stack(l2)
    while st1.empty() is False and st2.empty() is False:
        i1 = st1.peek()
        i2 = st2.peek()
        if i1 <= i2:
            ll.append(i1)
            l1.pop()
        else:
            ll.append(i2)
            l2.pop()
    if st1.empty() is True and st2.empty() is False:
        ll = ll + st2.reverse()
    elif st2.empty() is True and st1.empty() is False:
        ll = ll + st1.reverse()

    return ll


def merge_sort(lst):
    if len(lst) is 1:
        return lst
    else:
        mid = len(lst) // 2
        left = merge_sort(lst[:mid])
        right = merge_sort(lst[mid:])
        return merge(left, right)

Implementation of Selection Sort:

from utils import get_numbers_list

def selection_sort(unsorted_lst):
    for k in xrange(len(unsorted_lst) - 1):  # at max this much iteration required to fully sort

        current_minimum = unsorted_lst[k]  # first elem as the current minimum
        current_minimum_idx = k

        for i in xrange(k, len(unsorted_lst), 1):
            if current_minimum > unsorted_lst[i]:
                current_minimum = unsorted_lst[i]
                current_minimum_idx = i
        # swap the current minimum with the kth element
        tmp = unsorted_lst[k]
        unsorted_lst[k] = current_minimum
        unsorted_lst[current_minimum_idx] = tmp
    return unsorted_lst

Bubble Sort:

from utils import get_numbers_list

def bubble_sort(lst):
    max_iter = len(lst)
    counter = 0
    for m in xrange(max_iter):
        for i in xrange(max_iter - 1 - counter):  # -counter to execute faster
            if lst[i] > lst[i + 1]:  # should swap
                tmp = lst[i + 1]
                lst[i + 1] = lst[i]
                lst[i] = tmp
        counter += 1
    return lst

Now let's generate some sample data:

import sys
import random

# n is the number of samples generated
def generate_samples(n):
    lst = []
    while(n > 0):
        lst.append(random.randint(0, 1000000)) #random integer between 0 and a million
        n -= 1
    f = open("data.txt", "w+")
    for item in lst:
        f.write(str(item) + " ")
    f.close()
    print 'samples written at data.txt...'

def get_numbers_list(fname):
    num_arr = []
    f = open(fname, "r")
    for line in f:
        num_arr.append(list(int(x) for x in line.strip().split(" ")))
    return num_arr[0]

Then we measure the time required for their executions:

import time

from utils import generate_samples, get_numbers_list
from bubble_sort import bubble_sort
from merge_sort import merge_sort
from radix_sort import radix_sort, radix_group_numbers
from selection_sort import selection_sort

#first we need to generate a hugh amount of data

generate_samples(10000)

lst = get_numbers_list('data.txt')

start_time = time.time()
radix_group_numbers(lst)
end_time = time.time()

print 'radix sort took', end_time - start_time, 'seconds'

start_time = time.time()
merge_sort(lst)
end_time = time.time()

print 'merge sort took', end_time - start_time, 'seconds'

start_time = time.time()
bubble_sort(lst)
end_time = time.time()

print 'bubble sort took', end_time - start_time, 'seconds'

start_time = time.time()
selection_sort(lst)
end_time = time.time()

print 'selection sort took', end_time - start_time, 'seconds'

Sample outputs: for 1000 data points:

radix sort took 0.00797295570374 seconds
merge sort took 0.0257380008698 seconds
bubble sort took 0.0774970054626 seconds
selection sort took 0.0248939990997 seconds

for 10000 data points:

radix sort took 0.0430598258972 seconds
merge sort took 0.187104940414 seconds
bubble sort took 8.21462607384 seconds
selection sort took 2.59621596336 seconds

Anik's Tech Tutorials

Thursday, June 2, 2016

Comparing Radix, Merge, Selection and Bubble Sort

No comments:

Post a Comment