Implementation of Radix Sort:
bucket = {}
# creating bucket for each digits
def reset_bucket():
global bucket
for i in xrange(10): bucket[i] = []
# to handle variable length items`
def radix_group_numbers(numbers):
joined = []
max_length = max(len(str(x)) for x in numbers)
bkt = {}
for i in xrange(max_length + 1):
bkt[i] = []
for item in numbers:
bkt[len(str(item))].append(item)
for k, v in bkt.items():
if v: joined.extend(radix_sort(v))
return joined
def put_in_buckets(numbers, index): # assuming that the numbers are of equal lenght
global bucket
try:
for item in numbers:
c = str(item)[index]
if c is '0':
bucket[0].append(item)
elif c is '1':
bucket[1].append(item)
elif c is '2':
bucket[2].append(item)
elif c is '3':
bucket[3].append(item)
elif c is '4':
bucket[4].append(item)
elif c is '5':
bucket[5].append(item)
elif c is '6':
bucket[6].append(item)
elif c is '7':
bucket[7].append(item)
elif c is '8':
bucket[8].append(item)
elif c is '9':
bucket[9].append(item)
except IndexError:
print 'error occured'
print numbers, index
def radix_sort(lst):
global bucket
max_length = max(len(str(x)) for x in lst)
for i in xrange(max_length):
reset_bucket()
put_in_buckets(lst, max_length - i - 1)
lst = []
for key, value in bucket.iteritems():
if value is not None:
for item in value:
lst.append(item)
bucket.clear()
return lst
Implementation of Merge Sort:
# merge two lists
class stack:
def __init__(self, lst=None):
if lst is None:
self.items = []
else:
self.items = lst
def pop(self):
return self.items.pop()
def peek(self):
return self.items[len(self.items) - 1]
def __str__(self):
return str(self.items)
def empty(self):
return len(self.items) is 0
def reverse(self):
l = []
while self.empty() is False:
l.append(self.items.pop())
return l
def merge(l1, l2): # takes two sorted lists and merges them while keeping the order
ll = [] # long list consists of l1 + l2
l1.reverse()
l2.reverse()
st1 = stack(l1)
st2 = stack(l2)
while st1.empty() is False and st2.empty() is False:
i1 = st1.peek()
i2 = st2.peek()
if i1 <= i2:
ll.append(i1)
l1.pop()
else:
ll.append(i2)
l2.pop()
if st1.empty() is True and st2.empty() is False:
ll = ll + st2.reverse()
elif st2.empty() is True and st1.empty() is False:
ll = ll + st1.reverse()
return ll
def merge_sort(lst):
if len(lst) is 1:
return lst
else:
mid = len(lst) // 2
left = merge_sort(lst[:mid])
right = merge_sort(lst[mid:])
return merge(left, right)
Implementation of Selection Sort:
from utils import get_numbers_list
def selection_sort(unsorted_lst):
for k in xrange(len(unsorted_lst) - 1): # at max this much iteration required to fully sort
current_minimum = unsorted_lst[k] # first elem as the current minimum
current_minimum_idx = k
for i in xrange(k, len(unsorted_lst), 1):
if current_minimum > unsorted_lst[i]:
current_minimum = unsorted_lst[i]
current_minimum_idx = i
# swap the current minimum with the kth element
tmp = unsorted_lst[k]
unsorted_lst[k] = current_minimum
unsorted_lst[current_minimum_idx] = tmp
return unsorted_lst
Bubble Sort:
from utils import get_numbers_list
def bubble_sort(lst):
max_iter = len(lst)
counter = 0
for m in xrange(max_iter):
for i in xrange(max_iter - 1 - counter): # -counter to execute faster
if lst[i] > lst[i + 1]: # should swap
tmp = lst[i + 1]
lst[i + 1] = lst[i]
lst[i] = tmp
counter += 1
return lst
Now let's generate some sample data:
import sys
import random
# n is the number of samples generated
def generate_samples(n):
lst = []
while(n > 0):
lst.append(random.randint(0, 1000000)) #random integer between 0 and a million
n -= 1
f = open("data.txt", "w+")
for item in lst:
f.write(str(item) + " ")
f.close()
print 'samples written at data.txt...'
def get_numbers_list(fname):
num_arr = []
f = open(fname, "r")
for line in f:
num_arr.append(list(int(x) for x in line.strip().split(" ")))
return num_arr[0]
Then we measure the time required for their executions:
import time
from utils import generate_samples, get_numbers_list
from bubble_sort import bubble_sort
from merge_sort import merge_sort
from radix_sort import radix_sort, radix_group_numbers
from selection_sort import selection_sort
#first we need to generate a hugh amount of data
generate_samples(10000)
lst = get_numbers_list('data.txt')
start_time = time.time()
radix_group_numbers(lst)
end_time = time.time()
print 'radix sort took', end_time - start_time, 'seconds'
start_time = time.time()
merge_sort(lst)
end_time = time.time()
print 'merge sort took', end_time - start_time, 'seconds'
start_time = time.time()
bubble_sort(lst)
end_time = time.time()
print 'bubble sort took', end_time - start_time, 'seconds'
start_time = time.time()
selection_sort(lst)
end_time = time.time()
print 'selection sort took', end_time - start_time, 'seconds'
Sample outputs:
for 1000 data points:
radix sort took 0.00797295570374 seconds merge sort took 0.0257380008698 seconds bubble sort took 0.0774970054626 seconds selection sort took 0.0248939990997 secondsfor 10000 data points:
radix sort took 0.0430598258972 seconds merge sort took 0.187104940414 seconds bubble sort took 8.21462607384 seconds selection sort took 2.59621596336 seconds
No comments:
Post a Comment