Saturday, June 10, 2017
Snippet: List Comprehension vs Set AND operation

import timeit

timeit.timeit('a = list(range(50)); b = list(range(25, 75)); set(a) & set(b)')
# takes about 5 seconds
timeit.timeit('a = list(range(50)); b = list(range(25, 75)); [x for x in a if x in b]')
# takes about 15 seconds
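The set version wins because set intersection hashes its elements, so each membership test is O(1) on average, while x in b scans the whole list, which makes the comprehension quadratic overall. A rough sketch of how the gap grows with input size (the sizes and repeat count here are my own choice, and the timings are machine-dependent):

import timeit

for n in (100, 1000, 5000):
    setup = 'a = list(range(%d)); b = list(range(%d, %d))' % (n, n // 2, n + n // 2)
    print(n,
          timeit.timeit('set(a) & set(b)', setup=setup, number=10),
          timeit.timeit('[x for x in a if x in b]', setup=setup, number=10))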
Sunday, April 30, 2017
Interesting Plot Snippet: Bitwise Or and Addition Operation
If the sum and the bitwise OR of two numbers are equal, plotting those number pairs in a scatter diagram produces a beautiful shape. The two agree exactly when the numbers share no set bits: addition is OR plus carries (i + j = (i ^ j) + 2*(i & j), while i | j = (i ^ j) + (i & j)), so i + j == i | j if and only if i & j == 0.
import matplotlib.pyplot as plt

l1 = []
l2 = []
for i in range(1000):
    for j in range(1000):
        if (i + j) == (i | j):
            l1.append(i)
            l2.append(j)
plt.scatter(l1, l2)
plt.show()

This is the figure that comes up:

[Figure: scatter plot of the qualifying (i, j) pairs, a Sierpinski-triangle-like pattern]
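A quick brute-force check of the carry argument above (my own addition, not part of the original snippet):

# every pair either satisfies both conditions or neither
assert all(((i + j) == (i | j)) == ((i & j) == 0)
           for i in range(256) for j in range(256))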
Monday, January 9, 2017
Value Iteration in Gridworld, Reinforcement Learning
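This implements value iteration on what looks like the classic 4x3 gridworld from Russell and Norvig's AIMA: a +1 terminal state in the top-right corner, a -1 terminal state just below it, a wall in the middle, a small negative living reward, and noisy moves that go the intended way with probability .8 and slip to either perpendicular direction with probability .1 each. Each sweep applies the Bellman backup V(s) <- R(s) + gamma * max_a sum_s' P(s' | s, a) * V(s') to every non-terminal state until the values stop changing.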
import copy
class State:
def __init__(self, rowid, colid):
self.row = rowid
self.col = colid
def __str__(self):
return str(self.row) + " " + str(self.col)
def get_value_from_state(state):
return state_values[state.row][state.col]
def get_reward_from_state(state):
return reward_values[state.row][state.col]
def move_to_new_state(state, action):
    # A move that would leave the grid or enter the wall at (1, 1)
    # leaves the agent where it is.
    if action == 'left':  # state only changes columnwise
        if state.col == 0 or (state.col == 2 and state.row == 1):  # grid edge, or wall to the left
            return copy.deepcopy(state)
        return State(state.row, state.col - 1)
    if action == 'right':  # state only changes columnwise
        if state.col == 3 or (state.col == 0 and state.row == 1):  # grid edge, or wall to the right
            return copy.deepcopy(state)
        return State(state.row, state.col + 1)
    if action == 'up':
        if state.row == 0 or (state.row == 2 and state.col == 1):  # grid edge, or wall above
            return copy.deepcopy(state)
        return State(state.row - 1, state.col)
    if action == 'down':
        if state.row == 2 or (state.row == 0 and state.col == 1):  # grid edge, or wall below
            return copy.deepcopy(state)
        return State(state.row + 1, state.col)
def move_with_probabilities(state, action):
    # The intended move succeeds with probability .8; with probability .1
    # each, the agent slips to one of the two perpendicular directions.
    # Returns a list of (next_state, probability) pairs.
    if action in ('left', 'right'):
        slips = ('up', 'down')
    else:
        slips = ('left', 'right')
    return [(move_to_new_state(state, action), .8),
            (move_to_new_state(state, slips[0]), .1),
            (move_to_new_state(state, slips[1]), .1)]
state_values = [
    [0, 0, 0, 1],      # (0, 3) is the +1 terminal state
    [0, -100, 0, -1],  # (1, 1) is the wall we can never enter; (1, 3) is the -1 terminal state
    [0, 0, 0, 0]
]
reward_values = [  # living reward: the agent gets -.03 in every non-terminal state
[-.03, -.03, -.03, -.03],
[-.03, -.03, -.03, -.03],
[-.03, -.03, -.03, -.03]
]
gamma = 1  # no discounting
states = [  # all non-terminal states; the terminals and the wall keep their fixed values
State(0, 0),
State(0, 1),
State(0, 2),
State(1, 0),
State(1, 2),
State(2, 0),
State(2, 1),
State(2, 2),
State(2, 3),
]
actions = ['left', 'right', 'up', 'down']
prev = state_values[0][0]  # track one state's value to detect convergence

if __name__ == "__main__":
    while True:
        for s in states:
            mx = -float('inf')
            for a in actions:
                # Bellman backup: R(s) + gamma * sum over s' of P(s'|s,a) * V(s')
                temp_sum = get_reward_from_state(s)
                for next_state, prob in move_with_probabilities(s, a):
                    temp_sum += gamma * prob * get_value_from_state(next_state)
                if temp_sum > mx:
                    mx = temp_sum
            state_values[s.row][s.col] = mx
        # stop when the tracked value changes by less than the tolerance
        if abs(state_values[0][0] - prev) < .00003:
            break
        prev = state_values[0][0]
    print(state_values)
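Once the values have converged, the greedy policy falls out by taking an argmax over actions. This extraction step is my own addition, not part of the original post; it reuses the helpers defined above:

def extract_policy():
    # for each state, pick the action with the highest expected one-step value
    # (the reward and gamma terms are the same for every action here, so
    # they don't affect the argmax)
    policy = {}
    for s in states:
        best_action, best_value = None, -float('inf')
        for a in actions:
            value = sum(prob * get_value_from_state(next_state)
                        for next_state, prob in move_with_probabilities(s, a))
            if value > best_value:
                best_action, best_value = a, value
        policy[str(s)] = best_action
    return policy

print(extract_policy())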