Saturday, June 10, 2017

Snippet: List Comprehension vs Set AND operation


import timeit

timeit.timeit('a = range(50); b = range(25, 75); set(a) & set(b)')
# takes about 5 seconds
timeit.timeit('a = range(50); b = range(25, 75); [x for x in a if x in b]')
# takes about 15 seconds
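
A note on the measurement: both statements above also time the construction of the two range lists. A variant (a rough sketch, not a rigorous benchmark) that builds the lists once in timeit's setup argument, so that only the intersection and the comprehension themselves are timed, could look like this:

import timeit

# the lists are built in setup, so only the compared operations are timed;
# number is lowered just to keep the example quick
print timeit.timeit('set(a) & set(b)',
                    setup='a = range(50); b = range(25, 75)',
                    number=100000)
print timeit.timeit('[x for x in a if x in b]',
                    setup='a = range(50); b = range(25, 75)',
                    number=100000)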

Sunday, April 30, 2017

Interesting Plot Snippet: Bitwise Or and Addition Operation

If the sum of two numbers is the same as the result of their bitwise OR, plotting all such pairs in a scatter diagram produces a beautiful shape.

import matplotlib.pyplot as plt

# collect every pair (i, j) below 1000 for which addition and bitwise OR agree
l1 = []
l2 = []
for i in xrange(1000):
    for j in xrange(1000):
        if (i + j) == (i | j):
            l1.append(i)
            l2.append(j)

plt.scatter(l1, l2)
plt.show()

This is the figure that comes up:

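Why does this happen? Addition and bitwise OR agree exactly when the two numbers share no set bits (no carries are generated), i.e. when i & j == 0, which is what gives the plot its Sierpinski-triangle-like look. The same point set could therefore be generated with a single bitwise AND test, for example:

# equivalent condition: a pair contributes a point iff i & j == 0
points = [(i, j) for i in xrange(1000) for j in xrange(1000) if i & j == 0]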

Monday, January 9, 2017

Value Iteration in Gridworld, Reinforcement Learning

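The following code runs value iteration on the classic 4x3 gridworld: every non-terminal state carries a living reward of -.03, the two terminal states are worth +1 and -1, the cell at row 1, column 1 is a wall, each action moves in the intended direction with probability .8 and slips to one of the two perpendicular directions with probability .1 each, and $\gamma = 1$. Each sweep applies the Bellman update $V(s) \leftarrow R(s) + \gamma \max_{a} \sum_{s'} P(s'|s,a) V(s')$ to every non-terminal state, until the value of state (0, 0) stops changing between sweeps.
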
import copy


class State:
    def __init__(self, rowid, colid):
        self.row = rowid
        self.col = colid

    def __str__(self):
        return str(self.row) + " " + str(self.col)


def get_value_from_state(state):
    return state_values[state.row][state.col]


def get_reward_from_state(state):
    return reward_values[state.row][state.col]


def move_to_new_state(state, action):
    if action == 'left':  # state only changes column-wise
        if state.col == 0 or (state.col == 2 and state.row == 1):  # blocked by the left boundary or the wall at (1, 1)
            return copy.deepcopy(state)
        else:
            return State(state.row, state.col - 1)
    if action == 'right':
        if state.col == 3 or (state.col == 0 and state.row == 1):  # blocked by the right boundary or the wall at (1, 1)
            return copy.deepcopy(state)
        else:
            return State(state.row, state.col + 1)
    if action == 'up':
        if state.row == 0 or (state.row == 2 and state.col == 1):  # blocked by the top boundary or the wall at (1, 1)
            return copy.deepcopy(state)
        else:
            return State(state.row - 1, state.col)
    if action == 'down':
        if state.row == 2 or (state.row == 0 and state.col == 1):  # blocked by the bottom boundary or the wall at (1, 1)
            return copy.deepcopy(state)
        else:
            return State(state.row + 1, state.col)


def move_with_probabilities(state, action):
    # the intended move succeeds with probability .8; the agent slips to each
    # of the two perpendicular directions with probability .1
    if action in ('left', 'right'):
        thisstate = move_to_new_state(state, action)
        upstate = move_to_new_state(state, 'up')
        downstate = move_to_new_state(state, 'down')
        return {thisstate: .8, upstate: .1, downstate: .1}

    elif action in ('up', 'down'):
        thisstate = move_to_new_state(state, action)
        leftstate = move_to_new_state(state, 'left')
        rightstate = move_to_new_state(state, 'right')
        return {thisstate: .8, leftstate: .1, rightstate: .1}


state_values = [  # the +1 and -1 entries are the terminal states; they are never updated
    [0, 0, 0, 1],
    [0, -100, 0, -1],  # (1, 1) is a wall; its value is never read because moves into it are blocked
    [0, 0, 0, 0]
]
reward_values = [  # in each state, the agent gets a reward of -.03
    [-.03, -.03, -.03, -.03],
    [-.03, -.03, -.03, -.03],
    [-.03, -.03, -.03, -.03]
]
gamma = 1

states = [  # all non-terminal, non-wall states
    State(0, 0),
    State(0, 1),
    State(0, 2),
    State(1, 0),
    State(1, 2),
    State(2, 0),
    State(2, 1),
    State(2, 2),
    State(2, 3),
]
actions = ['left', 'right', 'up', 'down']

prev = state_values[0][0]

if __name__ == "__main__":
    while True:
        for s in states:
            mx = float('-inf')
            for a in actions:
                dic = move_with_probabilities(s, a)
                tempSum = get_reward_from_state(s)  # immediate reward

                # add gamma * P(s' | s, a) * V(s') for each possible successor state
                for next_state, prob in dic.items():
                    tempSum += gamma * prob * get_value_from_state(next_state)

                if tempSum > mx:
                    mx = tempSum

            state_values[s.row][s.col] = mx
            
        if abs(state_values[0][0] - prev) < .00003:  # convergence is checked on state (0, 0) only
            break
        prev = state_values[0][0]

    print state_values
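
The loop above stops when the value of state (0, 0) changes by less than .00003 between sweeps. A more common stopping rule tracks the largest change over all states in a sweep; a minimal helper for that, assuming the same nested-list layout as state_values, might look like this:

def max_value_change(old_values, new_values):
    # largest absolute change between two successive value tables
    return max(abs(old_values[r][c] - new_values[r][c])
               for r in range(len(old_values))
               for c in range(len(old_values[0])))

Taking a copy.deepcopy of state_values before each sweep and breaking when this maximum falls below the threshold gives the usual value-iteration stopping criterion.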


Friday, December 23, 2016

Reinforcement Learning With Pygame Part - 1

The following is an example of a simple game that could be used to train agents.
import pygame as pg
from pygame.locals import *
import sys
import random


def new_rect_after_action(newrct, act):
    # the rectangle moves by exactly one rectangle-width per action
    if act == 'right':
        if newrct.right + newrct.width > windowWidth:  # would leave the window, so stay put
            return newrct
        else:
            return pg.Rect(newrct.left + newrct.width, newrct.top, newrct.width,
                           newrct.height)  # Rect(left, top, width, height)
    else:  # action is left
        if newrct.left - newrct.width < 0:  # would leave the window, so stay put
            return newrct
        else:
            return pg.Rect(newrct.left - newrct.width, newrct.top, newrct.width,
                           newrct.height)  # Rect(left, top, width, height)


def circle_falling(crclradius):
    # pick a random x position (within the 800 px window) for the next falling circle
    newx = random.randint(0 + crclradius, 200 - crclradius)
    multiplier = random.randint(1, 4)
    newx *= multiplier
    return newx


windowWidth = 800
windowHeight = 400
FPS = 3  # frames per second setting
fpsClock = pg.time.Clock()

pg.init()

window = pg.display.set_mode((windowWidth, windowHeight))  # width, height
pg.display.set_caption('Catch the ball!')

# setup colors
RED = (255, 0, 0)
GREEN = (0, 255, 0)
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)

# specify circle properties
crclCentreX = 50
crclCentreY = 50
crclRadius = 20

# specify rectangle properties
rctLeft = 400
rctTop = 350
rctWidth = 200
rctHeight = 50
rct = pg.Rect(rctLeft, rctTop, rctWidth, rctHeight)  # Rect(left, top, width, height)

action = 'left'
score = 0
font = pg.font.Font(None, 30)

while True:

    for event in pg.event.get():
        if event.type == QUIT:
            pg.quit()
            sys.exit()
        elif event.type == pg.KEYDOWN:
            if event.key == pg.K_LEFT:
                action = 'left'
                rct = new_rect_after_action(rct, action)
            elif event.key == pg.K_RIGHT:
                action = 'right'
                rct = new_rect_after_action(rct, action)

    window.fill(BLACK)
    # the circle has reached the rectangle's height: catch it to score a point, otherwise lose one
    if crclCentreY >= windowHeight - rctHeight - crclRadius:
        if rct.left <= crclCentreX <= rct.right:
            score += 1
        else:
            score -= 1
        crclCentreX = circle_falling(crclRadius)
        crclCentreY = 50
    else:
        crclCentreY += 20

    pg.draw.circle(window, RED, (crclCentreX, crclCentreY),
                   crclRadius)  # circle(Surface, color, pos(x, y), radius, width=0)
    pg.draw.rect(window, GREEN, rct)  # rect(Surface, color, Rect, width=0)

    text = font.render('score: ' + str(score), True, (238, 58, 140))
    window.blit(text, (windowWidth - 100, 10))
    pg.display.update()
    fpsClock.tick(FPS)

This will create a game window like the one in the following screenshot. The green rectangle can be moved with the left and right arrow keys. If the player places it under the falling red circle, they gain a point; otherwise they lose one. That's all there is to this game. In the second part of this tutorial, we will add intelligence and train an agent to play it.

Update: Second part is now available: http://anixtech.blogspot.com/2016/12/reinforcement-learning-with-pygame.html
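
Before moving on to part 2, here is a rough sketch of how the game's own variables could be turned into an observation and a reward for an agent. This is only an illustration reusing windowWidth, windowHeight, rctHeight and crclRadius from the code above, not necessarily the approach taken in part 2:

def observe(crcl_x, rct, buckets=10):
    # discretize the horizontal positions of the circle and the rectangle
    bucket_width = windowWidth // buckets
    return (crcl_x // bucket_width, rct.left // bucket_width)


def reward(crcl_x, crcl_y, rct):
    # mirrors the scoring rule of the game loop: +1 for a catch, -1 for a miss,
    # 0 while the circle is still falling
    if crcl_y >= windowHeight - rctHeight - crclRadius:
        return 1 if rct.left <= crcl_x <= rct.right else -1
    return 0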

Wednesday, July 27, 2016

Simplified Neural Network Backpropagation Example Python

This is an even more simplified version of this fantastic tutorial: https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/
Let's consider a very simple neural network shown in the following image:

All we want is that, given the input .1, the network outputs the value .9.
We will be using $\frac{1}{1+e^{-x}}$ as our activation function. Initially both weights $w1$ and $w2$ are set to .5, though any arbitrary values would do; different initializations will generally converge to different final weights.

First let's consider the net input to the hidden layer ($neth$). It's simply the input multiplied by $w1$. (Here $outh$ is the output of the hidden layer, $neto$ is the net input to the output layer, $outo$ is the output of the output layer, and $E$ is the error.) That is:

$neth = i1 * w1 = .1 * .5 = .05$
$outh = \frac{1}{1+e^{-neth}} = \frac{1}{1+e^{-.05}} = .51249$

$neto = outh * w2 = .51249 * .5 = .25624$
$outo = \frac{1}{1+e^{-neto}} = \frac{1}{1+e^{-.25624}} = .563711$

$E = \frac{1}{2}*(target - outo)^2 =  \frac{1}{2}*(.9 - .563711)^2 = .056545$
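
These hand calculations can be checked with a few lines of Python:

import math

neth = .1 * .5
outh = 1 / (1 + math.exp(-neth))   # ~0.51249
neto = outh * .5
outo = 1 / (1 + math.exp(-neto))   # ~0.563711
error = .5 * (.9 - outo) ** 2      # ~0.056545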

This concludes the forward pass of the network. We've chosen this particular error function for convenience, as it's convex and differentiable. Now, we can see that our target value was $.9$ but we got $.5637$. We are going to update our weights $w1, w2$ so that the network produces the desired output. How we do that is where the famous backpropagation algorithm comes in. The basic idea is that we calculate how much responsibility each weight should take for producing the error; each weight is then adjusted accordingly and the next iteration begins.

First let's see what happens in the case of $w2$. The partial derivative $\frac{\partial E}{\partial w2}$ gives the rate of change of the error with respect to $w2$. As it can't be calculated directly, we use the chain rule to break it into its constituent parts.

$\frac{\partial E}{\partial w2} = \frac{\partial E}{\partial outo} * \frac{\partial outo}{\partial neto} * \frac{\partial neto}{\partial w2}$


Let's find out each of those parts separately:

$\frac{\partial E}{\partial outo} = \frac{\partial}{\partial outo}(\frac{1}{2}*(target - outo)^2)  = \frac{1}{2} * 2 * ( target - outo) * (-1) $
$= (.9 - .563711) * (-1) = -.336289$

$\frac{\partial outo}{\partial neto} = \frac{\partial }{\partial neto} (\frac{1}{1+e^{-neto}}) = \frac{-1}{(1+e^{-neto})^2} * e^{-neto} * (-1) = \frac{e^{-neto}}{(1+e^{-neto})^2}$
$  = outo * (1-outo) = .563711 * ( 1- .563711) = .24594$

$\frac{\partial neto}{\partial w2} = \frac{\partial (w2*outh)}{\partial w2} = outh = .51249$

So finally we have found the responsibility of $w2$ for producing the error $E$:

$\frac{\partial E}{\partial w2} = -.336289 * .24594 * .51249 = -.042386$

Now let's turn our attention to $w1$, that is we want to calculate $\frac{\partial E}{\partial w1}$. Again, chain rule comes to our rescue.

$\frac{\partial E}{\partial w1} = \frac{\partial E}{\partial outo} * \frac{\partial outo}{\partial neto} *\frac{\partial neto}{\partial outh} * \frac{\partial outh}{\partial neth} * \frac{\partial neth}{\partial w1}$

We already have calculated $\frac{\partial E}{\partial outo}$ and $\frac{\partial outo}{\partial neto}$. Let's calculate the rest.

$\frac{\partial neto}{\partial outh} = \frac{\partial (w2*outh)}{\partial outh} = w2 = .5$
$\frac{\partial outh}{\partial neth} = outh * ( 1- outh) = .51249 * ( 1- .51249) = .24984$
$\frac{\partial neth}{\partial w1} = \frac{\partial (w1 * input)}{\partial w1} = input = .1 $

So,

$\frac{\partial E}{\partial w1} = -.336289 * .24594 * .5 * .24984 * .1 = -.001033$
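
Note that both gradients share the factor $\frac{\partial E}{\partial outo} * \frac{\partial outo}{\partial neto} = -.336289 * .24594 = -.082707$, often called the delta of the output node; it is the quantity propagated back to the hidden layer, and the Python code below reuses it as de_by_douto * douto_by_dneto.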

Now it's time to update our initial guesses of $w1$ and $w2$. Let's say our learning rate is $\eta = .9$. Since both gradients are negative, subtracting $\eta$ times the gradient actually increases each weight.
$w1 = w1 - (\eta * \frac{\partial E}{\partial w1}) = .5 - (.9 * (-.001033)) = 0.5009298654$
Similarly,
$w2 = w2 - (\eta * \frac{\partial E}{\partial w2}) = .5 - (.9 * (-.042386)) = 0.538148$

We can continue this process until the network's output is very close to the target. The following is a Python implementation of that process:

from __future__ import division
import math

ctr = 0
eta = .9
w1 = .5
w2 = .5

i1 = .1
target = .9

while True:
    ctr += 1
    neth = i1 * w1
    outh = 1 / (1 + math.exp(-neth))

    neto = w2 * outh
    outo = 1 / (1 + math.exp(-neto))

    e = .5 * math.pow((target - outo), 2)
    print 'error now is: ', e
    if e < .00005:
        print '*' * 100
        print 'target is: ', target
        print 'output is now: ', outo
        print 'w1 and w2 is: ', w1, ',', w2
        print 'number of iterations: ', ctr
        break
    # gradient for w2
    de_by_douto = (-1) * (target - outo)
    douto_by_dneto = outo * (1 - outo)
    dneto_by_dw2 = outh

    de_by_dw2 = de_by_douto * douto_by_dneto * dneto_by_dw2

    # gradient for w1
    # de_by_dw1 = de_by_douto * douto_by_dneto * dneto_by_douth * douth_by_dneth * dneth_by_dw1
    dneto_by_douth = w2
    de_by_outh = de_by_douto * douto_by_dneto * dneto_by_douth
    douth_by_dneth = outh * (1 - outh)
    dneth_by_dw1 = i1

    de_by_dw1 = de_by_outh * douth_by_dneth * dneth_by_dw1

    # updated w1 and w2
    w1 = w1 - (eta * de_by_dw1)
    w2 = w2 - (eta * de_by_dw2)

Sample output:

Tuesday, July 26, 2016

Who Liked Your Last N Number of Facebook Posts Most

Previously it was possible to find out this information for any friend's profile too, but Facebook has recently revoked that access and apps now need to request this permission directly from the friend. However, you can still find this information about yourself in the following way:

1. Go to this link: https://developers.facebook.com/tools/explorer/.
2. Click on 'Get Token' > 'Get User Access Token' > check user_posts > 'Get Access token'.
3. This will ask for permission to read your timeline. Click on 'Continue as your-name'.
4. You will get an access token like the one in the following image. Copy the whole string.
5. Replace the 'your-token' part in the following code with that string.
from __future__ import division
import operator
import requests
from facepy import GraphAPI

large_dic = {}
token = 'your-token'
graph = GraphAPI(token)
n = 50  # number of posts in the history that we want to consider
ctr = 0
pcount = 0
res = graph.get('me/feed')

def update_progress(progress):
    print '\r{0}% [{1}]'.format(int(progress), '#'*int(progress/10)),

def merge_two_dicts(dict1, dict2):
    """ merges and sums up dict1 with the values from dict2. if key2 from dict2 is present in dict1, then dict1[key2] = dict1[key2] + dict2[key2]
    else it appends key2 with the value as it is
    example: if
    d1 = {'a': 1, 'b': 1, 'c': 1} and
    d2 = {'a': 4, 'd': 1}
    then it returns {'a': 5, 'c': 1, 'b': 1, 'd': 1}
    :param dict1, dict2
    :return merged dict1 and dict2
    """
    for key2 in dict2:
        if key2 in dict1:
            dict1[key2] = dict1[key2] + dict2[key2]
        else:
            dict1[key2] = dict2[key2]
    return dict1


def get_likes_list(post_id):
    """given a post id, this method queries the graphapi to get details of the post, gets the likes list
    and adds them into a dictionary by using the person's name as the key
    :param post_id
    :return dictionary containing likers
    """
    global graph
    global pcount
    d = {}  # to hold the liker's name with a count of 1

    likes_obj = graph.get(post_id + '/likes')
    while True:
        try:
            for liker in likes_obj['data']:
                d[liker['name']] = 1
            likes_obj = requests.get(likes_obj['paging']['next']).json()
        except KeyError:
            pcount += 1
            update_progress((pcount/n)*100)
            break
    return d


for item in res['data']:
    merge_two_dicts(large_dic, get_likes_list(item['id']))

while True:
    if ctr == (n - 25) / 25:  # as 25 is the default limit of /me/feed, to get the last n posts, set the value of ctr to (n-25)/25
        break
    try:
        res = requests.get(res['paging']['next']).json()
        for item in res['data']:
            merge_two_dicts(large_dic, get_likes_list(item['id']))
        ctr += 1
    except KeyError:
        print 'end of calculating posts'
        break


sorted_x = sorted(large_dic.items(), key=operator.itemgetter(1))

print '\n'
print '*'*100
print 'total posts considered', pcount
print 'total likes generated', sum(large_dic.values())
print 'average likes per post', sum(large_dic.values()) / pcount
print '*'*100

for item in reversed(sorted_x):
    print item[0], 'liked your posts', item[1], 'times'

6. Change the value of n in the program to your preference; this is the number of recent posts to consider while counting the likes (see the note after step 7).

7. Run the program; it might take a while depending on the response time of Facebook's servers. The output will contain the list of likers sorted by count, with the most frequent liker at the top.
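
A note on how n maps to pages: as the code's comment says, /me/feed returns 25 posts per page by default, so the loop fetches (n - 25) / 25 extra pages after the first one. For example, n = 50 means one extra page (2 * 25 = 50 posts) and n = 100 means three extra pages (4 * 25 = 100 posts). Since ctr is an integer, n should be a multiple of 25 for the stop condition to trigger exactly.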

Sample output: