# Splitting a string into a list and joining a list back into a string.
# The sample string is called 'text' (not 'str') so the builtin str() used
# by later examples is not shadowed.
text = 'aaa,bbb,ccc'
array = text.split(',')  # array is ['aaa', 'bbb', 'ccc']
array = ['A', 'dead', 'parrot']
delim = "."
str1 = delim.join(array)  # str1 is 'A.dead.parrot'
str2 = ' '.join(array)  # str2 is 'A dead parrot'
array = [1, 2, 3, 4]
import sys

# Basic exception handling. 'except ... as e' needs Python 2.6 or newer.
try:
    pass  # some risky code
except Exception as e:  # catch any errors derived from Exception
    print(e)  # print error
    print(sys.exc_info())  # another way to print error
def somefunc(x, y):
    """Print the result of ``x + y``."""
    print(x + y)
If you want to suppress the interpretation of escape sequences, you want a raw string constant. To make a string a raw string constant, just put an 'r' in front of it (outside the quotes).
Example:
'\\\\' # normal string, contains two backslashes
r'\\\\' # raw string, contains four backslashes
import os

os.getcwd()  # current working directory
os.chdir("/path/to/dir")  # change the working directory
# exists() accepts both files and directories:
os.path.exists('somefile.txt')
os.path.exists('somedir')
os.path.isfile('something')
os.path.isdir('something')
os.path.getsize('somefile')  # size in bytes
name = "/home/temp.txt"
print os.path.basename(name) # contains 'temp.txt'
print os.path.dirname(name) # contains '/home'
print os.path.split(name) # contains ('/home', 'temp.txt')
# Looping over an explicit tuple of values:
for p in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10):
    print(p)
# same thing: range() excludes its upper bound, so stop at 11 to include 10
for p in range(1, 11):
    print(p)
# you can also use an 'else'
for i in foo:
if i == 0:
break
else:
print("i was never 0") # only gets run if we never used 'break' to break out of the loop.
name = raw_input("what's your name?: ")
# Open a file for reading. The handle is called 'fh' to match the read/write
# examples that follow and to avoid shadowing Python 2's builtin 'file' type.
fh = open('test.txt', 'r')
File Modes:
r = read
w = write
a = append
r+,w+ = read and write
a+ = read and append
This assumes we opened a file with the 'open' function and the file handle is called 'fh':
fh.read() # read the whole file
fh.read(n) # read the next n bytes in the file
fh.readline() # read the next line in the file
fh.readlines() # read the entire file and return a list of line strings
fh.write("hello!")
os.rename("from this name","to this name")
os.remove("this file")
os.chmod("filename",0777)
They have the general form of:
(lambda arg1, arg2, arg3 etc : some expression)
You can only have one expression, and the value of that expression is automatically returned.
Example:
times = (lambda x, y: x * y)
print times(2,20) # prints 40
# you can't use print in lambda, so here's how you have to do it:
import sys
p = (lambda x: sys.stdout.write(str(x)+"\n"))
p(10) # prints 10
You need to declare global variables as global in each function that modifies that variable:
globvar = 0


def set_globvar_to_one():
    """Rebind the module-level ``globvar`` to 1."""
    global globvar  # Needed to modify global copy of globvar
    globvar = 1


def print_globvar():
    """Print the current value of the module-level ``globvar``."""
    print(globvar)  # No need for global declaration to read value of globvar
str(42)
int("42")
In this example, x has a default value of 2:
def adder(x=2):
    """Print ``x + 2``; ``x`` defaults to 2 when the argument is omitted."""
    x += 2
    print(x)


adder(5)  # prints 7
adder()  # prints 4
Here's a very basic class. __init__ acts as a constructor.
class Animal:
    """A very basic class holding a single ``name`` attribute."""

    def __init__(self, name="my pet"):
        # __init__ runs when the object is created; 'name' is optional.
        self.name = name


me = Animal("james")
print(me.name)
import webbrowser
webbrowser.open("http://xkcd.com/353/")
Use the dis module.
For example:
import dis


def myname(x):
    """Print ``x`` followed by " is my name."."""
    print(x + " is my name.")


# dis.dis() writes the disassembly to stdout itself and returns None, so it
# is called directly; wrapping it in print would emit a stray "None".
dis.dis(myname)
Here we're looking at the bytecode generated for the 'myname' function. If you want to find out what the generated instructions (like 'PRINT_ITEM') mean, check out this page.
Use three quotes at the start and end of the string. Example:
s = """this is a
multiline string
in python."""
Can also be used with strings.
L = [1,2,3,4]
len(L) # returns 4
Can be used with both lists and strings.
# Slicing takes the form S[start:stop:step]; 'stop' is exclusive.
S = "aditya"
S[1:3] # "di"
S[1:] # "ditya", same as S[1:6] or S[1:len(S)]
S[:5] # "adity", same as S[0:5] or S[:-1] or S[0:-1]
S[:] # "aditya", same as S
# Optional third index:
S[::1] # "aditya", same as S and S[:]
S[::2] # "aiy", the '2' means every other
S[::3] # "at", the '3' means every third
S[::-1]# "aytida", reverses the string
S[::-2]# "atd", reverses the string and gets every other one
S = "aditya"
S.find("it") # returns 2, the index where the substring starts
S = "aditya"
R = S.replace("a","b") # replace ALL, R is now "bdityb"
R = S.replace("a","b",1) # replace ONE, R is now "bditya"
L = [1,2,3]
L += [4,5,6] # adding a list to a list
L.append(7) # append = push in python. There is no 'push' method.
# L is now [1,2,3,4,5,6,7]
L.insert(1,44)
# L is now [1,44,2,3,4,5,6,7]
# Removing items from a list by position.
L = [1,2,3,4,5,6,7]
L.pop() # removes the last item, '7'
# L is now [1,2,3,4,5,6]
L.pop(2) # removes the item at index 2, which is '3'
# L is now [1,2,4,5,6]
del L[4] # removes the item at index 4, which is '6'
# L is now [1,2,4,5]
L = [4,5,6,3,2,4,5]
L.sort()
# L is now [2, 3, 4, 4, 5, 5, 6]
L = [1,2,3,4]
L.reverse() # L is now [4,3,2,1]
L = [[1,2,3],
[4,5,6],
[7,8,9]]
L[0] # [1,2,3]
L[0][1] # 2
Very similar to a for loop.
L = [[1,2,3],
[4,5,6],
[7,8,9]]
col2 = [row[1] for row in L]
# col2 is now [2,5,8]
# diagonal:
diag = [L[i][i] for i in [0,1,2]]
#diag is now [1,5,9]
D = {'name':'dennis','age':23}
D['name'] # 'dennis'
2D dictionary:
D = {'name' : {'first':'dennis','last':'mitchell'},
'age' : 23}
print D['name']['first'] # prints 'dennis'
for key in sorted(D):
print key,"=>",D[key]
Truncates remainders. Example:
print 5//2 # prints '2' instead of '2.5'
import math
math.sqrt(144) # returns 12
import math
math.pi
import random

# generate a random integer between 1 and 10 (both ends included)
random.randint(1, 10)
# choose one of the following; choice() takes ONE sequence, not several
# separate arguments
random.choice(['Hail to the Thief', 'OK Computer', 'The Bends'])
# generate a random floating point number between 0 and 1
random.random()  # returns something like 0.456838483
D = {'name':'dennis','age':30}
L = D.keys() # L is now ['name','age'] or ['age','name']...no definite order
D = {'name':'dennis','age':30}
L = D.values() # L is now ['dennis',30] or [30,'dennis']...no definite order
D = {'name':'dennis','age':30}
D.pop('name') # D is now {'age':30}
Python's version of Perl's chomp():
line = "Adit\n"
line = line.rstrip()  # line is now 'Adit'
# rstrip() with no argument removes ALL trailing whitespace (spaces, tabs and
# newlines); use rstrip("\n") to remove only newlines.
# doesn't change anything if the string has no trailing whitespace
line2 = "Calvin"
line2 = line2.rstrip()  # line2 is still 'Calvin'
Shelve works exactly like Pickle, except objects are saved in an access-by-key database i.e. you can think of it as a dictionary on disk. Suppose we have the following custom class, and we want to be able to save and retrieve objects of this class:
class Person:
def __init__(self,x=""):
self.name = x
def printme(self):
print "my name is ",self.name
Here's how we save the object:
D = Person("dennis")
import shelve
F = shelve.open("datafile") # notice the file doesn't have a 'txt' extension and we don't specify a mode (r or w)
F['key'] = D
F.close()
Here's how we retrieve it:
import shelve
F = shelve.open("datafile")
D = F['key']
D.printme() # call the printme function of the Person class
import copy

# NOT a copy: L2 is just a second name for the same list object
L = [1, 2, [3, 4]]
L2 = L
L2[0] = 50  # Now L is [50,2,[3,4]]
# Shallow copy: the outer list is new, but the nested list is still shared
L3 = L[:]
L3[0] = 100  # L is still [50,2,[3,4]]
L3[2][1] = 100  # L is now [50,2,[3,100]]
# Deep copy: nested objects are copied too, so nothing is shared with L
L3 = copy.deepcopy(L)
L3[0] = 100  # L is still [50,2,[3,100]]
L3[2][1] = 200  # L is still [50,2,[3,100]]; only L3 becomes [100,2,[3,200]]
The following values are truthy (they evaluate to True in a boolean context):
"spam"
1
50
The following values are falsy (they evaluate to False in a boolean context):
""
[]
{}
0.0
None
# all values of a will have a default value of 4
import collections
a = collections.defaultdict(lambda: 4)
print a[5] # will print '4'
# all values of b will have a default value of 'dennis'
b = collections.defaultdict(lambda: 'dennis')
print b[5] # will print 'dennis'
from random import shuffle
l = [1,2,3,4]
shuffle(l)
name = list("adit")
print name # name is now ['a', 'd', 'i', 't']
# Less obvious way of doing it:
name[:] = "adit"
Add a comma at the end. Example:
print "name" # same as print "name\n" in Perl
print "name", # same as print "name" in Perl
Python has no switch statement. You can use either a series of if/else if's or use a dictionary:
# This prints 50
choice = 'ham';
print { 'pram': 40,
'ham' : 50,
'spam': 30,
'eggs': 2 }[choice]
range(3) # generates [0,1,2]
range(2,4) # generates [2,3]
range(0,10,2) # generates [0,2,4,6,8]
Allows us to use multiple sequences in parallel.
L1 = [1,2,3,4]
L2 = [5,6,7,8]
L3 = zip(L1,L2) # L3 is now [ (1,5) , (2,6) , (3,7) , (4,8) ]
# Processing both sequences simultaneously in a for loop:
for (x,y) in zip(L1,L2):
print x + y,
# prints out: 6 8 10 12
Takes:
1. a function
2. a list of arguments to pass to the function one by one
Returns:
a list containing the results of putting all those values in the function one at a time.
Example:
def square(x):
return x**2
l = map(square,[2,3,4,5])
print l # prints [4, 9, 16, 25]
Gets both the current index we're at in a list and the value at that index.
Example:
l = list("calvin")
for (x,y) in enumerate(l) :
print x,y
# Prints:
# 0 c
# 1 a
# 2 l
# 3 v
# 4 i
# 5 n
The way you make a list is by putting values inside brackets:
[1,2,3,4] # a list
List comprehension is a way to add some logic to the values you are putting in. For example:
L = [1, 2, 3, 4]  # make a list the normal way
# The keyword is just 'for ... in'; there is no 'each' in Python.
L2 = [n for n in L]  # L2 is also [1,2,3,4]
L3 = [n**2 for n in L]  # L3 is [1,4,9,16]
The basic syntax of list comprehension is:
[ expression for target1 in sequence1 [if condition]
             for target2 in sequence2 [if condition]
             ...etc up to an unlimited number of fors and ifs
]
As you can see, we can also nest for loops in a list comprehension. Example:
L = [x + y for x in "ab" for y in "cd"] # L is ['ac', 'ad', 'bc', 'bd']
This tells you all the directories that are searched when looking for a module.
import sys
for p in sys.path: print p
Here's an example of importing a module 'myMod' that resides in the dir2 folder, which in turn resides in the dir1 folder:
import dir1.dir2.myMod
Both dir1 and dir2 MUST contain a file called __init__.py, or the import will fail.
The __init__.py files can be empty, or they can contain some code. Python will run any code they contain as it hits that directory.
If you don't want everything in your module to be exported when a user types 'from *', here's what you can do:
1. Any names prefixed with an underscore are not imported by default with a 'from *':
A = 2 # imported
_A = 2 # not imported
2. You can specify a list called '__all__' at the top of your module that contains all the names to be exported when someone uses a 'from *':
__all__ = ["firstFunc"]
def firstFunc(): # exported
pass
def secondFunc(): # not exported
pass
Of course, if a user wants, they can still get the second function by asking for it explicitly:
from someModule import secondFunc
# another way:
import someModule
someModule.secondFunc()
When defining a class, put all the classes you want it to inherit from in parentheses:
class myClass(parentClass1,parentClass2):
pass
When you create a new object, Python doesn't automatically call its parent's constructor for you. One reason is that classes can inherit from multiple classes, so Python automatically calling any parent constructors would become a complex mess. Instead, you need to specifically call the constructor of any parent classes:
class Animal:
    """Base class: every animal has a ``type`` and a ``name``."""

    def __init__(self, name="Timmy"):
        self.type = "generic"
        self.name = name


class Dog(Animal):
    """An Animal whose ``type`` is "dog"."""

    def __init__(self, name="Snowy"):
        # here we call the parent's constructor:
        Animal.__init__(self, name)
        # set after the parent call so it is not overwritten by "generic"
        self.type = "dog"
Here are some of the operations you can overload in your class:
__init__ # Constructor, Object creation: X = Class( )
__del__ # Destructor, Object reclamation
__add__ # Operator +, X + Y, X += Y
__or__ # Operator | (bitwise OR), X | Y, X |= Y
__repr__,__str__ # Printing, conversions, print X, repr(X), str(X)
__call__ # Function calls, X( )
__getattr__ # Qualification, X.undefined
__setattr__ # Attribute assignment, X.any = value
__getitem__ # Indexing, X[key], for loops and other iterations if no __iter__
__setitem__ # Index assignment, X[key] = value
__len__ # Length, len(X), truth tests
__cmp__ # Comparison, X == Y, X < Y
__lt__ # Specific comparison, X < Y (or else __cmp__)
__eq__ # Specific comparison, X == Y (or else __cmp__)
__radd__ # Right-side operator +, Noninstance + X
__iadd__ # In-place addition, X += Y (or else __add__)
__iter__ # Iteration contexts, for loops, in tests, list comprehensions, map, others
Here's a class that overloads '__init__' and '__add__':
class Person:
    """A person with an ``age`` that supports ``+`` and ``+=`` with a number."""

    def __init__(self, age=0):
        self.age = age

    def __add__(self, x):
        """Return a NEW Person aged ``self.age + x``; ``self`` is unchanged.

        A binary operator should not modify its operands, otherwise
        ``older = me + 20`` would silently change ``me`` as well.
        """
        return Person(self.age + x)

    def __iadd__(self, x):
        """Handle ``me += x`` in place and return the same object."""
        self.age += x
        return self
We can use it like so:
me = Person(30)
me+=20
print me.age # prints 50
Suppose we have the following class:
class Person:
numPeople = 0
def __init__(self,name="unknown"):
self.name = name
Person.numPeople+=1
Here, 'name' is an instance variable since each instance of the class will get its own 'name' variable.
'numPeople' on the other hand is a class variable.
Use the @staticmethod function decorator:
class Person:
    """Shows an ordinary instance method next to a static method."""

    def instMethod(self):
        pass  # do something in the instance method

    @staticmethod  # define this as a static method
    def statMethod():  # notice we don't pass 'self' in
        pass  # do something in the static method


# call the static method:
Person.statMethod()
Here's a standard function for calculating the nth fibonacci number:
def fib(x):
    """Return the x-th Fibonacci number using plain recursion."""
    # Base cases: fib(0) == 0 and fib(1) == 1.
    if x < 2:
        return x
    return fib(x - 1) + fib(x - 2)
This function is very slow; its running time is O(2^n). Now we use a decorator function to speed it up by using memoization. The end result will be this:
@memoize
def fib(x):
if x < 2: return x
return fib(x - 1) + fib(x - 2)
This function is exactly the same as the previous one except for the '@memoize' at the top, but its running time is now O(n). Here's the definition for the memoize decorator:
class memoize:
    """Decorator that caches a function's results, keyed by its arguments.

    Only positional arguments are supported, and they must be hashable
    because they are used as a dictionary key.
    """

    def __init__(self, function):  # automatically gets whatever function you've decorated using this.
        import functools
        # Copy __name__/__doc__ from the wrapped function so the decorated
        # object still identifies itself as the original.
        functools.update_wrapper(self, function)
        self.function = function
        self.memo = {}

    def __call__(self, *args):  # gets called each time the function gets called.
        # Compute and store the result only the first time these args are seen.
        if args not in self.memo:
            self.memo[args] = self.function(*args)
        return self.memo[args]
Example:
print "Usage: %s [arg 1] [arg 2]" % sys.argv[0]
Very easy. SQLite3 is also built in with Python v2.5 and above, which makes life even easier. Here's an example:
import sqlite3

# Connect to the database
conn = sqlite3.connect('reqs.db')
c = conn.cursor()
# Get data
c.execute('select id, name from sometable')
for row in c:
    print(row[0])  # id
    print(row[1])  # name
# Send data. Values are passed as parameters ('?' placeholders) rather than
# written into the SQL text: it avoids quoting mistakes and SQL injection.
c.execute('insert into sometable (name) values (?)', ("bill",))
# Save (commit) the changes
conn.commit()
# We can also close the cursor if we are done with it
c.close()
# ...and the connection itself once no more work is needed
conn.close()
Run the following on the command line:
python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()"
import os
os.path.splitext(filename)[1] # returns something like '.jpg' or '.zip'
Just convert it to a set and back again:
a = [1, 1, 1, 2, 3, 4, 5, 5, 5]
# list(set(a)) builds a NEW list, so assign it back to keep the result.
# A set is unordered: wrap it in sorted() if the order matters.
a = list(set(a))  # a now holds 1,2,3,4,5 exactly once each
# Remove leading and trailing whitespace. The variable is called 'text'
# rather than 'str' so the builtin str() stays usable afterwards.
text = " adit "
text = text.strip()  # text is now 'adit'
Generator expressions are an easy way to create generator objects.
x = (2**y for y in range(10)) # x is now a generator object
for e in x:
pass # do something
Notice that the syntax is almost the same as that of a list comprehension; the only difference is that we're using parentheses "(" instead of brackets "[".
if key in my_dict:
my_dict[key] += 1 # the key exists
else:
my_dict[key] = 1 # the key doesn't exist
The current version of Python discards remainders in division:
print 8/3 # prints 2
Future versions of Python will do division properly. To enable proper division, use:
from __future__ import division
print 8/3 # now prints 2.6666666666666665
This preserves precision.
It's easy using matplotlib:
from pylab import plot, show

l = []
for x in range(2, 50):
    # float() forces true division; with two ints Python 2 would truncate
    # every ratio to 1 and the plot would be a flat line.
    l.append(float(fib(x)) / fib(x - 1))  # fib is some function that calculates fibonacci numbers
plot(l)
show()
That's it! Assuming we have a 'fib' function, that code generates this image:
You can also save the image using:
from pylab import savefig
savefig("fibonacci.gif")
raise [exception name]
raise Exception
# exception with a message
raise Exception("you messed up!")
Here's the basic idea:
import re
s = "calvin"
r1 = re.compile(r'.+') # r1 is now a regex object
if r1.match(s):
print "yes it matches r1"
r2 = re.compile(r'^$') # r2 is now a regex object
if r2.match(s): print "yes it matches r2"
Match vs. Search: A Crucial Difference
'match' only matches at the beginning of the string, whereas 'search' looks for a match anywhere in the string.
re.match(r"o", "dog") # No match as "o" is not the first letter of "dog".
re.search(r"o", "dog") # Match as search() looks everywhere in the string.
You can also do:
# re.match(pattern, string)
re.match(r'.+', s)
But compiling is more efficient if the regex will be used several times in the program.
Using flags
Here's an example using the "ignore case" flag:
re.match(r'.+',s,re.I) # re.I is the ignore case flag
Capturing Groups:
m = re.match(r'.+', s)
if m:
print m.group(0)
print m.start(0) # prints starting index of group 0
print m.end(0) # prints ending index of group 0
print m.groups() # prints a tuple containing all the groups
0 = entire regex match
1 = the first captured group i.e. (......)
etc.
Substitution
m = re.sub(r'cal','vin',s)
print m # prints vinvin
# Do it with compiled regexes:
r3 = re.compile(r'cal')
print r3.sub('vin',s)
Splitting on a regex match
r1.split(s)
Find All Matches
import re
s = "calvin"
r1 = re.compile(r'.') # r1 is now a regex object
m = r1.findall(s)
print m # prints ['c', 'a', 'l', 'v', 'i', 'n']
Raw Strings
In regex, we use raw strings. In raw strings, backslash escape sequences are not interpreted:
s = r'\\'  # raw string, two backslashes
s = '\\'  # regular string, one backslash
Matching Over Multiple Lines
use the re.S flag, which makes the '.' match multiple lines:
r1 = re.compile('.+',re.S)
raise SystemExit
if type(s) is list:
print "is a list"
if type(s) is file:
print "is a file"
if type(s) is str:
print "is a string"
if type(s) is dict:
print "is a dict"
You can use dir() to get a list of names that the module defines:
import os
dir(os) # names in os
dir() # names in the current file
You can also use the help function:
import os
help(os)
This gives a lot more information as well, such as where the file is located.
import somemodule
print somemodule.__file__
m.items() # returns a list of (key,value) pairs
m.get(k,v) # if m[k] exists, returns that, otherwise returns v
m.setdefault(k,v) # if m[k] exists, returns that, otherwise sets m[k] to v and returns v
m.popitem() # removes a random (key,value) pair from m and returns it as a tuple
Here's an example of extending the builtin str type and using operator overloading. In this example, we will allow users to subtract one string from another.
class myStr(str):
    """A ``str`` subclass where ``a - b`` removes every occurrence of b from a."""

    def __sub__(self, s):
        # Wrap the result in myStr: str.replace() returns a plain str, which
        # has no '-' operator, so chained subtraction would otherwise fail.
        return myStr(self.replace(s, ''))


s1 = myStr("AdityaAditya")
s2 = myStr("Adit")
print(s1 - s2)  # prints 'yaya'
import sys
help(sys) # get help on the sys module.
Factory functions are functions that return functions. Here's an example:
def Greeter(x):
    """Return a function that prints the greeting ``x`` followed by a name."""
    def inner(name):
        # 'x' comes from the enclosing Greeter call and stays available here
        print(x + ", " + name + "!")
    return inner


eng = Greeter("Hello")
ger = Greeter("Guten Tag")
eng("Tao")  # prints 'Hello, Tao!'
ger("Knuth")  # prints 'Guten Tag, Knuth!'
The really interesting thing is that the variable 'x' gets saved, instead of being deleted once the Greeter function ends as it usually would be.
Here's a function that takes an arbitrary # of arguments and saves them as tuples:
def printer(*args): print args
printer (1) # prints '(1,)' (and yes that comma is intentional)
printer (1,2,3,4) # prints '(1, 2, 3, 4)'
A parameter in a function definition with a * in front of it will collect all remaining positional arguments into a tuple. There can only be one such parameter, and it has to come after the ordinary parameters. The only thing that may follow it is a ** parameter, which we'll see soon.
The * can also be used to unpack arguments. For example:
def add(x,y,z): return x + y + z
a = [1,2,3]
add(*a) # returns 6
What * is to tuples (not lists!), ** is to dictionaries:
def func(**kwargs): print kwargs
import string
d = dict.fromkeys(string.lowercase,1)
func(**d) # prints {'a': 1, 'c': 1, 'b': 1, 'e': 1, etc...
You can also have both * and ** in a function:
def func(*args,**kwargs): print args, kwargs
func(1,2,3,c=3,d=5) # prints (1, 2, 3) {'c': 3, 'd': 5}
And you can also use these in lambda expressions:
s = (lambda *a: sum(a))
k = range(1,51)
print s(*k) # prints 1275
Suppose you want to do something like:
x = range(10)
for e in x:
pass # do something
You are using memory to store all those objects in x. If you want to save on that memory, you can use a generator instead. A generator is a function that yields a value instead of returning it:
def squares(x):
    """Yield the squares 0, 1, 4, ... of the integers below ``x``, one at a time."""
    for i in range(x):
        # yield hands back one value and pauses here until the next is requested
        yield i**2
Now we can use that generator to print squares:
for i in squares(50):
print i
We can also get a generator object:
x = squares(10)  # x is now a generator object
# The builtin next(x) works on Python 2.6+ and 3; the method form x.next()
# exists only on Python 2.
print(next(x))  # prints 0
print(next(x))  # prints 1
print(next(x))  # prints 4
print(next(x))  # prints 9
print(next(x))  # prints 16
The yield statement suspends the function, and sends a value back to the caller, but retains enough state to enable the function to resume from where it left off. Values are computed on-demand, so you never have the entire result in memory.
See also Generator Expressions.
import time
start = time.time()
x = [2**i for i in range(10000)] # first 10000 powers of 2
print time.time() - start
To prettyprint something in Python, use the pprint module:
import pprint
somedata = {"a":"apple","b":"ball"}
pprint.pprint(somedata)
import simplejson as j
import urllib as u
query = u.urlencode({"q":"calvin","key":"YOUR-KEY-HERE"})
url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s" % (query)
res = u.urlopen(url)
json = j.loads(res.read())
print "estimated page count:",json["responseData"]["cursor"]["estimatedResultCount"]
You can also see the full structure of the response using pprint:
import pprint
# Pretty-print the decoded response ('json'). 'res' is only the open HTTP
# handle, and its body has already been consumed by res.read() above.
pprint.pprint(json)
import exceptions
class MyError(Exception):
    """Custom exception that carries an arbitrary ``value``.

    It derives from the builtin ``Exception``, which is the same class as
    ``exceptions.Exception`` on Python 2.
    """

    def __init__(self, value=None):
        # Forward to the base class so ``args`` and repr() behave normally.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # __str__ must return a string; returning the raw value raises
        # TypeError when it is None (the default) or any other non-string.
        return str(self.value)
raise MyError,"testing"
reload(module)
import sys
print sys.modules
Modules are searched for in this order:
1. Current Directory
2. Directories listed in the PYTHONPATH environment variable
3. An installation-dependent default path: usually the directory where the standard library modules are located.
See the instructions here. On the command line, use the following command to build the library:
bjam toolset=darwin
If you want to find out the size of an object (any object) and you have Python 2.6 or higher, you can use:
sys.getsizeof(int)
sys.getsizeof(whatever)
If you want to find out the largest integer supported by the integer type, you can use:
import sys
print sys.maxint
Use the atexit module:
import atexit as a
def goodbye():
print "bye!"
# general form: register(functionname, args, kwargs)
a.register(goodbye)
for i in range(10):
print i
To check whether you are on a little endian or a big endian system, use:
import sys
print sys.byteorder
In Python2.6 and above, you can do this:
bin(2**3) # prints 0b1000
This is the equivalent of backticks in Perl.
# Here we are piping some data to less. Here we use the -r flag to show ansi escape sequences (colors).
import subprocess
proc = subprocess.Popen(["less -r"], stdin=subprocess.PIPE, shell=True)
proc.communicate("test")
It's easy using Beautiful Soup:
import urllib as u
from BeautifulSoup import BeautifulSoup as b
url = "http://github.com/"
res = u.urlopen(url)
soup = b(res.read())
images = soup.findAll('img')
for i in images:
print i['src']
Use optparse:
from optparse import OptionParser
parser = OptionParser()
parser.add_option("-l", "--local", dest="local",
action="store_true",
help="search the local basics")
parser.add_option("-t", "--term",
dest="term",
action="store_true",
help="show the results in the terminal")
(options, args) = parser.parse_args()
Flags
Now you can access the above flags using their destination value:
if options.local:
# do something if -l flag was used
Positional Arguments
The positional arguments will be stored in the args list:
print args[0] # print the first positional argument
import urllib
urllib.urlretrieve ("http://www.example.com/songs/mp3.mp3", "destination.mp3")
execfile('test.pl')
from PIL import Image
im = Image.open("test.jpg")
from PIL import Image
Im = Image.open('red.jpg')
Im = Im.convert('L')
Im.show()
# Open an image, save a copy under a new name, then build a new image by
# applying a function to every pixel value.
im = Image.open('lilybw.jpg')
im.save('test.jpg')
out = im.point(lambda i: i * 10) # multiply each pixel by 10
from pylab import *

from PIL import Image
im = Image.open('lilybw.jpg')
# convert the image to a numpy array (i.e. a matrix)
m = asarray(im)
# identity matrix sized to m's column count, so that dot(m, I) is defined
I = identity(m.shape[1])
# im2 is m * an identity matrix, so im2 = m
im2 = dot(m, I)
# m was of type uint8, but after taking the dot product,
# im2 is of type double. So when converting back to an
# image, we first need to convert from double to uint8.
im2 = Image.fromarray(im2.astype(uint8), 'L')
im2.show()
a = "hello there"
a.encode("rot13") # prints "uryyb gurer"
unicode(some_ascii_str)
python -m pdb myscript.py