import re

# Sample sentence to search. Named `text` (not `str`) so the builtin `str`
# type stays usable for the rest of the module.
text = 'an example word:cat!!'

# Look for the literal "word:" followed by exactly three word characters.
match = re.search(r'word:\w\w\w', text)

# re.search returns None when nothing matches, so test before calling group().
if match:
    print('found', match.group())
else:
    print('did not find')
# Each (pattern, subject) pair is searched in order. `match` is rebound on
# every iteration, so only the result of the final search survives the loop.
_SEARCH_DEMOS = (
    (r'iii', 'piiig'),                  # literal run            -> 'iii'
    (r'igs', 'piiig'),                  # substring is absent    -> None
    (r'..g', 'piiig'),                  # any two chars then 'g' -> 'iig'
    (r'\d\d\d', 'p123g'),               # three digits           -> '123'
    (r'\w\w\w', '@@abcd!!'),            # three word characters  -> 'abc'
    (r'pi+', 'piiig'),                  # 'p' plus one or more i -> 'piii'
    (r'i+', 'piigiiii'),                # leftmost run wins      -> 'ii'
    (r'\d\s*\d\s*\d', 'xx1 2   3xx'),   # optional whitespace    -> '1 2   3'
    (r'\d\s*\d\s*\d', 'xx12  3xx'),     #                        -> '12  3'
    (r'\d\s*\d\s*\d', 'xx123xx'),       #                        -> '123'
    (r'^b\w+', 'foobar'),               # anchored at the start  -> None
    (r'b\w+', 'foobar'),                # unanchored             -> 'bar'
)
for _pattern, _subject in _SEARCH_DEMOS:
    match = re.search(_pattern, _subject)
import re

line = "Cats are smarter than dogs"

# The greedy (.*) captures everything before " are "; the lazy (.*?) captures
# only up to the next space. re.I makes the match case-insensitive and re.M
# changes how ^/$ behave, which this pattern does not use.
searchObj = re.search(r'(.*) are (.*?) .*', line, flags=re.M | re.I)

if searchObj is None:
    print("Nothing found!!")
else:
    print("searchObj.group() : ", searchObj.group())
    print("searchObj.group(1) : ", searchObj.group(1))
    print("searchObj.group(2) : ", searchObj.group(2))

Email Example

# Sample text containing a single e-mail address. Named `text` rather than
# `str` so the builtin `str` type is not shadowed.
text = 'purple alice-b@google.com monkey dishwasher'

# Naive pattern: \w matches neither '-' nor '.', so only 'b@google' is found.
match = re.search(r'\w+@\w+', text)
if match:
    print(match.group())

# Character classes that also accept '.' and '-' capture the whole address.
match = re.search(r'[\w.-]+@[\w.-]+', text)
if match:
    print(match.group())

Group Extraction

# Sample text containing a single e-mail address. Named `text` rather than
# `str` so the builtin `str` type is not shadowed.
text = 'purple alice-b@google.com monkey dishwasher'

# Raw string so that \w reaches the regex engine unchanged; in a normal string
# literal "\w" is an invalid escape sequence and triggers a warning.
# Group 1 is the part before '@', group 2 the part after it.
match = re.search(r'([\w.-]+)@([\w.-]+)', text)
if match:
    print(match.group())     # the whole match
    print(match.group(1))    # user name
    print(match.group(2))    # host

findall

# Sample text with two e-mail addresses. Named `text` rather than `str` so
# the builtin `str` type is not shadowed.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'

# findall returns every non-overlapping match as a list of strings:
# ['alice@google.com', 'bob@abc.com']
emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)
for email in emails:
    print(email)

findall and Groups

# Sample text with two e-mail addresses. Named `text` rather than `str` so
# the builtin `str` type is not shadowed.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'

# With two capture groups, findall returns one (user, host) tuple per match.
tuples = re.findall(r'([\w\.-]+)@([\w\.-]+)', text)
print(tuples)

# Unpack each pair directly; this also avoids rebinding the builtin `tuple`.
for user, host in tuples:
    print(user)
    print(host)

Substitution

import re

phone = "2004-959-559 # This is Phone Number"

# Strip the Python-style comment: everything from '#' to the end of the
# string. The space that precedes '#' is not part of the match and is kept.
num = re.compile(r'#.*$').sub("", phone)
print("Phone Num : ", num)

# Drop every non-digit character from the original string, leaving digits only.
num = re.compile(r'\D').sub("", phone)
print("Phone Num : ", num)

Exercise:

(1) Write a Python program testdate.py that uses a regular expression to test whether a text file contains a valid date in numerical notation. The program accepts one or more file names as command line arguments and prints basic usage information when provided with no arguments. Examples for valid dates include "30/1/10", "30/1/2010", "30-1-2010", "30-01-2010", "30.1.2010", "30. 1. 2010", and even "2010-01-30". Your program should detect these formats and other variations. At the same time, it should have as few false positives as possible, for example, not treating "13010", "01302010", or "30-30-10" as dates. Note that if a text file contains a valid date, it may still contain other arbitrary text.

(2) type(). The type() built-in function returns a type object, which is displayed as a Pythonic-looking string:


>>> type(0)
<type 'int'>
>>> type(.34)
<type 'float'>
>>> type(dir)
<type 'builtin_function_or_method'>

Create a regex that extracts the actual type name from the string. Your function should take a string like "<type 'int'>" and return "int". (The same applies to all other types, e.g., "float", "builtin_function_or_method", etc.) Note: You are implementing the value that is stored in the __name__ attribute for classes and some built-in types.

(3) Here’s a list of made-up gene names:
xkn59438, yhdck2, eihd39d9, chdsye847, hedle3455, xjhd53e, 45da, de37dp
Write a program that will print only the gene names that satisfy the following criteria – treat each criterion separately: