import re
import urllib2
def load_page(url):
    """Fetch ``url`` and return the lower-cased link texts of its name entries.

    The page is scanned for ``<a ...>text</a>`` anchors.  An anchor is kept
    only when its markup contains the substring ``listingName``; for each kept
    anchor the text between the tags is lower-cased and returned.

    Args:
        url: Address of the page to download (passed to ``urllib2.urlopen``).

    Returns:
        A list of lower-cased strings, in the order the anchors appear in the
        page.  The list is empty when no anchor qualifies.

    Raises:
        Whatever ``urllib2.urlopen`` or the response's ``read`` raise on a
        failed request; nothing is caught here.
    """
    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()

    # NOTE(review): the pattern in the original source was damaged (its
    # "<...>" parts had been stripped out); this anchor pattern is a
    # reconstruction -- confirm it against the real page markup.
    # Group 1 captures the link text, so no second regex pass is needed.
    return [
        match.group(1).lower()
        for match in re.finditer(r"<a\s[^>]*>([^<]+)</a>", page)
        if "listingName" in match.group(0)
    ]
# Download listing pages 1 through 877 (xrange excludes the upper bound) and
# accumulate every name load_page returns for them, duplicates included.
all_names = []
for page_number in xrange(1, 878):
    all_names.extend(
        load_page('http://www.babyhold.com/list/babygirlnames/page%d/' % page_number)
    )
def len_cmp(x, y):
    """Compare two sized objects by length, in the style of ``cmp``.

    Args:
        x: First object; only ``len(x)`` is used.
        y: Second object; only ``len(y)`` is used.

    Returns:
        -1 if ``x`` is shorter than ``y``, 1 if ``x`` is longer, and 0 when
        both have the same length.
    """
    # NOTE(review): the original condition was damaged (the text between
    # "<" and ">" was lost); ascending-by-length is the reconstruction that
    # matches the surviving "return 1" / "return 0" lines -- confirm.
    len_x, len_y = len(x), len(y)
    if len_x < len_y:
        return -1
    if len_x > len_y:
        return 1
    return 0
# Drop duplicate names, order what is left using len_cmp, and print the
# result with one name per line.
unique_names = sorted(set(all_names), cmp=len_cmp)
print('\n'.join(unique_names))