#!/usr/bin/env python2.3
"""Look up property centre coordinates via the address.maps.nsw.gov.au search form."""

import cgi
import re
import urllib
import urllib2
from xml.dom.minidom import parseString

from BeautifulSoup import BeautifulSoup

# The same JSF page serves the search form (GET) and the search results (POST).
SEARCH_URL = "http://address.maps.nsw.gov.au/AddressSearchWebClient/faces/AddressSearchExample.jsp"

# id attribute of the HTML table that holds the search results.
RESULTS_TABLE_ID = "form1:resultsTable"


def _build_post_data(number, street, suburb):
    """Return the URL-encoded body for the address search form submission.

    Values are escaped with urllib.urlencode so that spaces, '&', '=' and
    similar characters in the arguments cannot corrupt the request.
    """
    # A list of pairs (rather than a dict) keeps the fields in a fixed order.
    fields = [
        ("form1:houseNumberText", number),
        ("form1:roadNameText", street.lower()),
        ("form1:roadTypeText", ""),
        ("form1:suburbText", suburb.lower()),
        ("form1:postCodeText", ""),
        ("form1:searchButton", "Search"),
        ("form1", "form1"),
    ]
    return urllib.urlencode(fields)


def _collect_rows(soup):
    """Group the result table's "TableRowAction" inputs into one dict per row.

    The third ':'-separated component of each input id is read as the row
    index and the last component as the field name; the input's value
    attribute becomes the field value. A new row is started whenever the
    row index increases.
    """
    rows = []
    current = {}
    current_idx = 0
    for table in soup('table', {'id': RESULTS_TABLE_ID}):
        for inp in table('input'):
            # Inputs without an id cannot be result fields; treat as "".
            inp_id = inp.get("id", None) or ""
            if "TableRowAction" not in inp_id:
                continue
            bits = inp_id.split(":")
            num = int(bits[2])
            if num > current_idx:
                # Never emit an empty row (e.g. when indices start above 0).
                if current:
                    rows.append(current)
                current = {}
                current_idx = num
            current[bits[-1]] = inp.get("value", None)
    if current:
        rows.append(current)
    return rows


def nsw_geocode(number, street, suburb):
    """Search for an address and return the centre point of each match.

    Arguments:
        number -- house number (converted with str() for the form field)
        street -- road name string; lower-cased before being sent
        suburb -- suburb name string; lower-cased before being sent

    Returns a list with one (centreY, centreX) tuple of floats per matching
    result, e.g. [(y1, x1), (y2, x2)]. These are presumably (lat, lng)
    pairs -- confirm against the service. The list is empty when the
    results table contains no matching inputs.

    Raises whatever urllib2 raises on network/HTTP failure, KeyError when
    the first response carries no Set-Cookie header or a result row lacks
    centreX/centreY, and TypeError/ValueError when a centre value is
    missing or not numeric.
    """
    # The first GET is made only to obtain a cookie; its body is not read.
    # The cookie is then sent along with the search POST.
    f = urllib2.urlopen(urllib2.Request(url=SEARCH_URL))
    try:
        cookie = f.info()["Set-Cookie"].split(";")[0]
    finally:
        f.close()

    req = urllib2.Request(
        url=SEARCH_URL,
        data=_build_post_data(number, street, suburb),
        headers={"Cookie": cookie},
    )
    f = urllib2.urlopen(req)
    try:
        data = f.read()
    finally:
        f.close()

    # The returned page contains "--!>" sequences, presumably malformed
    # comment terminators; rewrite them to "-->" before parsing.
    data = data.replace("--!>", "-->")

    rows = _collect_rows(BeautifulSoup(data))
    return [(float(row['centreY']), float(row['centreX'])) for row in rows]


if __name__ == "__main__":
    print(nsw_geocode("89", "Camden", "Enmore"))