作者: ilay (说唱猜做玩)
看板: Python
标题: Re: [闲聊] 这个要怎麽parse...
时间: Sun Apr 22 17:33:59 2012
贴出惨不忍睹的 code 来看一下了XD
# -*- coding: utf-8 -*-
import urllib, urllib2, cookielib, xml.dom.minidom
# origin
# NOTE(review): the opening `data = {` line was lost in the paste; it is
# reconstructed from the closing brace and the urlencode(data) call below.
# Confirm that no keys preceding 'eshopid' are missing.
data = {
    'eshopid': '996',
    'eshoppwd': '711storemap',
    'sid': '2',
    'storecategory': '2',
    'showtype': '1',
    'storeid': '',
}

# Cookie-aware opener: cookies set by the first request are stored in `cj`
# and sent again on every later request made through `opener`.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

# Passing a data argument makes urllib2 issue a POST. The response body is
# not used; presumably only the cookies from this request matter.
# The URL literal was broken across two lines in the paste and is rejoined.
login_data = urllib.urlencode(data)
opener.open('http://emap.pcsc.com.tw/emap_switch.aspx', login_data).close()
#print cj._cookies
#print cj._cookies['emap.pcsc.com.tw']['/']['yearmonth'].value
#print cj._cookies['emap.pcsc.com.tw']['/']['ASP.NET_SessionId'].value
# area code
# Maps a city/county name to the two-digit code sent as 'cityid'.
# Disabled entries are kept for reference; their codes (09, 16, 18) are
# absent from the active mapping.
area_code = {
    '台北市': '01',
    '基隆市': '02',
    '新北市': '03',
    '桃园县': '04',
    '新竹市': '05',
    '新竹县': '06',
    '苗栗县': '07',
    '台中市': '08',
    # '台中县': '09',
    '彰化县': '10',
    '南投县': '11',
    '云林县': '12',
    '嘉义市': '13',
    '嘉义县': '14',
    '台南市': '15',
    # '台南县': '16',
    '高雄市': '17',
    # '高雄县': '18',
    '屏东县': '19',
    '宜兰县': '20',
    '花莲县': '21',
    '台东县': '22',
    '澎湖县': '23',
    '金门县': '25',
    '马祖': '24',
}
# function to get response
def get_resp(data):
    """POST form fields to the EMapSDK endpoint and return the raw body.

    data: mapping of form field names to values; it is passed to
        urllib.urlencode, so in Python 2 any non-ASCII unicode value must
        already be encoded to a byte string by the caller.

    Returns the value of resp.read() for the response.

    Uses the module-level `opener`, so cookies collected by earlier
    requests through that opener are sent along.
    """
    post_data = urllib.urlencode(data)
    # The URL literal was broken across two lines in the paste; rejoined.
    resp = opener.open('http://emap.pcsc.com.tw/EMapSDK.aspx', post_data)
    try:
        return resp.read()
    finally:
        # Release the connection even if reading fails.
        resp.close()
# start req some data
# GET TOWN
# For every city: fetch its towns with 'GetTown', print each town name, then
# send a 'FindRoad' request for that town and print the raw response.
# NOTE(review): loop nesting was lost in the paste; it is reconstructed from
# the use of node2 inside data2 — confirm against the original script.
for city in area_code:
    data = {
        'cityid': area_code[city],
        'commandid': 'GetTown',
    }
    dom = xml.dom.minidom.parseString(get_resp(data))
    # A single parenthesised argument prints the same text as the original
    # Python 2 print statement.
    print('===== %s =====' % city)
    # Search from the document itself: non-element children of the document
    # (comments, doctype) have no getElementsByTagName.
    for node2 in dom.getElementsByTagName("TownName"):
        if node2.firstChild is None:
            # Empty <TownName/> element: nothing to print or look up.
            continue
        town = node2.firstChild.nodeValue
        print('  %s' % town)
        data2 = {
            'city': city,
            'commandid': 'FindRoad',
            # minidom returns unicode; urllib.urlencode cannot handle
            # non-ASCII unicode in Python 2, so encode to UTF-8 bytes first
            # (the same encoding the source file uses for `city`).
            'town': town.encode('utf-8'),
        }
        print(get_resp(data2))
--
※ 发信站: 批踢踢实业坊(ptt.cc)
◆ From: 114.33.253.178
1F:→ ilay:基本上是延续前面大神的code作修改的 改成这样抱歉了(掩面) 04/22 17:35
2F:→ doghib:'town': node2.firstChild.nodeValue.encode('big5') ? 04/22 17:54
3F:→ kilfu0701:'town': node2.firstChild.nodeValue.encode('utf-8') 04/22 18:26
4F:→ kilfu0701:把unicode转成utf-8 再urlencode() 04/22 18:28
5F:→ kilfu0701:或是 urllib.quote(data2['town'].encode('utf-8')) 04/22 18:34
6F:→ ilay:哦哦哦! 太棒了 谢谢你qq 04/22 20:36