# 车站信息提取
import requests
import json
import re
# Address of the 12306 script that holds the station information.
url = "https://www.12306.cn/index/script/core/common/station_name_v10104.js"
# Browser-like headers for the station-information request.
# NOTE(review): the Cookie is a captured session value hard-coded in the
# source; it presumably goes stale -- confirm whether this request needs it
# and consider supplying it from outside the file.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36',
    'Cookie': '_uab_collina=160803690845585204105401; JSESSIONID=68E6E2A0E0FF66F4E416B83A0AE6112D; RAIL_EXPIRATION=1608365238025; RAIL_DEVICEID=pVOrHbFn8WzCANjWnqNGH1OGw3sw5K9Ehg5VA2cEhdjRvlVSk8fWs5vshisU8ar92a8t3MQuYjyeZHGl1x0N9SCRnuPvqm_ntcvaTlyudlIHziMwp_5VHT9t0WbDVQ0EB2wDPNmWwlKWmR8hcyz025A4bRpN6TRO; _jc_save_fromStation=%u5357%u4EAC%2CNJH; _jc_save_toStation=%u8FDE%u4E91%u6E2F%2CUIH; _jc_save_wfdc_flag=dc; BIGipServerpool_passport=249823754.50215.0000; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerotn=854589962.38945.0000; _jc_save_toDate=2020-12-16; _jc_save_fromDate=2020-12-20',
    'X-Requested-With': 'XMLHttpRequest',
}
# requests applies no timeout unless one is given, so a stalled connection
# would block the script forever; bound the wait explicitly.
response = requests.get(url, headers=headers, timeout=10)
# Raise on a 4xx/5xx status instead of carrying on with an error-page body.
response.raise_for_status()
html_info = response.text
# Extract the page content with a regular expression: capture the quoted
# value assigned to `station_names` in the downloaded text.
pat = "var station_names ='(.*?)';"
# re.S lets '.' match newlines as well; only the first match is used.
station_matches = re.findall(pat, html_info, flags=re.S)
result = station_matches[0]
# The captured value is a '|'-separated sequence of fields.
datas = result.split("|")
# Pair each station name with its three-letter code.  The split fields are
# read in strides of five: within stride k, field 5*k+1 becomes the key and
# field 5*k+2 the value.
station_names = {
    datas[5 * stride + 1]: datas[5 * stride + 2]
    for stride in range(len(datas) // 5)
}
# 查询余票
# Train query request URL: left-ticket query for train date 2020-12-20, from
# station code NJH to station code UIH, purpose code ADULT.
url = 'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2020-12-20&leftTicketDTO.from_station=NJH&leftTicketDTO.to_station=UIH&purpose_codes=ADULT'
# Browser-like headers for the train-information request.
# NOTE(review): the Cookie is a captured session value hard-coded in the
# source; it presumably goes stale -- consider supplying it from outside the
# file.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36',
    'Cookie': '_uab_collina=160803690845585204105401; JSESSIONID=68E6E2A0E0FF66F4E416B83A0AE6112D; RAIL_EXPIRATION=1608365238025; RAIL_DEVICEID=pVOrHbFn8WzCANjWnqNGH1OGw3sw5K9Ehg5VA2cEhdjRvlVSk8fWs5vshisU8ar92a8t3MQuYjyeZHGl1x0N9SCRnuPvqm_ntcvaTlyudlIHziMwp_5VHT9t0WbDVQ0EB2wDPNmWwlKWmR8hcyz025A4bRpN6TRO; _jc_save_fromStation=%u5357%u4EAC%2CNJH; _jc_save_toStation=%u8FDE%u4E91%u6E2F%2CUIH; _jc_save_wfdc_flag=dc; BIGipServerpool_passport=249823754.50215.0000; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerotn=854589962.38945.0000; _jc_save_toDate=2020-12-16; _jc_save_fromDate=2020-12-20',
    'X-Requested-With': 'XMLHttpRequest',
}
# requests applies no timeout unless one is given, so a stalled connection
# would block the script forever; bound the wait explicitly.
response = requests.get(url, headers=headers, timeout=10)
# Raise on a 4xx/5xx status instead of carrying on with an error-page body.
response.raise_for_status()
html_info = response.text
# Extract the train and station information.  The response text is decoded
# once and its "data" object reused, rather than parsing the same JSON twice.
_query_data = json.loads(html_info)["data"]
# "result" holds the train entries.
ticket_info = _query_data["result"]
# "map" holds the station information (presumably station code -> display
# name; verify against the response).
map_info = _query_data["map"]
# Process the train data: break every entry into its '|'-separated columns.
tickets = [record.split('|') for record in ticket_info]
# Show the wanted train: the entry at index 3 of the parsed list.
ticket = tickets[3]
# One row per printed line, in output order:
# (label, column index in `ticket`, whether the column value is a key that
# must be looked up in map_info before printing).
display_rows = (
    ('出发日期:', 13, False),
    ('车次:', 3, False),
    ('出发站:', 6, True),
    ('目的地:', 7, True),
    ('发车时间:', 8, False),
    ('到达时间:', 9, False),
    ('历时时间:', 10, False),
    ('商务座:', 32, False),
    ('一等座:', 31, False),
    ('二等座:', 30, False),
    ('软卧:', 23, False),
    ('硬卧:', 28, False),
    ('硬座:', 29, False),
    ('无座:', 26, False),
)
# Each value is resolved immediately before its own print, so lines are
# emitted one at a time in the listed order.
for label, column, needs_lookup in display_rows:
    cell = ticket[column]
    if needs_lookup:
        cell = map_info[cell]
    print(label, cell)
# 车次信息展示
# 出发日期: 20201220
# 车次: C3887
# 出发站: 南京南
# 目的地: 连云港
# 发车时间: 08:16
# 到达时间: 10:55
# 历时时间: 02:39
# 商务座: 10
# 一等座: 有
# 二等座: 有
# 软卧:
# 硬卧:
# 硬座:
# 无座: