# 车站信息提取

import requests
import json
import re

# 车站信息保存地址

# Address of the 12306 script that is downloaded below; the parsing step
# expects it to contain a `var station_names ='...';` assignment.
url = "https://www.12306.cn/index/script/core/common/station_name_v10104.js"

# 模拟浏览器请求车站信息

# Browser-like headers sent with the station-list request.
# NOTE(review): the Cookie value is a captured session and will go stale;
# session cookies should normally be loaded from configuration rather than
# committed to source.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36',
    'Cookie': '_uab_collina=160803690845585204105401; JSESSIONID=68E6E2A0E0FF66F4E416B83A0AE6112D; RAIL_EXPIRATION=1608365238025; RAIL_DEVICEID=pVOrHbFn8WzCANjWnqNGH1OGw3sw5K9Ehg5VA2cEhdjRvlVSk8fWs5vshisU8ar92a8t3MQuYjyeZHGl1x0N9SCRnuPvqm_ntcvaTlyudlIHziMwp_5VHT9t0WbDVQ0EB2wDPNmWwlKWmR8hcyz025A4bRpN6TRO; _jc_save_fromStation=%u5357%u4EAC%2CNJH; _jc_save_toStation=%u8FDE%u4E91%u6E2F%2CUIH; _jc_save_wfdc_flag=dc; BIGipServerpool_passport=249823754.50215.0000; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerotn=854589962.38945.0000; _jc_save_toDate=2020-12-16; _jc_save_fromDate=2020-12-20',
    'X-Requested-With': 'XMLHttpRequest',
}
# requests applies no timeout by default, so set one to avoid hanging forever
# on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Fail fast on an HTTP error instead of parsing an error page further down.
response.raise_for_status()
html_info = response.text

# 通过正则提取页面内容

# Extract the quoted payload of the `var station_names ='...';` assignment
# from the downloaded script and split it into its "|"-separated fields.
pat = r"var station_names ='(.*?)';"
_station_match = re.search(pat, html_info, re.S)
if _station_match is None:
    # Indexing findall(...)[0] would fail here with an opaque IndexError;
    # report what is actually wrong with the response instead.
    raise ValueError("station_names assignment not found in the downloaded script")
result = _station_match.group(1)
datas = result.split("|")

# 将站名与三字码依次提取,并放入字典中

# Build the station lookup: the fields are walked in groups of five, and
# within each group the element at offset 1 becomes the key and the element
# at offset 2 becomes the value. A trailing incomplete group is ignored.
station_names = {
    datas[pos]: datas[pos + 1]
    for pos in range(1, 5 * (len(datas) // 5), 5)
}

# 查询余票

# 车次查询请求链接

# Query parameters for the ticket lookup, named so that the date and the two
# station codes can be changed in one place instead of editing the URL text.
train_date = '2020-12-20'  # value sent as leftTicketDTO.train_date
from_station = 'NJH'       # value sent as leftTicketDTO.from_station
to_station = 'UIH'         # value sent as leftTicketDTO.to_station
url = (
    'https://kyfw.12306.cn/otn/leftTicket/query'
    '?leftTicketDTO.train_date={}'
    '&leftTicketDTO.from_station={}'
    '&leftTicketDTO.to_station={}'
    '&purpose_codes=ADULT'
).format(train_date, from_station, to_station)

# 模拟浏览器请求车次信息

# Browser-like headers sent with the ticket query request.
# NOTE(review): the Cookie value is a captured session and will go stale;
# session cookies should normally be loaded from configuration rather than
# committed to source.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36',
    'Cookie': '_uab_collina=160803690845585204105401; JSESSIONID=68E6E2A0E0FF66F4E416B83A0AE6112D; RAIL_EXPIRATION=1608365238025; RAIL_DEVICEID=pVOrHbFn8WzCANjWnqNGH1OGw3sw5K9Ehg5VA2cEhdjRvlVSk8fWs5vshisU8ar92a8t3MQuYjyeZHGl1x0N9SCRnuPvqm_ntcvaTlyudlIHziMwp_5VHT9t0WbDVQ0EB2wDPNmWwlKWmR8hcyz025A4bRpN6TRO; _jc_save_fromStation=%u5357%u4EAC%2CNJH; _jc_save_toStation=%u8FDE%u4E91%u6E2F%2CUIH; _jc_save_wfdc_flag=dc; BIGipServerpool_passport=249823754.50215.0000; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerotn=854589962.38945.0000; _jc_save_toDate=2020-12-16; _jc_save_fromDate=2020-12-20',
    'X-Requested-With': 'XMLHttpRequest',
}
# requests applies no timeout by default, so set one to avoid hanging forever
# on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Fail fast on an HTTP error so the JSON decoding below is not handed an
# error page.
response.raise_for_status()
html_info = response.text

# 提取车次和车站信息

# Decode the response body once and reuse the "data" object for both lookups
# (decoding the same text twice is redundant work).
_query_data = json.loads(html_info)["data"]
ticket_info = _query_data["result"]  # list of "|"-delimited strings, split below
map_info = _query_data["map"]        # mapping used later to turn station codes into names

# 处理车次数据

# Split every "|"-delimited entry into its list of fields.
tickets = [entry.split('|') for entry in ticket_info]

# 展示所需车次信息

# Print selected fields of the fourth entry in the result list.
ticket = tickets[3]

# (label, field index, is_station_code): rows flagged True hold a station
# code that is translated through map_info before being printed.
_DISPLAY_FIELDS = (
    ('出发日期:', 13, False),
    ('车次:', 3, False),
    ('出发站:', 6, True),
    ('目的地:', 7, True),
    ('发车时间:', 8, False),
    ('到达时间:', 9, False),
    ('历时时间:', 10, False),
    ('商务座:', 32, False),
    ('一等座:', 31, False),
    ('二等座:', 30, False),
    ('软卧:', 23, False),
    ('硬卧:', 28, False),
    ('硬座:', 29, False),
    ('无座:', 26, False),
)

# Each value is looked up immediately before it is printed, so a missing
# field stops the output at the same point as separate print calls would.
for _label, _index, _is_station in _DISPLAY_FIELDS:
    _value = ticket[_index]
    if _is_station:
        _value = map_info[_value]
    print(_label, _value)

# 车次信息展示

出发日期: 20201220
车次: C3887
出发站: 南京南
目的地: 连云港
发车时间: 08:16
到达时间: 10:55
历时时间: 02:39
商务座: 10
一等座: 有
二等座: 有
软卧: 
硬卧: 
硬座: 
无座: 

标签: none

添加新评论