引入

当在12306上查车次时,例如输入北京-上海时,返回的结果里车次往往会有出发站点是北京南,到达站点里也会有上海虹桥,这里可能会让人觉得12306查车次可以按城市查询,但是12306查车次时并不支持城市,12306或者其他二方三方渠道里都是按照车站来查询车次的。因此在车次查询过程中,针对一些车站,在查询时会顺带返回同城市的其他车站的车次结果。

如下图:

捕获.PNG

通查站

这个概念是我在网上找到的,不一定是12306的官方概念,在此之前我没有找到词汇来描述这个机制。

以北京站为例,北京站的通查站有,['北京', '北京东', '北京北', '北京南', '北京西', '怀柔', '怀柔北', '昌平北', '清河']

因此对于北京站来说,在搜索从北京站出发的车次时,把列表里的任意一个车站放到出发站位置发出搜索请求,返回的车次结果(车次,车次号)都是一样的。

比如想要从北京站出发,到上海虹桥站,搜索时用北京东、北京南、怀柔等车站搜索到上海虹桥的车次时,结果都是一样的。

如何确定各个车站的通查站有哪些

12306有一个接口:https://www.12306.cn/kfzmpt/czxx/query?train_start_date=2020-12-27&train_station_name=%E5%8C%97%E4%BA%AC%E4%B8%9C&train_station_code=BOP&randCode=

访问这个接口会返回指定日期指定站点停靠的车次信息,当然,里边也会返回这个车站的通查站:

{
  "validateMessagesShowId": "_validatorMessage",
  "status": true,
  "httpstatus": 200,
  "data": {
    "flag": true,
    "data": [
      {
        "start_train_date": "20201227",
        "train_no": "240000G4910U",
        "start_station_telecode": "BXP",
        "start_station_name": "北京西",
        "start_start_time": "12:33",
        "end_station_telecode": "GOG",
        "end_station_name": "赣州西",
        "end_arrive_time": "22:22",
        "train_type_code": "2",
        "train_type_name": "直通",
        "train_class_code": "8",
        "train_class_name": "高速",
        "seat_types": "29360640",
        "service_type": "1",
        "station_no": "01",
        "display_station_no": "0",
        "station_name": "北京西",
        "station_telecode": "BXP",
        "station_train_code": "G491",
        "bureau_code": "P",
        "arrive_day_diff": "0",
        "arrive_time": "----",
        "update_arrive_time": "----",
        "start_time": "12:33",
        "update_start_time": "----",
        "start_day_diff": "0",
        "start_date": "2020-10-11 00:00:00",
        "stop_date": "2020-12-28 00:00:00",
        "station_train_date": "20201227",
        "city_code": "----",
        "same_city_code": "0357",
        "stopover_time": "0",
        "running_time": "--"
      },

      {
        "start_train_date": "20201227",
        "train_no": "24000044730W",
        "start_station_telecode": "CDP",
        "start_station_name": "承德",
        "start_start_time": "06:45",
        "end_station_telecode": "VBP",
        "end_station_name": "昌平北",
        "end_arrive_time": "14:47",
        "train_type_code": "3",
        "train_type_name": "局管",
        "train_class_code": "3",
        "train_class_name": "普快",
        "seat_types": "2",
        "service_type": "0",
        "station_no": "20",
        "display_station_no": "0",
        "station_name": "怀柔北",
        "station_telecode": "HBP",
        "station_train_code": "4472",
        "bureau_code": "P",
        "arrive_day_diff": "0",
        "arrive_time": "13:24",
        "update_arrive_time": "----",
        "start_time": "13:32",
        "update_start_time": "----",
        "start_day_diff": "0",
        "start_date": "2020-10-11 00:00:00",
        "stop_date": "2021-01-19 00:00:00",
        "station_train_date": "20201227",
        "city_code": "----",
        "same_city_code": "0357",
        "stopover_time": "8",
        "running_time": "6小时39分"
      }
    ],
    "sameStations": [
      "北京",
      "北京东",
      "北京北",
      "北京南",
      "北京西",
      "怀柔",
      "怀柔北",
      "昌平北",
      "清河"
    ]
  },
  "messages": [],
  "validateMessages": {}
}

这里sameStations就是指通查站。

站名三字码提取

import urllib.request
import requests
import ssl
import time
import json
import re

#站名三字码提取
station_url = "https://www.12306.cn/index/script/core/common/station_name_v10107.js"

#模拟浏览器
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36',
           'Cookie': '_uab_collina=160803690845585204105401; JSESSIONID=68E6E2A0E0FF66F4E416B83A0AE6112D; RAIL_EXPIRATION=1608365238025; RAIL_DEVICEID=pVOrHbFn8WzCANjWnqNGH1OGw3sw5K9Ehg5VA2cEhdjRvlVSk8fWs5vshisU8ar92a8t3MQuYjyeZHGl1x0N9SCRnuPvqm_ntcvaTlyudlIHziMwp_5VHT9t0WbDVQ0EB2wDPNmWwlKWmR8hcyz025A4bRpN6TRO; _jc_save_fromStation=%u5357%u4EAC%2CNJH; _jc_save_toStation=%u8FDE%u4E91%u6E2F%2CUIH; _jc_save_wfdc_flag=dc; BIGipServerpool_passport=249823754.50215.0000; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerotn=854589962.38945.0000; _jc_save_toDate=2020-12-16; _jc_save_fromDate=2020-12-20',
           'X-Requested-With':'XMLHttpRequest'} 
response = requests.get(station_url, headers=headers)
station_info = response.text

车站信息处理

# 整理车站信息
station_info_list = station_info[station_info.find('\'')+1:-2].split("@")
station_info_list.remove('')

stations_info = []
for station_str in station_info_list:
    stations_info.append(station_str.split('|'))

print("Totally {} stations, the last index is {}".format(len(stations_info), stations_info[-1][-1]))
# 需要查询的车站索引
station_map = {}
for station in stations_info:
    station_name = station[1]
    station_code = station[2]
    mark = False
    success = False

    station = {}
    station["station_name"] = station_name
    station["station_code"] = station_code
    station["mark"] = mark
    station["success"] = success

    station_map[station_name] = station

stations = []
for key in station_map:
    stations.append(key)

接口调用函数

def get_same_stations(train_start_date, train_station_name, train_station_code):
    # 转换中文为url编码
    train_station_name_url = urllib.request.quote(train_station_name)
    # 构造请求地址
    url = "https://www.12306.cn/kfzmpt/czxx/query?train_start_date="+train_start_date+"&train_station_name="+train_station_name_url+"&train_station_code="+train_station_code+"&randCode="

    #模拟浏览器
    headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36',
               'Cookie': '_uab_collina=160803690845585204105401; JSESSIONID=68E6E2A0E0FF66F4E416B83A0AE6112D; RAIL_EXPIRATION=1608365238025; RAIL_DEVICEID=pVOrHbFn8WzCANjWnqNGH1OGw3sw5K9Ehg5VA2cEhdjRvlVSk8fWs5vshisU8ar92a8t3MQuYjyeZHGl1x0N9SCRnuPvqm_ntcvaTlyudlIHziMwp_5VHT9t0WbDVQ0EB2wDPNmWwlKWmR8hcyz025A4bRpN6TRO; _jc_save_fromStation=%u5357%u4EAC%2CNJH; _jc_save_toStation=%u8FDE%u4E91%u6E2F%2CUIH; _jc_save_wfdc_flag=dc; BIGipServerpool_passport=249823754.50215.0000; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerotn=854589962.38945.0000; _jc_save_toDate=2020-12-16; _jc_save_fromDate=2020-12-20',
               'X-Requested-With':'XMLHttpRequest'} 
    response = requests.get(url, headers=headers)
    query_info=response.text

    sameStations_info=json.loads(query_info)['data']["sameStations"]

    return sameStations_info

查询通查站

接口调用别太频繁
sameStations_map = {}
sameStations_false = []
sameStations_not_in_dict = []
for staion in stations:
    # 提取基本车站日期信息
    train_start_date = "2020-12-27"
    train_station_code = station_map[staion]["station_code"]

    # 检查查询标记,如果查过了或者没必要查,跳过本次处理进行下一个车站查询
    mark = station_map[staion]["mark"]
    if mark:
        continue

    # 向12306发送查询
    sameStations_info = get_same_stations(train_start_date, staion, train_station_code)

    # 标记车站为已查询
    station_map[staion]["mark"] = True

    # 查询的车站没有通查站列表
    if len(sameStations_info) < 1:
        sameStations_false.append(staion)
        print(staion, train_station_code, sameStations_info)

    # 查询通查站成功
    else:
        station_map[staion]["success"] = True
        print("station {} query success same stations {}".format(staion, sameStations_info))

        # 把通查站里不需要查的车站标记出来
        for sameStation in sameStations_info:
            sameStation_format = sameStation.replace( ' ' , '' )
            # 记录不在车站字典里的通查车站
            try:
                sameStation_mark = station_map[sameStation_format]["mark"]
            except KeyError:
                print("{} not in station dict".format(sameStation))
                sameStations_not_in_dict.append(sameStation_format)
                continue
            else:
                # 标记通查站
                if sameStation_mark == False:
                    station_map[sameStation_format]["mark"] = True

        # 存储通查站信息
        sameStations_map[staion] = sameStations_info

    time.sleep(3)

保存原始数据

处理通查站数据

# 处理每个站点的通查站列表里的空格
for key in sameStations_map:

    sameStations = sameStations_map[key]
    new_sameStations = []

    for same_station in sameStations:
        format_same_station = same_station.replace( ' ' , '' )

        if same_station == format_same_station:
            new_sameStations.append(same_station)
        else:
            print("key:{} --- sameStations:{}, format_same_station:{}".format(key, same_station, format_same_station))
            try:
                index_ = sameStations.index(format_same_station)
            except ValueError:
                index_ = -1
            else:

                if index_ != -1:
                    continue
                else:
                    new_sameStations.append(format_same_station)
    sameStations_map[key] = new_sameStations
# 处理每个站点名座位key时有空格的情况
delete_key = []
add_map = {}
sameStations_keys = list(sameStations_map.keys())
for key in sameStations_map:
    format_station = key.replace( ' ' , '' )
    # 如果key没问题
    if key == format_station:
        continue
    # 如果key有空格
    else:
        # 如果去空格后的车站名在列表里
        if format_station in sameStations_keys:
            key_sameStations = sameStations_map[key]
            format_key_sameStations = sameStations_map[format_station]
            # 如果key和去空格之后的key,其通查站相同,记录准备删除
            if key_sameStations == format_key_sameStations:
                delete_key.append(key)
            # 如果key和去空格之后的key,其通查站不相同
            else:
                # 标记原始站名准备删除,同时新站名及其通查站记录下来准备添加
                delete_key.append(key)
                # 取并集
                add_map[format_station] = list(set(key_sameStations).union(set(format_key_sameStations)))
        # 如果去空格后的车站名不在列表里
        else:
            # 记录原始站名,标记删除
            delete_key.append(key)
            add_map[format_station] = sameStations_map[key]
# 删除标记的站点
for delkey in delete_key:
    sameStations_map.pop(delkey)
# 合并原始字典和新添加的字典
sameStations_map.update(add_map)
# 如果车站本身不在其通查站列表里,添加进去
for key in sameStations_map:
    if key not in sameStations_map[key]:
        print("{} not in {}".format(key,sameStations_map[key]))
        sameStations_map[key].append(key)

其他情况处理

# 返回空通查站列表的车站,通查站设为自己

del_sameStations_false = []
add_sameStations_false = []
for false_station in sameStations_false:
    format_false_station = false_station.replace( ' ' , '' )
    if false_station != format_false_station:
        del_sameStations_false.append(false_station)
        add_sameStations_false.append(format_false_station)
#         sameStations_false.remove(false_station)


if not len(del_sameStations_false) + len(add_sameStations_false):
    for false_station in sameStations_false:
        sameStations_map[false_station] = [false_station]

以下车站虽然在通查站列表里出现,但是不在12306返回的车站字典里,在12306查询车次时也无法识别车站名。

可以暂不处理,或者把这些车站从通查站字典里删除。

['包头北', '包头西', '古冶', '唐山南', '乌海北', '咸阳北', '盐津北', '彬州东', '大足', '礼泉南', '永寿西']

统计

station_num = 0
for key, value in sameStations_map.items():
    station_num += len(value)
print("After process, there are {} stations.".format(station_num))
After process, there are 3230 stations.

通查站最终数据:

标签: none

添加新评论