Browse Source

Billiard hall site-selection crawler (台球选址爬虫)

Your Name 1 year ago
Commit
8c483cefc6

+ 3 - 0
.idea/.gitignore

@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml

+ 6 - 0
.idea/encodings.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Encoding" defaultCharsetForPropertiesFiles="UTF-8">
+    <file url="PROJECT" charset="UTF-8" />
+  </component>
+</project>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 7 - 0
.idea/misc.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.8" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/python_project.iml" filepath="$PROJECT_DIR$/.idea/python_project.iml" />
+    </modules>
+  </component>
+</project>

+ 8 - 0
.idea/python_project.iml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

+ 0 - 0
README


+ 68 - 0
gaode_api.py

@@ -0,0 +1,68 @@
+import requests
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from setting import GAODE_KEY, BANJING, LIANSUO_LIST, ZHUZHAI_COUNT, XUEXIAO_COUNT, XIEZILOU_COUNT, LIANSUO_COUNT, \
+    JINGPIN_COUNT
+from log import PPLogger
+
+
+logger = PPLogger(name='gaode_api')
+logger.setup_logger()
+def get_zhoubian(location=None):
+    data_dict = {'pipei': False, 'zhuzhai': [], 'xuexiao': [], 'xiezilou': [], 'jingpin': [], 'liansuo': []}
+    if not location:
+        return data_dict
+    api = 'https://restapi.amap.com/v5/place/around'
+    keywords = LIANSUO_LIST
+    params_list = [{
+        'key': GAODE_KEY,
+        'keywords': keyword,
+        'location': location,
+        'radius': str(BANJING),
+        'show_fields': 'business',
+        'page_size': '50'
+    } for keyword in keywords]
+    params = {
+        'key': GAODE_KEY,
+        'types': '080113|120201|120202|120203|120300|141201|141202|141206',
+        'location': location,
+        'radius': str(BANJING),
+        'show_fields': 'business',
+        'page_size': '50'
+    }
+    results_liansuo = []
+    with ThreadPoolExecutor() as pool:
+        r1 = [pool.submit(requests.get, api, p) for p in params_list]
+        for future1 in as_completed(r1):
+            results_liansuo.append(future1.result().json())
+        r2 = [pool.submit(requests.get, api, params)]
+        for future2 in as_completed(r2):
+            results_qita = future2.result().json()
+
+    if results_qita['infocode'] != '10000':
+        logger.error(results_qita)
+        return data_dict
+
+    for liansuo in results_liansuo:
+        data_dict['liansuo'].extend(liansuo.get('pois', []))  # error responses carry no 'pois' key
+    for qita in results_qita['pois']:
+        if '写字楼' in qita['type']:
+            data_dict['xiezilou'].append(qita)
+        elif '住宅' in qita['type']:
+            data_dict['zhuzhai'].append(qita)
+        elif '学校' in qita['type']:
+            data_dict['xuexiao'].append(qita)
+        elif '台球' in qita['type']:
+            data_dict['jingpin'].append(qita)
+        else:
+            pass
+
+    if (len(data_dict['zhuzhai']) >= int(ZHUZHAI_COUNT) or len(data_dict['xuexiao']) >= int(XUEXIAO_COUNT) or len(
+            data_dict['xiezilou']) >= int(XIEZILOU_COUNT)) \
+            and len(data_dict['jingpin']) <= int(JINGPIN_COUNT) \
+            and len(data_dict['liansuo']) >= int(LIANSUO_COUNT):
+        data_dict['pipei'] = True
+
+    return data_dict
+
+
+# print(get_zhoubian('120.42716445444404,36.165840548830225'))
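A minimal usage sketch, mirroring the commented-out call above (assumes a valid GAODE_KEY in setting.py; the coordinate is illustrative):

    result = get_zhoubian('120.42716445444404,36.165840548830225')
    print(result['pipei'])         # True when the surroundings meet the thresholds in setting.py
    print(len(result['liansuo']))  # chain-brand POIs found within BANJING meters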

+ 44 - 0
log.py

@@ -0,0 +1,44 @@
+import logging
+import os
+from logging.handlers import RotatingFileHandler
+from setting import LOG_FILE, LOG_LEVEL, LOG_FORMAT, LOG_DIR
+
+
+class PPLogger(logging.Logger):
+    def __init__(self, name, level=logging.NOTSET):
+        super().__init__(name, level)
+
+    def setup_logger(self, log_file=LOG_FILE, log_level=LOG_LEVEL, log_format=None, max_bytes=5 * 1024 * 1024,
+                     backup_count=5):
+        # Configure the log format
+        if log_format is None:
+            log_format = LOG_FORMAT
+        formatter = logging.Formatter(log_format)
+
+        # Set the log level
+        self.setLevel(log_level)
+
+        # Create the log directory if needed
+        log_dir = LOG_DIR
+        if not os.path.exists(log_dir):
+            os.makedirs(log_dir)
+
+        # Path to the log file
+        log_path = os.path.join(log_dir, log_file)
+
+        # Create a rotating file handler
+        file_handler = RotatingFileHandler(log_path, maxBytes=max_bytes, backupCount=backup_count)
+        file_handler.setLevel(log_level)
+        file_handler.setFormatter(formatter)
+
+        # Attach the file handler to the logger
+        self.addHandler(file_handler)
+
+        # Create a console handler and set its level
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(log_level)
+        console_handler.setFormatter(formatter)
+
+        # Attach the console handler to the logger
+        self.addHandler(console_handler)
+
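Every module in this commit wires up logging the same way; a short sketch (the logger name 'example' is illustrative — all modules share the default LOG_FILE, so everything lands in logs/58spider.log):

    logger = PPLogger(name='example')
    logger.setup_logger()   # attaches a rotating file handler plus a console handler
    logger.info('hello')    # written to both destinations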

File diff suppressed because it is too large
+ 12631 - 0
logs/58spider.log


File diff suppressed because it is too large
+ 7 - 0
output_csv/laoshan_100_300_0_10000_1717137665891.csv


File diff suppressed because it is too large
+ 9 - 0
output_csv/laoshan_100_300_0_10000_1717139599325.csv


+ 62 - 0
processor.py

@@ -0,0 +1,62 @@
+import json
+import re
+from log import PPLogger
+from bs4 import BeautifulSoup
+from store import WuBaStore
+from setting import STORE_METHOD
+from urllib import parse
+from gaode_api import get_zhoubian
+
+class WuBaProcessor:
+    def __init__(self):
+        self.logger = PPLogger(name='58processor')
+        self.logger.setup_logger()
+        self.store = WuBaStore(STORE_METHOD)
+
+    def processor(self, detail_text, detail_url):
+        """
+        Process and persist one shop detail page.
+        :param detail_text: HTML of the shop detail page
+        :param detail_url: URL of the shop detail page
+        :return:
+        """
+        soup_detail = BeautifulSoup(detail_text, 'lxml')
+        data_dict = dict()
+        try:
+            data_dict['title'] = soup_detail.find('div', class_='house-title').find('h1').text
+            data_dict['tags'] = [span.text for span in
+                                 soup_detail.find('div', class_='house-title').find('p').find_all('span')]
+            data_dict['money_month'] = ''.join(
+                [span.text for span in soup_detail.find('p', class_='house_basic_title_money').find_all('span')[:2]])
+            data_dict['money_day'] = soup_detail.find('p', class_='house_basic_title_money').find_all('span')[2].text
+            data_dict['area'] = soup_detail.find('p', class_='house_basic_title_info').find('span').text
+            data_dict['type'] = soup_detail.find('p', class_='house_basic_title_info').find_all('span')[1].text
+            data_dict['qizuqi'] = soup_detail.find('p', class_='house_basic_title_info').find_all('span')[2].text
+            data_dict['address'] = soup_detail.find('h3', class_='general-weizhi-title').text
+            data_dict['poster_name'] = soup_detail.find('span', class_='name-text').text
+            data_dict['poster_phone'] = re.search(r'"phone":"(.*?)",', detail_text).group(1)
+            data_dict['intro'] = [{li.find('span', class_='title').text: li.find('span', class_='content').text} for li
+                                  in soup_detail.find('ul', class_='general-item-wrap').find_all('li')]
+            data_dict['miaoshu'] = [{div.find('p').text: div.find('article').text} for div in
+                                    soup_detail.find_all('div', class_='des-item')]
+            data_dict['peitao'] = [li.text for li in soup_detail.find_all('li', class_='peitao-on')]
+            data_dict['pics'] = [parse.unquote(img.attrs['src']) for img in
+                                 soup_detail.find('ul', class_='general-pic-list').find_all('img')]
+            data_dict['location'] = soup_detail.find('meta', attrs={'name': 'location'}).attrs['content']
+            data_dict['url'] = parse.unquote(detail_url)
+
+            data_dict_json = json.loads(
+                re.sub(r'\\n|\s|\\xa0|询问卖方心理预期?|询问具体转让内容?|位置-', '', str(data_dict)).replace("'", '"'))
+            # Convert the phone number to text format (the trailing tab stops spreadsheets from mangling it)
+            data_dict_json['poster_phone'] = data_dict_json['poster_phone']+'\t' if data_dict_json['poster_phone'] else '未公开手机号,点击待租商铺链接调用拨打'
+            # Extract longitude and latitude
+            temp_list = data_dict_json['location'].split('=')[-1].split(',') if data_dict_json['location'] else None
+            data_dict_json['location'] = f"{float(temp_list[0]):.6f},{float(temp_list[1]):.6f}" if temp_list else None
+            # Call the AMap (Gaode) API for quantified data on the surrounding area
+            data_dict_json.update(get_zhoubian(data_dict_json['location']))
+            self.logger.info(data_dict_json)
+            # Persist the record
+            self.store.run(data_dict_json)
+        except Exception as e:
+            self.logger.error(detail_url)
+            self.logger.error(e)
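The location handling above reduces to the following standalone sketch (the meta-tag value is an assumed example of what 58.com embeds):

    loc = 'center=120.42716445444404,36.165840548830225'  # hypothetical <meta name="location"> content
    lng, lat = loc.split('=')[-1].split(',')
    print(f"{float(lng):.6f},{float(lat):.6f}")           # -> 120.427164,36.165841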

+ 5 - 0
requirements.txt

@@ -0,0 +1,5 @@
+beautifulsoup4==4.12.3
+pandas==2.0.3
+PyExecJS==1.5.1
+Requests==2.32.3
+lxml
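Note: the execjs module imported in verify/yidun.py is provided by the PyExecJS package pinned above; likewise bs4 comes from beautifulsoup4.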

+ 3 - 0
run.py

@@ -0,0 +1,3 @@
+from spider import WuBaSpider
+
+WuBaSpider().start()

+ 69 - 0
setting.py

@@ -0,0 +1,69 @@
+import time
+
+# Spider log level
+LOG_LEVEL = 'INFO'
+
+# Log directory
+LOG_DIR = 'logs'
+
+# Spider log file name
+LOG_FILE = '58spider.log'
+
+# Log format
+LOG_FORMAT = '%(asctime)s - %(filename)s - %(lineno)d - %(name)s - %(levelname)s - %(message)s'
+
+# Number of concurrent spider requests
+CONCURRENT_REQUESTS = 5
+
+# User-Agent pool for the spider
+USER_AGENT = [
+    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
+    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11"
+]
+
+# Proxy-IP API for the spider
+DOWNLOADER_IP_PROXY = 'https://api.xiaoxiangdaili.com/ip/get?appKey=1111188555084222464&appSecret=Z6eCYDT6&cnt=1&wt=json&method=http&city=&province='
+
+# District name
+_QU = 'laoshan'
+# Floor-area range (sqm)
+_AREA = '100_300'
+# Monthly rent range
+_MONEY = '0_10000'
+# Start URL for the crawl
+START_URL = f'https://qd.58.com/{_QU}/shangpucz/pn1/?area={_AREA}&huansuanyue={_MONEY}'
+
+# Persistence method
+STORE_METHOD = 'csv'
+
+# CSV output directory
+CSV_DIR = 'output_csv'
+
+# CSV file name
+CSV_FILENAME = f'{_QU}_{_AREA}_{_MONEY}_{int(time.time() * 1000)}.csv'
+
+# AMap (Gaode) web service API key
+GAODE_KEY = '819ddb3d210100e53d651dbc7ae8f11b'
+
+# Quantification: search radius in meters
+BANJING = '750'
+
+# Quantification: minimum number of residential compounds
+ZHUZHAI_COUNT = 3
+
+# Quantification: minimum number of schools
+XUEXIAO_COUNT = 2
+
+# Quantification: minimum number of office buildings
+XIEZILOU_COUNT = 1
+
+# Quantification: maximum number of competing pool halls
+JINGPIN_COUNT = 2
+
+# Quantification: minimum number of chain-brand stores
+LIANSUO_COUNT = 2
+
+# Quantification: chain-brand names to search for
+LIANSUO_LIST = ['好想来', '萨么', '丹香', '赵一鸣', '零食很忙', '糖巢', '大家乐', '元祖', '米兰西饼', '85度C', '幸福西饼', '好利来']
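With the defaults above, the derived values evaluate to (the timestamp suffix is illustrative; it matches the sample CSVs committed under output_csv/):

    # START_URL    -> 'https://qd.58.com/laoshan/shangpucz/pn1/?area=100_300&huansuanyue=0_10000'
    # CSV_FILENAME -> 'laoshan_100_300_0_10000_1717137665891.csv'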

+ 165 - 0
spider.py

@@ -0,0 +1,165 @@
+import os
+import requests
+from bs4 import BeautifulSoup
+from processor import WuBaProcessor
+import time
+import random
+from verify.yidun import YiDun
+# from threading import Lock
+from queue import Queue
+from log import PPLogger
+from setting import CONCURRENT_REQUESTS, USER_AGENT, DOWNLOADER_IP_PROXY, START_URL
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+
+class WuBaSpider:
+    def __init__(self):
+        self.ip_proxy = DOWNLOADER_IP_PROXY  # proxy-IP request URL
+        self.ua = USER_AGENT
+        self.headers = {
+            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+            "accept-encoding": "gzip, deflate, br",
+            "accept-language": "zh-CN,zh;q=0.9",
+            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36"
+        }
+        self.yidun = YiDun()  # NetEase Yidun captcha solver
+        self.q = Queue()
+        self.logger = PPLogger(name='58spider')
+        self.logger.setup_logger()
+        self.psr = WuBaProcessor()  # data processing
+        self.ip_pool = []  # proxy IP pool
+        # self.lock = Lock()
+
+    def new_session(self, session_=None):
+        """
+        Reset the session and attach a fresh proxy.
+        :param session_: old session to discard
+        :return:
+        """
+        if session_:
+            self.ip_pool.remove(session_.proxies)
+            self.logger.info(f'ip_pool remove {session_.proxies}')
+        if not self.ip_pool:
+            res_ip = requests.get(self.ip_proxy).json()
+            self.logger.info(res_ip)
+            if res_ip['code'] != 200:
+                os._exit(0)  # proxy-IP request failed; terminate the program
+            for i in res_ip['data']:
+                self.ip_pool.append({
+                    'http': f'http://{i["ip"]}:{i["port"]}',
+                    'https': f'http://{i["ip"]}:{i["port"]}'
+                })
+        session = requests.Session()
+        session.proxies = random.choice(self.ip_pool)
+        return session
+
+    def get_page_urls(self, url):
+        """
+        Collect every shop listing page.
+        :param url: start URL
+        :return: list of listing-page URLs
+        """
+        session = self.new_session()
+        # url = 'https://qd.58.com/licang/shangpucz/pn1/?area=100_300&huansuanyue=0_10000'
+        try:
+            res = session.get(url, headers=self.headers, allow_redirects=True, timeout=10)
+        except OSError as e:
+            self.logger.error(e)
+            session = self.new_session(session)
+            res = session.get(url, headers=self.headers, allow_redirects=True, timeout=10)
+        # print(res.text)
+        # Yidun captcha check
+        if 'verifycode' in res.url:
+            session = self.yidun.verify(session, res.url)
+            try:
+                res = session.get(url, headers=self.headers, allow_redirects=True, timeout=10)
+            except OSError as e:
+                self.logger.error(e)
+                return None
+        soup = BeautifulSoup(res.text, 'lxml')
+        end_page = int(soup.find('div', class_='pager').find_all('span')[-2].text)
+        # end_page = 8
+        all_pages = [url.replace('pn1', f'pn{i + 1}') for i in range(end_page)]
+        # if not all_pages:
+        #     self.logger.info('no pages data')
+        #     os._exit(0)
+        # session = requests.Session()
+        self.logger.info(f'all_pages {len(all_pages)}')
+        return all_pages
+
+    def get_detail_urls(self, page_url):
+        """
+        Collect all shop detail-page links from one listing page.
+        :param page_url: listing-page URL
+        """
+        session = self.new_session()
+        try:
+            res_page = session.get(page_url, headers=self.headers, allow_redirects=True, timeout=10)
+        except OSError as e:
+            self.logger.error(e)
+            session = self.new_session(session)
+            res_page = session.get(page_url, headers=self.headers, allow_redirects=True, timeout=10)
+        # Yidun captcha check
+        if 'verifycode' in res_page.url:
+            session = self.yidun.verify(session, res_page.url)
+            res_page = session.get(page_url, headers=self.headers, allow_redirects=True, timeout=10)
+
+        soup_page = BeautifulSoup(res_page.text, 'lxml')
+        detail_urls = [li.find('a').attrs['href'] for li in soup_page.find('div', class_='content-side-left').find_all('li')]
+        for detail_url in detail_urls:
+            self.q.put_nowait(detail_url)  # enqueue
+            self.logger.info(f'detail_url {detail_url}')
+
+    def get_content(self):
+        """
+        Fetch and process shop detail pages from the queue.
+        """
+        session = self.new_session()
+        while True:
+            if self.q.empty():
+                time.sleep(5)  # wait 5 seconds; if the queue is still empty, exit
+                if self.q.empty():
+                    break
+            detail_url = self.q.get()
+            self.headers['user-agent'] = random.choice(self.ua)
+            try:
+                res_detail = session.get(detail_url, headers=self.headers, timeout=10)
+            except OSError as e:
+                session = self.new_session(session)
+                self.logger.error(e)
+                self.q.put_nowait(detail_url)  # on failure, put the URL back on the queue
+                continue
+            else:
+                self.logger.info(res_detail.url)
+                # Geetest check
+                if 'geetest' in res_detail.text:
+                    self.q.put_nowait(detail_url)
+                    continue
+                # Yidun check
+                elif 'verifycode' in res_detail.url:
+                    session = self.yidun.verify(session, res_detail.url)
+                    try:
+                        res_detail = session.get(detail_url, headers=self.headers, allow_redirects=True, timeout=10)
+                    except OSError as e:
+                        self.logger.error(e)
+                        self.q.put_nowait(detail_url)
+                        continue
+                # IP block check
+                elif 'verifylogin' in res_detail.url or 'deny' in res_detail.url:
+                    session = self.new_session(session)
+                    self.q.put_nowait(detail_url)
+                    continue
+                # with self.lock:  # file-write lock
+                self.psr.processor(res_detail.text, res_detail.url)
+
+    def start(self):
+        page_urls = self.get_page_urls(START_URL)
+        with ThreadPoolExecutor() as pool:
+            results1 = [pool.submit(self.get_detail_urls, page_url) for page_url in page_urls[:1]]  # note: [:1] crawls only the first listing page; drop the slice to crawl them all
+            while self.q.empty():
+                time.sleep(1)
+            results2 = [pool.submit(self.get_content) for _ in range(CONCURRENT_REQUESTS)]
+
+
+
+# WuBaSpider().start()
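The consumer loop in get_content boils down to a requeue-on-failure pattern; a self-contained sketch (fetch is a placeholder, not part of the repo):

    from queue import Queue

    def fetch(url: str) -> bool:
        """Placeholder for session.get plus the Geetest/Yidun/IP checks above."""
        return True

    q = Queue()
    q.put_nowait('https://example.com/detail/1')  # hypothetical detail URL
    while not q.empty():
        url = q.get()
        if not fetch(url):
            q.put_nowait(url)  # blocked or failed URLs go back on the queue for retry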

+ 36 - 0
store.py

@@ -0,0 +1,36 @@
+import os
+import pandas as pd
+from log import PPLogger
+from setting import CSV_FILENAME, CSV_DIR
+from threading import Lock
+
+class WuBaStore:
+    def __init__(self, method):
+        """
+        Persistence layer.
+        :param method: persistence method: 'csv', ...
+        """
+        self.method = method
+        self.logger = PPLogger(name='store')
+        self.logger.setup_logger()
+        self.lock = Lock()
+
+    def run(self, *args):
+        if self.method == 'csv':
+            self.to_csv(args[0])
+        else:
+            pass
+
+    def to_csv(self, data_dict: dict, file_dir: str = CSV_DIR, file_name: str = CSV_FILENAME):
+        if not os.path.exists(file_dir):
+            os.mkdir(file_dir)
+        try:
+            file_path = os.path.join(file_dir, file_name)
+            data = pd.DataFrame([data_dict])
+            with self.lock:
+                if not os.path.exists(file_path):
+                    data.to_csv(path_or_buf=file_path, index=False, header=True, encoding='utf8')
+                else:
+                    data.to_csv(path_or_buf=file_path, index=False, header=False, encoding='utf8', mode='a')
+        except Exception as e:
+            self.logger.error(e)
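A minimal usage sketch (field values are illustrative; rows accumulate under the timestamped CSV_FILENAME in output_csv/):

    store = WuBaStore('csv')
    store.run({'title': 'demo shop', 'area': '120㎡'})   # first call writes the header row
    store.run({'title': 'other shop', 'area': '90㎡'})   # later calls append without a header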

+ 6 - 0
test.py

@@ -0,0 +1,6 @@
+import pandas as pd
+
+a = {'1':str(12222222222222)+'\t'}  # trailing tab keeps the long number as text in the CSV
+da = pd.DataFrame([a])
+
+da.to_csv('1.csv')

File diff suppressed because it is too large
+ 69 - 0
verify/geetest.py


File diff suppressed because it is too large
+ 11835 - 0
verify/yidun.js


+ 170 - 0
verify/yidun.py

@@ -0,0 +1,170 @@
+import json
+import time
+import requests
+import re
+from bs4 import BeautifulSoup
+import execjs
+from log import PPLogger
+
+class YiDun:
+    def __init__(self):
+        self.logger = PPLogger(name='yidun_verify')
+        self.logger.setup_logger()
+
+    def verify(self, session, url1):
+        try:
+            with open('./verify/yidun.js', 'r', encoding='utf8') as f:
+                jscode = f.read()
+            self.logger.info('yidun-verify')
+            # session = requests.Session()
+            headers = {
+                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36"
+            }
+            # url1 = 'https://callback.58.com/antibot/verifycode?serialId=d7026297c2c26f8550dd61e011b05a13_e3df224043e941b197ac36b476201772&code=300&sign=5de1e434335d349a2a2bc187ff886a4f&namespace=fangchan_business_pc&url=https%253A%252F%252Fqd.58.com%252Flicang%252Fshangpucz%252Fpn1%252F%253Farea%253D100_300%2526huansuanyue%253D0_10000'
+            res1 = session.get(url1, headers=headers)
+            soup1 = BeautifulSoup(res1.text, 'lxml')
+            serialid = soup1.find('input', attrs={'id': "serialId"}).attrs['value']
+            sign = soup1.find('input', attrs={'id': "sign"}).attrs['value']
+            url = soup1.find('input', attrs={'id': "url"}).attrs['value']
+            # print(url)
+
+            url2 = f'https://callback.58.com/antibot/yidun/register.do?t={int(time.time() * 1000)}'
+            data2 = {
+                "serialId": serialid,
+                "code": "300",
+                "sign": sign,
+                "url": url,
+                "namespace": "fangchan_business_pc"
+            }
+            headers2 = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
+                "origin": "https://callback.58.com",
+                "referer": url1,
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36",
+                "x-requested-with": "XMLHttpRequest"
+            }
+            res2 = session.post(url2, headers=headers2, data=data2)
+            self.logger.info(res2.text)
+            res2_data = res2.json()['data']
+
+            url3 = 'https://c.dun.163.com/api/v2/getconf'
+            params3 = {
+                "referer": "https://callback.58.com/antibot/verifycode",
+                "zoneId": "",
+                "id": res2_data,
+                "ipv6": "false",
+                "runEnv": "10",
+                "iv": "4",
+                "loadVersion": "2.4.0",
+                "lang": "zh-CN",
+                "callback": "__JSONP_xsv67jt_0"
+            }
+            header3 = {
+                "Accept": "*/*",
+                "Accept-Encoding": "gzip, deflate, br",
+                "Accept-Language": "zh-CN,zh;q=0.9",
+                "Connection": "keep-alive",
+                "Host": "c.dun.163.com",
+                "Referer": "https://callback.58.com/",
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36"
+            }
+            res3 = session.get(url3, headers=header3, params=params3)
+            res3_json = json.loads(re.search(r'\{.*\}', res3.text).group())
+            ac_token = res3_json['data']['ac']['token']
+            dt = res3_json['data']['dt']
+
+            url4 = 'https://c.dun.163.com/api/v3/get'
+            fp = execjs.compile(jscode).call('get_fp')
+            cb = execjs.compile(jscode).call('get_cb')
+            params4 = {
+                "referer": "https://callback.58.com/antibot/verifycode",
+                "zoneId": "CN31",
+                "dt": dt,
+                "id": res2_data,
+                "fp": fp,
+                "https": "true",
+                "type": "undefined",
+                "width": "",
+                "sizeType": "undefined",
+                "version": "2.27.2",
+                "dpr": "1",
+                "dev": "1",
+                "cb": cb,
+                "acToken": "",
+                "ipv6": "false",
+                "runEnv": "10",
+                "group": "",
+                "scene": "",
+                "sdkVersion": "undefined",
+                "iv": "4",
+                "smsVersion": "v3",
+                "callback": "__JSONP_65el8vu_0"
+            }
+            res4 = session.get(url4, headers=header3, params=params4)
+            res4_json = json.loads(re.search(r'\{.*\}', res4.text).group())
+            token = res4_json['data']['token']
+
+            url5 = 'https://c.dun.163.com/api/v3/check'
+            headers5 = {
+                "Accept": "*/*",
+                "Accept-Encoding": "gzip, deflate, br",
+                "Accept-Language": "zh-CN,zh;q=0.9",
+                "Connection": "keep-alive",
+                "Host": "c.dun.163.com",
+                "Referer": "https://callback.58.com/",
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36"
+            }
+            data = execjs.compile(jscode).call('get_data', token)
+            params5 = {
+                "referer": "https://callback.58.com/antibot/verifycode",
+                "zoneId": "CN31",
+                "dt": dt,
+                "id": res2_data,
+                "version": "2.27.2",
+                "cb": cb,
+                "extraData": "",
+                "bf": "0",
+                "runEnv": "10",
+                "sdkVersion": "undefined",
+                "iv": "4",
+                "token": token,
+                "type": "5",
+                "width": "240",
+                "data": data,
+                "callback": '__JSONP_4oixuod_1'
+            }
+            res5 = session.get(url5, headers=headers5, params=params5)
+            res5_json = json.loads(re.search(r'\{.*\}', res5.text).group())
+            validate = res5_json['data']['validate']
+
+            url6 = 'https://callback.58.com/antibot/yidun/checkcode.do'
+            headers6 = {
+                "accept": "*/*",
+                "accept-encoding": "gzip, deflate, br",
+                "accept-language": "zh-CN,zh;q=0.9",
+                "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
+                "origin": "https://callback.58.com",
+                "pragma": "no-cache",
+                "referer": url1,
+                "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36"
+            }
+            validate6 = execjs.compile(jscode).call('get_validate', fp, validate)
+            data6 = {
+                "namespace": "fangchan_business_pc",
+                "url": url,
+                "serialId": serialid,
+                "validate": validate6
+            }
+            res6 = session.post(url6, headers=headers6, data=data6)
+            self.logger.info(res6.text)
+            url7 = res6.json()['msg']
+            session.get(url7, headers=headers)
+        except Exception as e:
+            self.logger.error(e)
+        return session
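As wired up in spider.py, the solver is handed the session plus the verifycode URL that 58.com redirected to, and returns the same session now carrying the anti-bot cookies; a sketch (the URL is elided, as in the commented-out example above):

    import requests

    session = requests.Session()
    blocked_url = 'https://callback.58.com/antibot/verifycode?serialId=...'  # elided query string
    session = YiDun().verify(session, blocked_url)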