# setting.py (~1.9 KB) — spider configuration module
  1. import time
  2. # 爬虫的日志级别
  3. LOG_LEVEL = 'INFO'
  4. # 日志文件夹
  5. LOG_DIR = 'logs'
  6. # 爬虫的日志文件路径
  7. LOG_FILE = '58spider.log'
  8. # 日志格式
  9. LOG_FORMAT = '%(asctime)s - %(filename)s - %(lineno)d - %(name)s - %(levelname)s - %(message)s'
  10. # 爬虫的并发请求数量
  11. CONCURRENT_REQUESTS = 3
  12. # 爬虫的User-Agent
  13. USER_AGENT = [
  14. "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
  15. # "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
  16. # "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
  17. # "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11"
  18. ]
  19. # 爬虫的IP代理
  20. DOWNLOADER_IP_PROXY = 'https://api.xiaoxiangdaili.com/ip/get?appKey=1114815955776983040&appSecret=M4RS3NpN&cnt=2&wt=json&method=http&city=&province='
  21. # 区名
  22. _QU = 'jimo'
  23. # 面积
  24. _AREA = '100_300'
  25. # 房租
  26. _MONEY = '0_10000'
  27. # 爬取的起始URL
  28. START_URL = f'https://qd.58.com/{_QU}/shangpucz/pn1/?area={_AREA}&huansuanyue={_MONEY}'
  29. # 持久化方式
  30. STORE_METHOD = 'csv'
  31. # csv文件夹
  32. CSV_DIR = 'output_csv'
  33. # csv文件名
  34. CSV_FILENAME = f'{_QU}_{_AREA}_{_MONEY}_{int(time.time() * 1000)}.csv'
  35. # 高德web服务API-key
  36. GAODE_KEY = '515a64d5324a70ba9c5a95f9539370ec'
  37. # 量化-半径
  38. BANJING = '750'
  39. # 量化-住宅个数
  40. ZHUZHAI_COUNT = 3
  41. # 量化-学校个数
  42. XUEXIAO_COUNT = 2
  43. # 量化-写字楼个数
  44. XIEZILOU_COUNT = 1
  45. # 量化-竞品门店个数
  46. JINGPIN_COUNT = 2
  47. # 量化-连锁品牌个数
  48. LIANSUO_COUNT = 2
  49. # 量化-连锁品牌名
  50. LIANSUO_LIST = ['好想来', '萨么', '丹香', '赵一鸣', '零食很忙', '糖巢', '大家乐', '元祖', '米兰西饼', '85度C', '幸福西饼', '好利来']