python实现自动采集VIP课程帖子中的百度网盘链接-资源共享吧|易语言论坛|逆向破解教程|辅助开发教程|网络安全教程|www.zygx8.com|我的开发技术随记

请我喝茶 发表于 2020-5-5 13:48:08

python实现自动采集VIP课程帖子中的百度网盘链接

本帖最后由 请我喝茶 于 2020-5-5 14:15 编辑

# -*- coding: utf-8 -*-

# Author: 桑葚ICE
# Email: 152516cc@gmail.com
# Blog: iicey.github.io
# JueJin: juejin.im/user/5c64dce8e51d45013c40742c
import re
import time

import requests
from scrapy import Selector

class Spider:
    """Collect share links hidden behind "reply to view" blocks on www.zygx8.com.

    Requests are authenticated through the raw ``cookie`` header passed to the
    constructor.  Links found by :meth:`get_content` are accumulated in
    ``self.bd_url_info`` as ``{share_url: extraction_code}``.
    """

    def __init__(self, un="", pwd="", cookie=""):
        """
        :param un: user name, sent by :meth:`enter`.
        :param pwd: password, sent by :meth:`enter`.
        :param cookie: raw ``Cookie`` header value sent with every request.
        """
        self.un = un
        self.pwd = pwd
        # Filled by enter(); no other method of this class reads it.
        self.cookies = {}
        self.headers = {"cookie": cookie}
        # share url -> extraction code
        self.bd_url_info = {}

    def enter(self):
        """Post the login form and keep the response cookies in ``self.cookies``.

        :return: None
        """
        response = requests.post(
            'https://www.zygx8.com/member.php',
            headers=self.headers,
            params={
                "mod": "logging",
                "action": "login",
                "loginsubmit": "yes",
                "infloat": "yes",
                "lssubmit": "yes",
                "inajax": "1",
            },
            data={
                "fastloginfield": "username",
                "username": self.un,
                "password": self.pwd,
                "quickforward": "yes",
                "handlekey": "ls",
            },
        )
        self.cookies = requests.utils.dict_from_cookiejar(response.cookies)

    def fid_to_tid(self, fid, page=1, tid_s=None):
        """Collect the thread ids listed under forum *fid*.

        Listing pages are requested one after another, starting at *page*,
        until a page adds no id that is not already in the set.

        :param fid: forum id, sent as the ``fid`` query parameter.
        :param page: number of the first listing page to request.
        :param tid_s: optional set of already known ids; updated in place.
        :return: set of id strings with the ``normalthread_`` prefix removed.
        """
        if tid_s is None:
            tid_s = set()

        # Iterate instead of recursing once per page, so a forum with many
        # pages cannot exhaust the interpreter's recursion limit.
        while True:
            response = requests.get(
                'https://www.zygx8.com/forum.php',
                headers=self.headers,
                params=(
                    ('mod', 'forumdisplay'),
                    ('fid', fid),
                    ('page', page),
                    ('t', '5104641'),
                ),
            )
            row_ids = Selector(response).xpath(
                '//*[@id="threadlisttableid"]/tbody/@id'
            ).extract()

            known = len(tid_s)
            tid_s.update(row_id.replace("normalthread_", "") for row_id in row_ids)
            if len(tid_s) == known:
                # Nothing new on this page: treat it as the end of the listing.
                return tid_s
            page += 1

    def get_content(self, fid, tid):
        """Fetch thread *tid* and record the share link from its hidden block.

        While the page still shows the "reply to view hidden content" notice,
        a reply is posted through :meth:`post_content`, the method sleeps for
        60 seconds and the page is fetched again.  There is no retry limit.

        On success the pair is printed and stored in ``self.bd_url_info``.
        When no link or code can be found, the error and the page URL are
        printed and nothing is stored.

        :param fid: forum id, needed only for posting the reply.
        :param tid: thread id to read.
        :return: None
        """
        url = f"https://www.zygx8.com/forum.php?mod=viewthread&tid={tid}&extra=page%3D1"
        while True:
            response = requests.get(url, headers=self.headers)
            if "如果您要查看本帖隐藏内容请" not in response.text:
                break
            self.post_content(fid, tid)
            time.sleep(60)

        url_info = Selector(response).xpath(
            '//div[@class="showhide"]//text()'
        ).extract()
        try:
            bd_url, bd_pwd = self._parse_share(url_info)
        except IndexError as e:
            print(e, response.url)
            return

        print(bd_url, bd_pwd)
        # Keep one entry per link instead of replacing the whole dict.
        self.bd_url_info[bd_url] = bd_pwd

    @staticmethod
    def _parse_share(url_info):
        """Pick the share url and its extraction code out of text fragments.

        :param url_info: list of text fragments taken from the hidden block.
        :return: ``(url, code)`` tuple.
        :raises IndexError: if no fragment contains ``https://``, no fragment
            contains ``提取码``, or that fragment has no run of four word
            characters.
        """
        bd_url = [i for i in url_info if "https://" in i][0]
        # NOTE(review): for str patterns ``\w`` also matches CJK characters, so
        # a code written directly after the label without a separator would be
        # matched together with part of the label.
        bd_pwd = re.findall(r"\w\w\w\w", [i for i in url_info if "提取码" in i][0])[0]
        return bd_url, bd_pwd

    def post_content(self, fid, tid):
        """Post the fixed reply ``666`` to thread *tid*.

        The server's answer is not inspected.

        :param fid: forum id of the thread.
        :param tid: thread id to reply to.
        :return: None
        """
        params = (
            ('mod', 'post'),
            ('infloat', 'yes'),
            ('action', 'reply'),
            ('fid', fid),
            ('extra', ''),
            ('tid', tid),
            ('replysubmit', 'yes'),
            ('inajax', '1'),
        )
        data = {
            # NOTE(review): hard-coded form hash; presumably tied to the
            # original author's session -- confirm before relying on it.
            'formhash': '78484d61',
            'handlekey': 'reply',
            'noticeauthor': '',
            'noticetrimstr': '',
            'noticeauthormsg': '',
            'usesig': '0',
            'subject': '',
            'message': '666',
        }
        requests.post(
            'https://www.zygx8.com/forum.php',
            headers=self.headers,
            params=params,
            data=data,
        )

    def main(self):
        """Placeholder; does nothing."""
        pass

if __name__ == '__main__':
    # Replace the placeholder with the Cookie header value copied from the
    # browser's developer tools (F12) while logged in.
    cookie = "浏览器F12打开取出cookie放到这里"
    spider = Spider(cookie=cookie)
    # Walk every thread of each listed forum id and try to read its hidden link.
    for fid in [158]:
        for tid in spider.fid_to_tid(fid):
            spider.get_content(fid, tid)
    # Dump everything that was collected.
    print(spider.bd_url_info)

ou315001655 发表于 2020-5-5 23:46:27

没回复的可以取到吗？

请我喝茶 发表于 2020-5-6 11:08:05

ou315001655 发表于 2020-5-5 23:46
没回复的可以取到吗？

不能，不回复就想拿内容可以学渗透然后自己搞

请我喝茶 发表于 2020-5-6 11:09:26

ou315001655 发表于 2020-5-5 23:46
没回复的可以取到吗？

这个你睡前或者看视频的时候挂起来让它跑起来就OK，不用管它

qwertyuiop1822 发表于 2020-8-9 00:25:00

就是300秒内不能重复发言比较恶心，所以想都回复一下，以后搜就可以直接看了。保存那么多到百度云也没用

fafa100 发表于 2020-8-27 08:10:08

:):):):):):):):):):)

zlmzygx8 发表于 2020-9-22 20:40:48

666

ilike 发表于 2020-11-24 18:20:36

谢谢分享

415194510 发表于 2022-2-1 00:03:48

666

pgone 发表于 2022-7-11 02:10:24

感谢楼主的无私分享！

页: [1] 2

资源共享吧|易语言论坛|逆向破解教程|辅助开发教程|网络安全教程|www.zygx8.com|我的开发技术随记's Archiver

python实现自动采集VIP课程帖子中的百度网盘链接