-
Notifications
You must be signed in to change notification settings - Fork 516
Description
import os
import re
from urllib.parse import urljoin

import feapder
import requests
from tqdm import tqdm
class AirSpiderDemo(feapder.AirSpider):
    """Spider that saves each course's HLS video and PDF attachment to disk.

    For every entry in ``videoList`` it schedules one request for the
    attachment URL and one for the video playlist URL. Files are written to
    ``courses/<course name>/<course name>.pdf`` and ``.mp4`` respectively.
    """

    # Course entries; each is read via the keys 'attachmentUrl', 'name' and
    # 'videoUrl'. Assigned from outside before the spider is started.
    videoList = []
    # URLs that have already been scheduled, so the same URL is never
    # requested twice. Class-level, hence shared by every instance.
    requested_urls = set()
    # Headers sent with each direct segment download.
    headers = {
        'sec-ch-ua': '"Chromium";v="121", "Not A(Brand";v="99"',
        'Referer': 'https://www.zhifei100.com/',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 SamanthaDoubao/1.33.9',
        'sec-ch-ua-platform': '"Windows"'
    }

    # Characters that cannot appear in a file name on common file systems.
    _UNSAFE_PATH_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')
    # Seconds to wait on each segment download before giving up.
    _REQUEST_TIMEOUT = 30
    # Bytes written per progress-bar step when saving a PDF.
    _WRITE_BLOCK_SIZE = 1024

    def _claim_url(self, url):
        """Return True and record ``url`` if it is a new, non-empty string."""
        if not url or not isinstance(url, str) or url in self.requested_urls:
            return False
        self.requested_urls.add(url)
        return True

    @classmethod
    def _prepare_course_dir(cls, raw_name):
        """Create ``courses/<name>`` and return ``(safe_name, directory)``.

        The name comes from remote data, so path separators and other
        characters that are illegal in file names are replaced with ``_``.
        A missing or empty name falls back to ``"untitled"``.
        """
        safe_name = cls._UNSAFE_PATH_CHARS.sub("_", str(raw_name or "")).strip(" .")
        safe_name = safe_name or "untitled"
        directory = os.path.join("courses", safe_name)
        os.makedirs(directory, exist_ok=True)
        return safe_name, directory

    @staticmethod
    def _segment_urls(playlist_text, playlist_url):
        """Return the absolute URL of every media segment in an M3U8 playlist.

        A segment URI is the first non-tag, non-blank line following an
        ``#EXTINF:`` tag. URIs are resolved against ``playlist_url`` so both
        relative and absolute entries work.
        """
        urls = []
        expecting_uri = False
        for raw_line in (playlist_text or "").splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("#EXTINF:"):
                expecting_uri = True
            elif expecting_uri and not line.startswith("#"):
                urls.append(urljoin(playlist_url, line))
                expecting_uri = False
        return urls

    def start_requests(self):
        """Yield one request per not-yet-seen attachment URL and video URL."""
        # `or []` tolerates videoList having been set to None by the caller.
        for video in self.videoList or []:
            attachment_url = video.get('attachmentUrl')
            video_url = video.get('videoUrl')
            video_info = {
                "attachmentUrl": attachment_url,
                "videoName": video.get('name'),
                "videoUrl": video_url
            }
            if self._claim_url(attachment_url):
                yield feapder.Request(attachment_url, callback=self.parse_save_pdf, videoInfo=video_info)
            else:
                print(f"附件url请求错误或已请求: {attachment_url}")
            if self._claim_url(video_url):
                yield feapder.Request(video_url, callback=self.parse_save_video, videoInfo=video_info)
            else:
                print(f"视频请求错误或已请求: {video_url}")

    def parse_save_video(self, request, response):
        """Download every segment of the playlist in ``response`` into one file.

        The segments are concatenated in playlist order into
        ``courses/<name>/<name>.mp4``. The file is truncated first, so a
        retry or a second run does not append duplicate data.

        Raises:
            requests.HTTPError: if a segment download returns an error status.
        """
        print("parse_save_video")
        playlist_url = request.videoInfo.get('videoUrl')
        segment_urls = self._segment_urls(response.text, playlist_url)
        if not segment_urls:
            # Nothing to download; avoid leaving an empty video file behind.
            print(f"未在播放列表中找到ts分片: {playlist_url}")
            return
        course_name, course_dir = self._prepare_course_dir(request.videoInfo.get('videoName'))
        target_path = os.path.join(course_dir, f"{course_name}.mp4")
        # NOTE(review): encrypted playlists (#EXT-X-KEY) are not decrypted here.
        with open(target_path, "wb") as out_file, \
                tqdm(total=len(segment_urls), desc=f"{course_name},下载进度", unit="seg") as pbar:
            for segment_url in segment_urls:
                segment = requests.get(segment_url, headers=self.headers, timeout=self._REQUEST_TIMEOUT)
                # Fail loudly instead of writing an error page into the video.
                segment.raise_for_status()
                out_file.write(segment.content)
                pbar.update(1)

    def parse_save_pdf(self, request, response):
        """Save the PDF attachment in ``response`` into the course folder.

        The body is written to ``courses/<name>/<name>.pdf`` in fixed-size
        blocks so a progress bar can show how much has been saved.
        """
        print("parse_save_pdf")
        course_name, course_dir = self._prepare_course_dir(request.videoInfo.get('videoName'))
        target_path = os.path.join(course_dir, f"{course_name}.pdf")
        # memoryview lets each block be sliced without copying the payload.
        payload = memoryview(response.content)
        total_size = len(payload)
        with open(target_path, "wb") as out_file, \
                tqdm(total=total_size, unit='B', unit_scale=True, desc=f'Saving PDF for {course_name}') as pbar:
            for offset in range(0, total_size, self._WRITE_BLOCK_SIZE):
                block = payload[offset:offset + self._WRITE_BLOCK_SIZE]
                out_file.write(block)
                pbar.update(len(block))
# Script entry point: fetch the course's video list from the API, hand it to
# the spider, and start crawling.
if __name__ == "__main__":
    # NOTE: the endpoint, headers and cookies were redacted in the original
    # snippet; fill them in before running.
    url = "https:"
    headers = {}
    data = {
        "id": "725"
    }
    cookies = {}
    result = requests.post(url, data=data, headers=headers, cookies=cookies).json()
    # Fall back to empty containers so a response without "result" or
    # "videoList" leads to an empty crawl instead of an AttributeError.
    result = result.get("result") or {}
    # Video list returned by the API.
    videoList = result.get('videoList') or []
    # The spider reads its work items from this class attribute.
    AirSpiderDemo.videoList = videoList
    AirSpiderDemo(thread_count=8).start()