@@ -45,7 +45,7 @@ def __init__(self):
45
45
self ._timeout = 15 # 每个请求的超时(不包含下载响应体的用时)
46
46
self ._max_size = 100 # 单个文件大小上限 MB
47
47
self ._upload_delay = (0 , 0 ) # 文件上传延时
48
- self ._host_url = 'https://pan.lanzous .com'
48
+ self ._host_url = 'https://pan.lanzoui .com'
49
49
self ._doupload_url = 'https://pc.woozooo.com/doupload.php'
50
50
self ._account_url = 'https://pc.woozooo.com/account.php'
51
51
self ._mydisk_url = 'https://pc.woozooo.com/mydisk.php'
@@ -58,20 +58,36 @@ def __init__(self):
58
58
disable_warnings (InsecureRequestWarning ) # 全局禁用 SSL 警告
59
59
60
60
def _get(self, url, **kwargs):
    """Send a GET request, falling back to mirror domains on network failure.

    Candidate URLs are produced by ``self._all_possible_urls(url)`` and tried
    in order; the first response obtained is returned.

    :param url: URL to request.
    :param kwargs: extra keyword arguments forwarded to ``self._session.get``.
        ``timeout`` and ``headers`` default to ``self._timeout`` and
        ``self._headers`` when the caller does not supply them.
    :return: the response object, or ``None`` if every candidate failed.
    """
    # The defaults do not depend on the candidate URL, so fill them in once.
    kwargs.setdefault('timeout', self._timeout)
    kwargs.setdefault('headers', self._headers)
    # dict.fromkeys() drops duplicate candidates while keeping their order, so
    # a URL that has no mirror is not retried against the same dead host.
    for possible_url in dict.fromkeys(self._all_possible_urls(url)):
        try:
            # SSL verification is deliberately disabled, as in the original.
            return self._session.get(possible_url, verify=False, **kwargs)
        except (ConnectionError, requests.RequestException):
            logger.debug(f"Get {possible_url} failed, try another domain")

    return None
67
70
68
71
def _post(self, url, data, **kwargs):
    """Send a POST request, falling back to mirror domains on network failure.

    Candidate URLs are produced by ``self._all_possible_urls(url)`` and tried
    in order; the first response obtained is returned.

    :param url: URL to post to.
    :param data: request payload, passed positionally to ``self._session.post``.
    :param kwargs: extra keyword arguments forwarded to ``self._session.post``.
        ``timeout`` and ``headers`` default to ``self._timeout`` and
        ``self._headers`` when the caller does not supply them.
    :return: the response object, or ``None`` if every candidate failed.
    """
    # The defaults do not depend on the candidate URL, so fill them in once.
    kwargs.setdefault('timeout', self._timeout)
    kwargs.setdefault('headers', self._headers)
    # dict.fromkeys() drops duplicate candidates while keeping their order, so
    # a URL that has no mirror is not re-posted to the same dead host.
    for possible_url in dict.fromkeys(self._all_possible_urls(url)):
        try:
            # SSL verification is deliberately disabled, as in the original.
            return self._session.post(possible_url, data, verify=False, **kwargs)
        except (ConnectionError, requests.RequestException):
            # NOTE(review): the payload is logged verbatim; if any caller posts
            # credentials through this helper they end up in the debug log.
            # Confirm against the callers and consider redacting.
            logger.debug(f"Post to {possible_url} ({data}) failed, try another domain")

    return None
81
+
82
@staticmethod
def _all_possible_urls(url: str) -> List[str]:
    """Return the URLs to try for *url*, in order of preference.

    The main LanZou domain is sometimes unreachable, so a URL on any known
    LanZou domain is expanded to the same URL on every known domain. A URL
    on an unrelated host has no mirror and is returned as the only candidate.

    :param url: the URL the caller wants to request.
    :return: candidate URLs in the order they should be tried; never empty.
    """
    available_domains = [
        'lanzoui.com',  # ICP record 鲁ICP备15001327号-6, 2020-06-09, lowest SEO rank
        'lanzoux.com',  # ICP record 鲁ICP备15001327号-5, 2020-06-09
        'lanzous.com',  # main domain; ICP record abnormal, unreachable in some regions
    ]
    # Substitute whichever known domain the URL actually uses, not only the
    # main one; otherwise URLs already on a mirror would never fall back.
    for known in available_domains:
        if known in url:
            return [url.replace(known, d) for d in available_domains]
    # Not a LanZou URL: returning it once avoids retrying an identical request.
    return [url]
75
91
76
92
def ignore_limits (self ):
77
93
"""解除官方限制"""
@@ -135,8 +151,6 @@ def login_by_cookie(self, cookie: dict) -> int:
135
151
136
152
def logout (self ) -> int :
137
153
"""注销"""
138
- self ._cookies = None
139
- self ._session .cookies .clear ()
140
154
html = self ._get (self ._account_url , params = {'action' : 'logout' })
141
155
if not html :
142
156
return LanZouCloud .NETWORK_ERROR
@@ -435,6 +449,16 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail:
435
449
if not first_page :
436
450
return FileDetail (LanZouCloud .NETWORK_ERROR , pwd = pwd , url = share_url )
437
451
452
+ if "acw_sc__v2" in first_page .text :
453
+ # 在页面被过多访问或其他情况下,有时候会先返回一个加密的页面,其执行计算出一个acw_sc__v2后放入页面后再重新访问页面才能获得正常页面
454
+ # 若该页面进行了js加密,则进行解密,计算acw_sc__v2,并加入cookie
455
+ acw_sc__v2 = calc_acw_sc__v2 (first_page .text )
456
+ self ._session .cookies .set ("acw_sc__v2" , acw_sc__v2 )
457
+ logger .debug (f"Set Cookie: acw_sc__v2={ acw_sc__v2 } " )
458
+ first_page = self ._get (share_url ) # 文件分享页面(第一页)
459
+ if not first_page :
460
+ return FileDetail (LanZouCloud .NETWORK_ERROR , pwd = pwd , url = share_url )
461
+
438
462
first_page = remove_notes (first_page .text ) # 去除网页里的注释
439
463
if '文件取消' in first_page or '文件不存在' in first_page :
440
464
return FileDetail (LanZouCloud .FILE_CANCELLED , pwd = pwd , url = share_url )
@@ -950,25 +974,8 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
950
974
if not resp :
951
975
return LanZouCloud .FAILED
952
976
953
- content_length = resp .headers .get ('Content-Length' , None )
954
- # 如果无法获取 Content-Length, 先读取一点数据, 再尝试获取一次
955
- # 通常只需读取 1 字节数据
956
- data_iter = resp .iter_content (chunk_size = 1 )
957
- while not content_length :
958
- logger .warning ("Not found Content-Length in response headers" )
959
- logger .debug ("Read 1 byte from stream..." )
960
- try :
961
- next (data_iter )
962
- except StopIteration :
963
- logger .debug ("Please wait for a moment before downloading" )
964
- return LanZouCloud .FAILED
965
- resp_ = self ._get (info .durl , stream = True )
966
- if not resp_ :
967
- return LanZouCloud .FAILED
968
- content_length = resp_ .headers .get ('Content-Length' , None )
969
- logger .debug (f"Content-Length: { content_length } " )
970
-
971
- total_size = int (content_length )
977
+ # 如果本地存在同名文件且设置了 overwrite, 则覆盖原文件
978
+ # 否则修改下载文件路径, 自动在文件名后加序号
972
979
file_path = save_path + os .sep + info .name
973
980
if os .path .exists (file_path ):
974
981
if overwrite :
@@ -981,9 +988,33 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
981
988
tmp_file_path = file_path + '.download' # 正在下载中的文件名
982
989
logger .debug (f'Save file to { tmp_file_path } ' )
983
990
991
+ # 对于 txt 文件, 可能出现没有 Content-Length 的情况
992
+ # 此时文件需要下载一次才会出现 Content-Length
993
+ # 这时候我们先读取一点数据, 再尝试获取一次, 通常只需读取 1 字节数据
994
+ content_length = resp .headers .get ('Content-Length' , None )
995
+ if not content_length :
996
+ data_iter = resp .iter_content (chunk_size = 1 )
997
+ max_retries = 5 # 5 次拿不到就算了
998
+ while not content_length and max_retries > 0 :
999
+ max_retries -= 1
1000
+ logger .warning ("Not found Content-Length in response headers" )
1001
+ logger .debug ("Read 1 byte from stream..." )
1002
+ try :
1003
+ next (data_iter ) # 读取一个字节
1004
+ except StopIteration :
1005
+ logger .debug ("Please wait for a moment before downloading" )
1006
+ return LanZouCloud .FAILED
1007
+ resp_ = self ._get (info .durl , stream = True ) # 再请求一次试试
1008
+ if not resp_ :
1009
+ return LanZouCloud .FAILED
1010
+ content_length = resp_ .headers .get ('Content-Length' , None )
1011
+ logger .debug (f"Content-Length: { content_length } " )
1012
+
1013
+ if not content_length :
1014
+ return LanZouCloud .FAILED # 应该不会出现这种情况
1015
+
1016
+ # 支持断点续传下载
984
1017
now_size = 0
985
- chunk_size = 4096
986
- last_512_bytes = b'' # 用于识别文件是否携带真实文件名信息
987
1018
if os .path .exists (tmp_file_path ):
988
1019
now_size = os .path .getsize (tmp_file_path ) # 本地已经下载的文件大小
989
1020
headers = {** self ._headers , 'Range' : 'bytes=%d-' % now_size }
@@ -996,30 +1027,43 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
996
1027
997
1028
with open (tmp_file_path , "ab" ) as f :
998
1029
file_name = os .path .basename (file_path )
999
- for chunk in resp .iter_content (chunk_size ):
1030
+ for chunk in resp .iter_content (4096 ):
1000
1031
if chunk :
1001
1032
f .write (chunk )
1002
1033
f .flush ()
1003
1034
now_size += len (chunk )
1004
- if total_size - now_size < 512 :
1005
- last_512_bytes += chunk
1006
1035
if callback is not None :
1007
- callback (file_name , total_size , now_size )
1036
+ callback (file_name , int (content_length ), now_size )
1037
+
1038
+ # 文件下载完成后, 检查文件尾部 512 字节数据
1039
+ # 绕过官方限制上传时, API 会隐藏文件真实信息到文件尾部
1040
+ # 这里尝试提取隐藏信息, 并截断文件尾部数据
1008
1041
os .rename (tmp_file_path , file_path ) # 下载完成,改回正常文件名
1009
- # 尝试解析文件报尾
1010
- file_info = un_serialize (last_512_bytes [- 512 :])
1011
- if file_info is not None and 'padding' in file_info : # 大文件的记录文件也可以反序列化出 name,但是没有 padding
1012
- real_name = file_info ['name' ] # 解除伪装的真实文件名
1013
- logger .debug (f"Find meta info: real_name={ real_name } " )
1014
- real_path = save_path + os .sep + real_name
1015
- if overwrite and os .path .exists (real_path ):
1016
- os .remove (real_path ) # 删除原文件
1017
- new_file_path = auto_rename (real_path )
1018
- os .rename (file_path , new_file_path )
1019
- with open (new_file_path , 'rb+' ) as f :
1020
- f .seek (- 512 , 2 ) # 截断最后 512 字节数据
1021
- f .truncate ()
1022
- file_path = new_file_path # 保存文件重命名后真实路径
1042
+ if os .path .getsize (file_path ) > 512 : # 文件大于 512 bytes 就检查一下
1043
+ file_info = None
1044
+ with open (file_path , 'rb' ) as f :
1045
+ f .seek (- 512 , os .SEEK_END )
1046
+ last_512_bytes = f .read ()
1047
+ file_info = un_serialize (last_512_bytes )
1048
+
1049
+ # 大文件的记录文件也可以反序列化出 name,但是没有 padding 字段
1050
+ if file_info is not None and 'padding' in file_info :
1051
+ real_name = file_info ['name' ] # 解除伪装的真实文件名
1052
+ logger .debug (f"Find meta info: real_name={ real_name } " )
1053
+ real_path = save_path + os .sep + real_name
1054
+ # 如果存在同名文件且设置了 overwrite, 删掉原文件
1055
+ if overwrite and os .path .exists (real_path ):
1056
+ os .remove (real_path )
1057
+ # 自动重命名, 文件存在就会加个序号
1058
+ new_file_path = auto_rename (real_path )
1059
+ os .rename (file_path , new_file_path )
1060
+ # 截断最后 512 字节隐藏信息, 还原文件
1061
+ with open (new_file_path , 'rb+' ) as f :
1062
+ f .seek (- 512 , os .SEEK_END )
1063
+ f .truncate ()
1064
+ file_path = new_file_path # 保存文件重命名后真实路径
1065
+
1066
+ # 如果设置了下载完成的回调函数, 调用之
1023
1067
if downloaded_handler is not None :
1024
1068
downloaded_handler (os .path .abspath (file_path ))
1025
1069
return LanZouCloud .SUCCESS
@@ -1046,6 +1090,15 @@ def get_folder_info_by_url(self, share_url, dir_pwd='') -> FolderDetail:
1046
1090
# 要求输入密码, 用户描述中可能带有"输入密码",所以不用这个字符串判断
1047
1091
if ('id="pwdload"' in html or 'id="passwddiv"' in html ) and len (dir_pwd ) == 0 :
1048
1092
return FolderDetail (LanZouCloud .LACK_PASSWORD )
1093
+
1094
+ if "acw_sc__v2" in html :
1095
+ # 在页面被过多访问或其他情况下,有时候会先返回一个加密的页面,其执行计算出一个acw_sc__v2后放入页面后再重新访问页面才能获得正常页面
1096
+ # 若该页面进行了js加密,则进行解密,计算acw_sc__v2,并加入cookie
1097
+ acw_sc__v2 = calc_acw_sc__v2 (html )
1098
+ self ._session .cookies .set ("acw_sc__v2" , acw_sc__v2 )
1099
+ logger .debug (f"Set Cookie: acw_sc__v2={ acw_sc__v2 } " )
1100
+ html = self ._get (share_url ).text # 文件分享页面(第一页)
1101
+
1049
1102
try :
1050
1103
# 获取文件需要的参数
1051
1104
html = remove_notes (html )
@@ -1136,6 +1189,7 @@ def _check_big_file(self, file_list):
1136
1189
logger .debug (f"Big file checking: Failed" )
1137
1190
return None
1138
1191
resp = self ._get (info .durl )
1192
+ # 这里无需知道 txt 文件的 Content-Length, 全部读取即可
1139
1193
info = un_serialize (resp .content ) if resp else None
1140
1194
if info is not None : # 确认是大文件
1141
1195
name , size , * _ , parts = info .values () # 真实文件名, 文件字节大小, (其它数据),分段数据文件名(有序)
0 commit comments