fixed zaxtyson/LanZouCloud-API#59 zaxtyson/LanZouCloud-API#60

zaxtyson · zaxtyson · commit 4b7895b9248b · 2021-05-15T10:43:20.000+08:00
diff --git a/README.md b/README.md
@@ -101,8 +101,26 @@
 
 ![](https://upload.cc/i1/2021/03/03/UIDRAQ.png)
 
+# Terminus
+
+除了 Windows Terminal, 还可以试试 [Terminus](https://github.com/eugeny/terminus)
+
+通过 `Settings` > `Shell` > `New Profiles` > `Custom Shell` 添加一个配置
+
+点击新添加的配置, 设置 `Command` 为 `D:\Program Files\蓝奏云 CMD\lanzou-cmd.exe`(这里改为实际安装路径),
+`Working directory` 可以设置为安装目录或其它目录 `D:\Program Files\蓝奏云 CMD`
+
+![](https://i.bmp.ovh/imgs/2021/05/9338f7feb6b968cc.png)
+
 # 更新日志
 
+## `v2.6.5`
+
+-
+修复蓝奏云主域名解析异常的问题[#59](https://github.com/zaxtyson/LanZouCloud-API/issues/59) [#60](https://github.com/zaxtyson/LanZouCloud-API/pull/60)
+- 修复某些文件夹信息获取失败的问题[#58](https://github.com/zaxtyson/LanZouCloud-API/pull/58)
+- 修复下载页的 Cookie 验证问题[#55](https://github.com/zaxtyson/LanZouCloud-API/pull/55)
+
 ## `v2.6.3`
 
 - 修复文件后缀非小写导致的误判问题[#92](https://github.com/rachpt/lanzou-gui/issues/92)
diff --git a/lanzou/api/__init__.py b/lanzou/api/__init__.py
@@ -1,5 +1,5 @@
 from lanzou.api.core import LanZouCloud
 
-version = '2.6.2'
+version = '2.6.5'
 
 __all__ = ['utils', 'types', 'models', 'LanZouCloud', 'version']
diff --git a/lanzou/api/core.py b/lanzou/api/core.py
@@ -45,7 +45,7 @@ def __init__(self):
         self._timeout = 15  # 每个请求的超时(不包含下载响应体的用时)
         self._max_size = 100  # 单个文件大小上限 MB
         self._upload_delay = (0, 0)  # 文件上传延时
-        self._host_url = 'https://pan.lanzous.com'
+        self._host_url = 'https://pan.lanzoui.com'
         self._doupload_url = 'https://pc.woozooo.com/doupload.php'
         self._account_url = 'https://pc.woozooo.com/account.php'
         self._mydisk_url = 'https://pc.woozooo.com/mydisk.php'
@@ -58,20 +58,36 @@ def __init__(self):
         disable_warnings(InsecureRequestWarning)  # 全局禁用 SSL 警告
 
     def _get(self, url, **kwargs):
-        try:
-            kwargs.setdefault('timeout', self._timeout)
-            kwargs.setdefault('headers', self._headers)
-            return self._session.get(url, verify=False, **kwargs)
-        except (ConnectionError, requests.RequestException):
-            return None
+        for possible_url in self._all_possible_urls(url):
+            try:
+                kwargs.setdefault('timeout', self._timeout)
+                kwargs.setdefault('headers', self._headers)
+                return self._session.get(possible_url, verify=False, **kwargs)
+            except (ConnectionError, requests.RequestException):
+                logger.debug(f"Get {possible_url} failed, try another domain")
+
+        return None
 
     def _post(self, url, data, **kwargs):
-        try:
-            kwargs.setdefault('timeout', self._timeout)
-            kwargs.setdefault('headers', self._headers)
-            return self._session.post(url, data, verify=False, **kwargs)
-        except (ConnectionError, requests.RequestException):
-            return None
+        for possible_url in self._all_possible_urls(url):
+            try:
+                kwargs.setdefault('timeout', self._timeout)
+                kwargs.setdefault('headers', self._headers)
+                return self._session.post(possible_url, data, verify=False, **kwargs)
+            except (ConnectionError, requests.RequestException):
+                logger.debug(f"Post to {possible_url} ({data}) failed, try another domain")
+
+        return None
+
+    @staticmethod
+    def _all_possible_urls(url: str) -> List[str]:
+        """蓝奏云的主域名有时会挂掉, 此时尝试切换到备用域名"""
+        available_domains = [
+            'lanzoui.com',  # 鲁ICP备15001327号-6, 2020-06-09, SEO 排名最低
+            'lanzoux.com',  # 鲁ICP备15001327号-5, 2020-06-09
+            'lanzous.com'  # 主域名, 备案异常, 部分地区已经无法访问
+        ]
+        return [url.replace('lanzous.com', d) for d in available_domains]
 
     def ignore_limits(self):
         """解除官方限制"""
@@ -135,8 +151,6 @@ def login_by_cookie(self, cookie: dict) -> int:
 
     def logout(self) -> int:
         """注销"""
-        self._cookies = None
-        self._session.cookies.clear()
         html = self._get(self._account_url, params={'action': 'logout'})
         if not html:
             return LanZouCloud.NETWORK_ERROR
@@ -435,6 +449,16 @@ def get_file_info_by_url(self, share_url, pwd='') -> FileDetail:
         if not first_page:
             return FileDetail(LanZouCloud.NETWORK_ERROR, pwd=pwd, url=share_url)
 
+        if "acw_sc__v2" in first_page.text:
+            # 在页面被过多访问或其他情况下，有时候会先返回一个加密的页面，其执行计算出一个acw_sc__v2后放入页面后再重新访问页面才能获得正常页面
+            # 若该页面进行了js加密，则进行解密，计算acw_sc__v2，并加入cookie
+            acw_sc__v2 = calc_acw_sc__v2(first_page.text)
+            self._session.cookies.set("acw_sc__v2", acw_sc__v2)
+            logger.debug(f"Set Cookie: acw_sc__v2={acw_sc__v2}")
+            first_page = self._get(share_url)  # 文件分享页面(第一页)
+            if not first_page:
+                return FileDetail(LanZouCloud.NETWORK_ERROR, pwd=pwd, url=share_url)
+
         first_page = remove_notes(first_page.text)  # 去除网页里的注释
         if '文件取消' in first_page or '文件不存在' in first_page:
             return FileDetail(LanZouCloud.FILE_CANCELLED, pwd=pwd, url=share_url)
@@ -950,25 +974,8 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
         if not resp:
             return LanZouCloud.FAILED
 
-        content_length = resp.headers.get('Content-Length', None)
-        # 如果无法获取 Content-Length, 先读取一点数据, 再尝试获取一次
-        # 通常只需读取 1 字节数据
-        data_iter = resp.iter_content(chunk_size=1)
-        while not content_length:
-            logger.warning("Not found Content-Length in response headers")
-            logger.debug("Read 1 byte from stream...")
-            try:
-                next(data_iter)
-            except StopIteration:
-                logger.debug("Please wait for a moment before downloading")
-                return LanZouCloud.FAILED
-            resp_ = self._get(info.durl, stream=True)
-            if not resp_:
-                return LanZouCloud.FAILED
-            content_length = resp_.headers.get('Content-Length', None)
-            logger.debug(f"Content-Length: {content_length}")
-
-        total_size = int(content_length)
+        # 如果本地存在同名文件且设置了 overwrite, 则覆盖原文件
+        # 否则修改下载文件路径, 自动在文件名后加序号
         file_path = save_path + os.sep + info.name
         if os.path.exists(file_path):
             if overwrite:
@@ -981,9 +988,33 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
         tmp_file_path = file_path + '.download'  # 正在下载中的文件名
         logger.debug(f'Save file to {tmp_file_path}')
 
+        # 对于 txt 文件, 可能出现没有 Content-Length 的情况
+        # 此时文件需要下载一次才会出现 Content-Length
+        # 这时候我们先读取一点数据, 再尝试获取一次, 通常只需读取 1 字节数据
+        content_length = resp.headers.get('Content-Length', None)
+        if not content_length:
+            data_iter = resp.iter_content(chunk_size=1)
+            max_retries = 5  # 5 次拿不到就算了
+            while not content_length and max_retries > 0:
+                max_retries -= 1
+                logger.warning("Not found Content-Length in response headers")
+                logger.debug("Read 1 byte from stream...")
+                try:
+                    next(data_iter)  # 读取一个字节
+                except StopIteration:
+                    logger.debug("Please wait for a moment before downloading")
+                    return LanZouCloud.FAILED
+                resp_ = self._get(info.durl, stream=True)  # 再请求一次试试
+                if not resp_:
+                    return LanZouCloud.FAILED
+                content_length = resp_.headers.get('Content-Length', None)
+                logger.debug(f"Content-Length: {content_length}")
+
+        if not content_length:
+            return LanZouCloud.FAILED  # 应该不会出现这种情况
+
+        # 支持断点续传下载
         now_size = 0
-        chunk_size = 4096
-        last_512_bytes = b''  # 用于识别文件是否携带真实文件名信息
         if os.path.exists(tmp_file_path):
             now_size = os.path.getsize(tmp_file_path)  # 本地已经下载的文件大小
         headers = {**self._headers, 'Range': 'bytes=%d-' % now_size}
@@ -996,30 +1027,43 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
 
         with open(tmp_file_path, "ab") as f:
             file_name = os.path.basename(file_path)
-            for chunk in resp.iter_content(chunk_size):
+            for chunk in resp.iter_content(4096):
                 if chunk:
                     f.write(chunk)
                     f.flush()
                     now_size += len(chunk)
-                    if total_size - now_size < 512:
-                        last_512_bytes += chunk
                     if callback is not None:
-                        callback(file_name, total_size, now_size)
+                        callback(file_name, int(content_length), now_size)
+
+        # 文件下载完成后, 检查文件尾部 512 字节数据
+        # 绕过官方限制上传时, API 会隐藏文件真实信息到文件尾部
+        # 这里尝试提取隐藏信息, 并截断文件尾部数据
         os.rename(tmp_file_path, file_path)  # 下载完成，改回正常文件名
-        # 尝试解析文件报尾
-        file_info = un_serialize(last_512_bytes[-512:])
-        if file_info is not None and 'padding' in file_info:  # 大文件的记录文件也可以反序列化出 name,但是没有 padding
-            real_name = file_info['name']  # 解除伪装的真实文件名
-            logger.debug(f"Find meta info: real_name={real_name}")
-            real_path = save_path + os.sep + real_name
-            if overwrite and os.path.exists(real_path):
-                os.remove(real_path)  # 删除原文件
-            new_file_path = auto_rename(real_path)
-            os.rename(file_path, new_file_path)
-            with open(new_file_path, 'rb+') as f:
-                f.seek(-512, 2)  # 截断最后 512 字节数据
-                f.truncate()
-            file_path = new_file_path  # 保存文件重命名后真实路径
+        if os.path.getsize(file_path) > 512:  # 文件大于 512 bytes 就检查一下
+            file_info = None
+            with open(file_path, 'rb') as f:
+                f.seek(-512, os.SEEK_END)
+                last_512_bytes = f.read()
+                file_info = un_serialize(last_512_bytes)
+
+            # 大文件的记录文件也可以反序列化出 name,但是没有 padding 字段
+            if file_info is not None and 'padding' in file_info:
+                real_name = file_info['name']  # 解除伪装的真实文件名
+                logger.debug(f"Find meta info: real_name={real_name}")
+                real_path = save_path + os.sep + real_name
+                # 如果存在同名文件且设置了 overwrite, 删掉原文件
+                if overwrite and os.path.exists(real_path):
+                    os.remove(real_path)
+                # 自动重命名, 文件存在就会加个序号
+                new_file_path = auto_rename(real_path)
+                os.rename(file_path, new_file_path)
+                # 截断最后 512 字节隐藏信息, 还原文件
+                with open(new_file_path, 'rb+') as f:
+                    f.seek(-512, os.SEEK_END)
+                    f.truncate()
+                file_path = new_file_path  # 保存文件重命名后真实路径
+
+        # 如果设置了下载完成的回调函数, 调用之
         if downloaded_handler is not None:
             downloaded_handler(os.path.abspath(file_path))
         return LanZouCloud.SUCCESS
@@ -1046,6 +1090,15 @@ def get_folder_info_by_url(self, share_url, dir_pwd='') -> FolderDetail:
         # 要求输入密码, 用户描述中可能带有"输入密码",所以不用这个字符串判断
         if ('id="pwdload"' in html or 'id="passwddiv"' in html) and len(dir_pwd) == 0:
             return FolderDetail(LanZouCloud.LACK_PASSWORD)
+
+        if "acw_sc__v2" in html:
+            # 在页面被过多访问或其他情况下，有时候会先返回一个加密的页面，其执行计算出一个acw_sc__v2后放入页面后再重新访问页面才能获得正常页面
+            # 若该页面进行了js加密，则进行解密，计算acw_sc__v2，并加入cookie
+            acw_sc__v2 = calc_acw_sc__v2(html)
+            self._session.cookies.set("acw_sc__v2", acw_sc__v2)
+            logger.debug(f"Set Cookie: acw_sc__v2={acw_sc__v2}")
+            html = self._get(share_url).text  # 文件分享页面(第一页)
+
         try:
             # 获取文件需要的参数
             html = remove_notes(html)
@@ -1136,6 +1189,7 @@ def _check_big_file(self, file_list):
                 logger.debug(f"Big file checking: Failed")
                 return None
             resp = self._get(info.durl)
+            # 这里无需知道 txt 文件的 Content-Length, 全部读取即可
             info = un_serialize(resp.content) if resp else None
             if info is not None:  # 确认是大文件
                 name, size, *_, parts = info.values()  # 真实文件名, 文件字节大小, (其它数据),分段数据文件名(有序)
diff --git a/lanzou/api/utils.py b/lanzou/api/utils.py
@@ -12,7 +12,7 @@
 import requests
 
 __all__ = ['logger', 'remove_notes', 'name_format', 'time_format', 'is_name_valid', 'is_file_url',
-           'is_folder_url', 'big_file_split', 'un_serialize', 'let_me_upload', 'auto_rename']
+           'is_folder_url', 'big_file_split', 'un_serialize', 'let_me_upload', 'auto_rename', 'calc_acw_sc__v2']
 
 # 调试日志设置
 logger = logging.getLogger('lanzou')
@@ -26,7 +26,7 @@
 
 headers = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
-    'Referer': 'https://pan.lanzous.com',
+    # 'Referer': 'https://pan.lanzous.com',  # 可以没有
     'Accept-Language': 'zh-CN,zh;q=0.9',
 }
 
@@ -207,3 +207,38 @@ def auto_rename(file_path) -> str:
     while f"{fname_no_ext}({count}){ext}" in flist:
         count += 1
     return fpath + os.sep + fname_no_ext + '(' + str(count) + ')' + ext
+
+
+def calc_acw_sc__v2(html_text: str) -> str:
+    arg1 = re.search(r"arg1='([0-9A-Z]+)'", html_text)
+    arg1 = arg1.group(1) if arg1 else ""
+    acw_sc__v2 = hex_xor(unsbox(arg1), "3000176000856006061501533003690027800375")
+    return acw_sc__v2
+
+
+# 参考自 https://zhuanlan.zhihu.com/p/228507547
+def unsbox(str_arg):
+    v1 = [15, 35, 29, 24, 33, 16, 1, 38, 10, 9, 19, 31, 40, 27, 22, 23, 25, 13, 6, 11, 39, 18, 20, 8, 14, 21, 32, 26, 2,
+          30, 7, 4, 17, 5, 3, 28, 34, 37, 12, 36]
+    v2 = ["" for _ in v1]
+    for idx in range(0, len(str_arg)):
+        v3 = str_arg[idx]
+        for idx2 in range(len(v1)):
+            if v1[idx2] == idx + 1:
+                v2[idx2] = v3
+
+    res = ''.join(v2)
+    return res
+
+
+def hex_xor(str_arg, args):
+    res = ''
+    for idx in range(0, min(len(str_arg), len(args)), 2):
+        v1 = int(str_arg[idx:idx + 2], 16)
+        v2 = int(args[idx:idx + 2], 16)
+        v3 = format(v1 ^ v2, 'x')
+        if len(v3) == 1:
+            v3 = '0' + v3
+        res += v3
+
+    return res
diff --git a/lanzou/cmder/__init__.py b/lanzou/cmder/__init__.py
@@ -1,5 +1,5 @@
 from lanzou.cmder.config import config
 
-version = '2.6.3'
+version = '2.6.5'
 
 __all__ = ['cmder', 'utils', 'version', 'config']