Skip to content
This repository was archived by the owner on Apr 17, 2023. It is now read-only.

Commit 7f1c291

Browse files
committed
fix TwitterCrawler to extract only videos
1 parent 0bb8481 commit 7f1c291

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

twitter_video_tools/twitter_crawler.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def get_recent_liked_tweet(self, username: str) -> str:
7272
self._goto_liked_tweets(username)
7373
return self._get_article_links_in_current_screen()[0]
7474

75-
def get_video_of_tweet(self, link: str, timeout: Optional[float] = 5000) -> list[tuple[str, str]]:
75+
def get_video_of_tweet(self, link: str, timeout: Optional[float] = 10000) -> list[tuple[str, str]]:
7676
video_links: list[str] = []
7777

7878
def _request_m3u8_capture_handler(request: Request) -> None:
@@ -101,7 +101,7 @@ def _get_article_links_in_current_screen(self) -> list[str]:
101101
links: list[str] = []
102102

103103
while True:
104-
articles = self.page.locator('article')
104+
articles = self.page.locator('article:has(video)')
105105
article_length = articles.count()
106106
try:
107107
links = [
@@ -112,5 +112,6 @@ def _get_article_links_in_current_screen(self) -> list[str]:
112112
break
113113
except Error: # if articles in the page are not reachable
114114
self.page.mouse.wheel(0, 500) # scrolling down to refresh the articles
115+
self.page.mouse.wheel(0, -500) # scrolling back up to refresh the articles
115116

116117
return links

0 commit comments

Comments
 (0)