
Commit 7b9ba30

Merge branch 'release/v0.7.0' - The Adaptive Intelligence Update
2 parents: 02f3127 + 0c8bb74

File tree: 320 files changed (+115,071 / −514 lines)


.claude/settings.local.json

Lines changed: 28 additions & 0 deletions (new file)

```json
{
  "permissions": {
    "allow": [
      "Bash(cd:*)",
      "Bash(python3:*)",
      "Bash(python:*)",
      "Bash(grep:*)",
      "Bash(mkdir:*)",
      "Bash(cp:*)",
      "Bash(rm:*)",
      "Bash(true)",
      "Bash(./package-extension.sh:*)",
      "Bash(find:*)",
      "Bash(chmod:*)",
      "Bash(rg:*)",
      "Bash(/Users/unclecode/.npm-global/lib/node_modules/@anthropic-ai/claude-code/vendor/ripgrep/arm64-darwin/rg -A 5 -B 5 \"Script Builder\" docs/md_v2/apps/crawl4ai-assistant/)",
      "Bash(/Users/unclecode/.npm-global/lib/node_modules/@anthropic-ai/claude-code/vendor/ripgrep/arm64-darwin/rg -A 30 \"generateCode\\(events, format\\)\" docs/md_v2/apps/crawl4ai-assistant/content/content.js)",
      "Bash(/Users/unclecode/.npm-global/lib/node_modules/@anthropic-ai/claude-code/vendor/ripgrep/arm64-darwin/rg \"<style>\" docs/md_v2/apps/crawl4ai-assistant/index.html -A 5)",
      "Bash(git checkout:*)",
      "Bash(docker logs:*)",
      "Bash(curl:*)",
      "Bash(docker compose:*)",
      "Bash(./test-final-integration.sh:*)",
      "Bash(mv:*)"
    ]
  },
  "enableAllProjectMcpServers": false
}
```

.gitignore

Lines changed: 6 additions & 0 deletions

```diff
@@ -1,3 +1,6 @@
+# Scripts folder (private tools)
+.scripts/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -265,3 +268,6 @@ tests/**/benchmark_reports
 
 docs/**/data
 .codecat/
+
+docs/apps/linkdin/debug*/
+docs/apps/linkdin/samples/insights/*
```

CHANGELOG.md

Lines changed: 50 additions & 0 deletions

```diff
@@ -5,6 +5,56 @@ All notable changes to Crawl4AI will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.7.x] - 2025-06-29
+
+### Added
+- **Virtual Scroll Support**: New `VirtualScrollConfig` for handling virtualized scrolling on modern websites
+  - Automatically detects and handles three scrolling scenarios:
+    - Content unchanged (continue scrolling)
+    - Content appended (traditional infinite scroll)
+    - Content replaced (true virtual scroll - Twitter/Instagram style)
+  - Captures ALL content from pages that replace DOM elements during scroll
+  - Intelligent deduplication based on normalized text content
+  - Configurable scroll amount, count, and wait times
+  - Seamless integration with existing extraction strategies
+  - Comprehensive examples including Twitter timeline, Instagram grid, and mixed content scenarios
+
```
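The "intelligent deduplication based on normalized text content" entry can be illustrated with a minimal pure-Python sketch: chunks whose whitespace-collapsed, case-folded text has already been seen are dropped, keeping the first occurrence. This is an illustration of the idea only, not Crawl4AI's actual implementation, and the function names here are hypothetical.

```python
def normalize_text(text: str) -> str:
    """Collapse runs of whitespace and case-fold so cosmetic differences don't defeat dedup."""
    return " ".join(text.split()).lower()


def dedupe_chunks(chunks: list[str]) -> list[str]:
    """Keep the first occurrence of each normalized chunk, preserving original order."""
    seen: set[str] = set()
    unique: list[str] = []
    for chunk in chunks:
        key = normalize_text(chunk)
        if key not in seen:
            seen.add(key)
            unique.append(chunk)
    return unique
```

During virtual scrolling, where the page replaces DOM elements, capturing after each scroll step yields overlapping snapshots; a pass like this merges them into one deduplicated content stream.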
```diff
+## [Unreleased]
+
+### Added
+- **AsyncUrlSeeder**: High-performance URL discovery system for intelligent crawling at scale
+  - Discover URLs from sitemaps and Common Crawl index
+  - Extract and analyze page metadata without full crawling
+  - BM25 relevance scoring for query-based URL filtering
+  - Multi-domain parallel discovery with `many_urls()` method
+  - Automatic caching with TTL for discovered URLs
+  - Rate limiting and concurrent request management
+  - Live URL validation with HEAD requests
+  - JSON-LD and Open Graph metadata extraction
+- **SeedingConfig**: Configuration class for URL seeding operations
+  - Support for multiple discovery sources (`sitemap`, `cc`, `sitemap+cc`)
+  - Pattern-based URL filtering with wildcards
+  - Configurable concurrency and rate limiting
+  - Query-based relevance scoring with BM25
+  - Score threshold filtering for quality control
+- Comprehensive documentation for URL seeding feature
+  - Detailed comparison with deep crawling approaches
+  - Complete API reference with examples
+  - Integration guide with AsyncWebCrawler
+  - Performance benchmarks and best practices
+- Example scripts demonstrating URL seeding:
+  - `url_seeder_demo.py`: Interactive Rich-based demonstration
+  - `url_seeder_quick_demo.py`: Screenshot-friendly examples
+- Test suite for URL seeding with BM25 scoring
+
```
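The `SeedingConfig` entries above (wildcard URL patterns plus a BM25 score threshold) amount to a two-stage filter over discovered URLs. A rough stdlib-only sketch of that pipeline, assuming seeder results are dicts carrying `url` and `relevance_score` fields (hypothetical field names; the real BM25 scoring itself is omitted):

```python
from fnmatch import fnmatch


def filter_candidates(results: list[dict], pattern: str, score_threshold: float) -> list[str]:
    """Keep URLs that match the wildcard pattern AND clear the relevance threshold."""
    return [
        r["url"]
        for r in results
        if fnmatch(r["url"], pattern) and r["relevance_score"] >= score_threshold
    ]


# Placeholder data standing in for seeder output.
candidates = [
    {"url": "https://example.com/blog/virtual-scroll", "relevance_score": 0.72},
    {"url": "https://example.com/about", "relevance_score": 0.90},
    {"url": "https://example.com/blog/old-news", "relevance_score": 0.05},
]

# "/about" fails the pattern; "old-news" fails the threshold.
selected = filter_candidates(candidates, "*/blog/*", score_threshold=0.3)
```

The point of filtering at discovery time is that low-value URLs are discarded before any page is fetched, which is what distinguishes seeding from deep crawling.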
```diff
+### Changed
+- Updated `__init__.py` to export AsyncUrlSeeder and SeedingConfig
+- Enhanced documentation with URL seeding integration examples
+
+### Fixed
+- Corrected examples to properly extract URLs from seeder results before passing to `arun_many()`
+- Fixed logger color compatibility issue (changed `lightblack` to `bright_black`)
+
 ## [0.6.2] - 2025-05-02
 
 ### Added
```
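The first "Fixed" entry above describes an easy mistake: the seeder returns result records with metadata rather than bare URL strings, so callers must pull out the URL field before handing the list to `arun_many()`. A minimal sketch, with hypothetical field names and placeholder data:

```python
# Seeder-style results: records with metadata, not plain strings (field names assumed).
seeder_results = [
    {"url": "https://example.com/blog/a", "status": "valid", "relevance_score": 0.82},
    {"url": "https://example.com/blog/b", "status": "valid", "relevance_score": 0.41},
]

# Extract plain URL strings before crawling.
urls = [r["url"] for r in seeder_results]

# Then, inside an async context with an AsyncWebCrawler instance:
#     results = await crawler.arun_many(urls=urls, config=run_config)
```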

Dockerfile

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,7 +1,7 @@
 FROM python:3.12-slim-bookworm AS build
 
 # C4ai version
-ARG C4AI_VER=0.6.0
+ARG C4AI_VER=0.7.0-r1
 ENV C4AI_VERSION=$C4AI_VER
 LABEL c4ai.version=$C4AI_VER
 
```