firecrawl · drobnikj · Nov 25, 2025 · Nov 25, 2025 · Dec 1, 2025
diff --git a/README.md b/README.md
@@ -8,21 +8,21 @@ This framework supports APIs for Firecrawl, Apify, ScraperAPI, ScrapingBee, Zyte
 
 Below are evaluation results across different engines.
 
-| Engine	        | Coverage (Success Rate) (%)	| Quality (F1) |
-|-----------------|-----------------------------|--------------|
-| Firecrawl	      | 80.9	                      | 0.68         |
-| Exa	            | 76.3	                      | 0.53         |
-| Tavily	        | 67.6	                      | 0.50         |
-| ScraperAPI	    | 63.5	                      | 0.45         |
-| Zyte	          | 62.9	                      | 0.47         |
-| ScrapingBee	    | 60.6	                      | 0.45         |
-| Apify	          | 60.2	                      | 0.42         |
-| Crawl4ai	      | 58.0	                      | 0.45         |
-| Selenium	      | 55.0	                      | 0.40         |
-| Scrapy	        | 54.0	                      | 0.43         |
-| Puppeteer	      | 53.7	                      | 0.41         |
-| Rest (requests)	| 50.6	                      | 0.36         |
-| Playwright	    | 39.5	                      | 0.34         |
+| Engine	        | Coverage (Success Rate) (%)	 | Quality (F1) |
+|-----------------|------------------------------|--------------|
+| Firecrawl	      | 80.9	                        | 0.68         |
+| Exa	            | 76.3	                        | 0.53         |
+| Apify	          | 75.8	                        | 0.51         |
+| Tavily	        | 67.6	                        | 0.50         |
+| ScraperAPI	    | 63.5	                        | 0.45         |
+| Zyte	          | 62.9	                        | 0.47         |
+| ScrapingBee	    | 60.6	                        | 0.45         |
+| Crawl4ai	      | 58.0	                        | 0.45         |
+| Selenium	      | 55.0	                        | 0.40         |
+| Scrapy	        | 54.0	                        | 0.43         |
+| Puppeteer	      | 53.7	                        | 0.41         |
+| Rest (requests)	| 50.6	                        | 0.36         |
+| Playwright	    | 39.5	                        | 0.34         |
 
 ## Install
 

diff --git a/engines/apify_api.py b/engines/apify_api.py
@@ -24,33 +24,28 @@ def __init__(self):
         if not self.api_token:
             raise RuntimeError("APIFY_API_TOKEN environment variable not set.")
         self.client = ApifyClient(self.api_token)
-        self.actor_id = "apify/web-scraper"
+        self.actor_id = "apify/website-content-crawler"
 
     def scrape(self, url: str, run_id: str) -> ScrapeResult:
         error = None
-        html = ""
+        markdown = ""
         content_size = 0
-        status_code = 500 
+        status_code = 500
         try:
             # Start the actor and wait for it to finish
             actor_client = self.client.actor(self.actor_id)
             run_result = actor_client.call(
                 run_input={
                     "startUrls": [{"url": url}],
-                    "maxRequestsPerCrawl": 1,
-                    "pseudoUrls": [],
-                    "linkSelector": "",
-                    "proxyConfiguration": {"useApifyProxy": True},
-                    "crawlerType": "chrome",
-                    "pageFunction": """
-                      async function(context) {
-                          const $ = context.jQuery;
-                          return {
-                              html: $('body').html(),
-                              status_code: context.response ? context.response.status : null
-                          };
-                      }
-                    """
+                    "crawlerType": "playwright:adaptive",
+                    "maxCrawlPages": 1,
+                    "saveFiles": False,
+                    "saveHtml": False,
+                    "saveHtmlAsFile": False,
+                    "saveMarkdown": True,
+                    "saveScreenshots": False,
+                    "signHttpRequests": False,
+                    "proxyConfiguration": {"useApifyProxy": True}
                 },
                 timeout_secs=120  # Wait up to 2 minutes
             )
@@ -60,12 +55,13 @@ def scrape(self, url: str, run_id: str) -> ScrapeResult:
                 dataset_id = run_result["defaultDatasetId"]
                 dataset_client = self.client.dataset(dataset_id)
                 items = dataset_client.list_items().items
-                if items and "html" in items[0]:
-                    html = items[0]["html"] or ""
-                    status_code = items[0].get("status_code")
-                    content_size = len(html.encode("utf-8")) if html else 0
+                if items and "markdown" in items[0]:  # first dataset item holds the page
+                    markdown = items[0]["markdown"] or ""  # normalize a null value to ""
+                    crawl_data = items[0].get("crawl") or {}  # absent/null "crawl" must not fail the scrape
+                    status_code = crawl_data.get("httpStatusCode")  # may be None; the result falls back to 500
+                    content_size = len(markdown.encode("utf-8")) if markdown else 0  # size in UTF-8 bytes
                 else:
-                    error = "No HTML found in Apify dataset result."
+                    error = "No markdown found in Apify dataset result."
         except Exception as e:
             error = str(e)
 
@@ -76,7 +72,7 @@ def scrape(self, url: str, run_id: str) -> ScrapeResult:
             status_code=status_code or 500,
             error=error,
             content_size=content_size,
-            format="html",
+            format="markdown",
             created_at=datetime.now().isoformat(),
-            content=html,
+            content=markdown,
         ) 
diff --git a/runs/results/apify_api_quality.json b/runs/results/apify_api_quality.json
@@ -1,6 +1,6 @@
 {
-  "avg_recall": 0.4088987780290431,
-  "avg_precision": 0.4309147557081136,
-  "avg_f1": 0.4166200898332274,
-  "success_rate": 0.6021505376344086
-}
+  "success_rate": 0.758,
+  "avg_recall": 0.490671096073996,
+  "avg_precision": 0.5579099299255283,
+  "avg_f1": 0.5082330459356168
+}