2 changes: 1 addition & 1 deletion .gitignore
@@ -1,8 +1,8 @@
# Cache
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache
.ty_cache
.uv-cache

# Virtual envs
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -46,7 +46,7 @@ make format

### Type checking

Type checking is handled by [mypy](https://mypy.readthedocs.io/), verifying code against type annotations. Configuration settings can be found in `pyproject.toml`.
Type checking is handled by [ty](https://docs.astral.sh/ty/), verifying code against type annotations. Configuration settings can be found in `pyproject.toml`.

To run type checking:

4 changes: 2 additions & 2 deletions Makefile
@@ -5,7 +5,7 @@
E2E_TESTS_CONCURRENCY = 1

clean:
rm -rf .mypy_cache .pytest_cache .ruff_cache .uv-cache build dist htmlcov .coverage
rm -rf .ty_cache .pytest_cache .ruff_cache .uv-cache build dist htmlcov .coverage

install-sync:
uv sync --all-extras
@@ -27,7 +27,7 @@ lint:
uv run ruff check

type-check:
uv run mypy
uv run ty check

unit-tests:
uv run pytest \
3 changes: 1 addition & 2 deletions docs/deployment/code_examples/google/cloud_run_example.py
@@ -1,4 +1,3 @@
# mypy: disable-error-code="misc"
import json
import os

@@ -9,7 +8,7 @@
from crawlee.storage_clients import MemoryStorageClient


@get('/') # type: ignore[untyped-decorator]
@get('/')
async def main() -> str:
"""The crawler entry point that will be called when the HTTP endpoint is accessed."""
# highlight-start
3 changes: 1 addition & 2 deletions docs/deployment/code_examples/google/google_example.py
@@ -1,4 +1,3 @@
# mypy: disable-error-code="misc"
import asyncio
import json
from datetime import timedelta
@@ -48,7 +47,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
# highlight-end


@functions_framework.http # type: ignore[untyped-decorator]
@functions_framework.http
def crawlee_run(request: Request) -> Response:
# You can pass data to your crawler using `request`
function_id = request.headers['Function-Execution-Id']
@@ -9,7 +9,7 @@

async def main() -> None:
fingerprint_generator = DefaultFingerprintGenerator(
header_options=HeaderGeneratorOptions(browsers=['chromium']),
header_options=HeaderGeneratorOptions(browsers=['chrome']),
screen_options=ScreenOptions(min_width=400),
)

@@ -22,7 +22,7 @@ async def parse(self, response: HttpResponse) -> LexborHTMLParser:
"""Parse HTTP response body into a document object."""
response_body = await response.read()
# Run parsing in a thread to avoid blocking the event loop.
return await asyncio.to_thread(lambda: LexborHTMLParser(response_body))
return await asyncio.to_thread(LexborHTMLParser, response_body)

@override
async def parse_text(self, text: str) -> LexborHTMLParser:
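Passing the callable and its argument straight to `asyncio.to_thread`, rather than wrapping them in a lambda, keeps the parser's signature and return type visible to the type checker. A runnable sketch of the same pattern, using a stand-in parsing function instead of the real LexborHTMLParser:

```python
import asyncio


def parse_document(body: bytes) -> str:
    # Stand-in for a blocking, CPU-bound parser such as LexborHTMLParser.
    return body.decode('utf-8')


async def main() -> None:
    body = b'<html><body>Hello</body></html>'
    # The callable and its arguments go to the worker thread directly, so the
    # awaited result is inferred as `str` without a lambda hiding the signature.
    document = await asyncio.to_thread(parse_document, body)
    print(document)


asyncio.run(main())
```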
4 changes: 2 additions & 2 deletions docs/guides/code_examples/running_in_web_server/server.py
@@ -14,7 +14,7 @@
app = FastAPI(lifespan=lifespan, title='Crawler app')


@app.get('/', response_class=HTMLResponse) # type: ignore[untyped-decorator]
@app.get('/', response_class=HTMLResponse)
def index() -> str:
return """
<!DOCTYPE html>
@@ -32,7 +32,7 @@ def index() -> str:
"""


@app.get('/scrape') # type: ignore[untyped-decorator]
@app.get('/scrape')
async def scrape_url(request: Request, url: str | None = None) -> dict:
if not url:
return {'url': 'missing', 'scrape result': 'no results'}
62 changes: 12 additions & 50 deletions pyproject.toml
@@ -102,7 +102,6 @@ dev = [
"build<2.0.0", # For e2e tests.
"dycw-pytest-only<3.0.0",
"fakeredis[probabilistic,json,lua]<3.0.0",
"mypy~=1.19.0",
"pre-commit<5.0.0",
"proxy-py<3.0.0",
"pydoc-markdown<5.0.0",
@@ -113,6 +112,7 @@ dev = [
"pytest<9.0.0",
"ruff~=0.14.0",
"setuptools", # setuptools are used by pytest, but not explicitly required
"ty~=0.0.0",
"types-beautifulsoup4<5.0.0",
"types-cachetools<7.0.0",
"types-colorama<1.0.0",
@@ -230,62 +230,24 @@ filterwarnings = [
"ignore:websockets.server.WebSocketServerProtocol is deprecated:DeprecationWarning",
]

[tool.mypy]
python_version = "3.10"
plugins = ["pydantic.mypy"]
[tool.ty.environment]
python-version = "3.10"

[tool.ty.src]
include = ["src", "tests", "scripts", "docs", "website"]
exclude = [
"src/crawlee/project_template",
"docs/guides/code_examples/storage_clients/custom_storage_client_example.py",
]
files = ["src", "tests", "docs", "website"]
check_untyped_defs = true
disallow_incomplete_defs = true
disallow_untyped_calls = true
disallow_untyped_decorators = true
disallow_untyped_defs = true
no_implicit_optional = true
warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_ignores = true

[[tool.mypy.overrides]]
# Example codes are sometimes showing integration of crawlee with external tool, which is not dependency of crawlee.
module = [
"apify", # Example code shows integration of apify and crawlee.
"apify_fingerprint_datapoints", # Untyped and stubs not available
"camoufox", # Example code shows integration of camoufox and crawlee.
"fastapi", # Example code shows running in webserver.
"saxonche", # Example code shows HttpCrawler with custom parser.
"scrapling.*", # Example code shows HttpCrawler with custom parser.
"selectolax.*", # Example code shows HttpCrawler with custom parser.
"stagehand.*", # Example code shows integration of Stagehand and crawlee.
"starlette.*", # Example code shows running in webserver.
"flask", # Example code shows deploy on Google Cloud.
"functions_framework", # Example code shows deploy on Google Cloud.
"jaro", # Untyped and stubs not available
"litestar", # Example code shows deploy on Google Cloud Run.
"loguru", # Example code shows integration of loguru and crawlee for JSON logging.
"lxml.*", # Example code shows HttpCrawler with custom parser.
"sklearn.linear_model", # Untyped and stubs not available
"cookiecutter.*", # Untyped and stubs not available
"inquirer.*", # Untyped and stubs not available
"pyquery", # Example code shows HttpCrawler with custom parser.
"warcio.*", # Example code shows WARC files creation.
"wrapt" # Untyped and stubs not available
]
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = [
"running_in_web_server.*", # False positive when fastapi not available
[[tool.ty.overrides]]
include = [
"docs/**/*.py",
"website/**/*.py",
]
disable_error_code = ["misc"]

[tool.basedpyright]
pythonVersion = "3.10"
typeCheckingMode = "standard"
include = ["src", "tests", "docs", "website"]
[tool.ty.overrides.rules]
unresolved-import = "ignore"

[tool.coverage.report]
exclude_lines = ["pragma: no cover", "if TYPE_CHECKING:", "assert_never()"]
2 changes: 1 addition & 1 deletion src/crawlee/_browserforge_workaround.py
@@ -20,7 +20,7 @@ def patch_browserforge() -> None:
def DownloadIfNotExists(**flags: bool) -> None:
pass

download.DownloadIfNotExists = DownloadIfNotExists
download.DownloadIfNotExists = DownloadIfNotExists # ty: ignore[invalid-assignment]

import browserforge.bayesian_network

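The mypy-style `# type: ignore[...]` comments in this and the following files are replaced with ty's inline suppression syntax, `# ty: ignore[rule]`. A minimal sketch of how such a suppression reads (hypothetical class; assuming ty reports the mismatched assignment under its `invalid-assignment` rule):

```python
class Config:
    retries: int = 3


cfg = Config()
# Assigning a str to an attribute declared as int is a type error; the
# bracketed rule name silences only that diagnostic on this one line.
cfg.retries = 'many'  # ty: ignore[invalid-assignment]
```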
4 changes: 2 additions & 2 deletions src/crawlee/_request.py
@@ -93,7 +93,7 @@ def __setitem__(self, key: str, value: JsonSerializable) -> None:
def __delitem__(self, key: str) -> None:
del self.__pydantic_extra__[key]

def __iter__(self) -> Iterator[str]: # type: ignore[override]
def __iter__(self) -> Iterator[str]: # ty: ignore[invalid-method-override]
yield from self.__pydantic_extra__

def __len__(self) -> int:
@@ -195,7 +195,7 @@ class Request(BaseModel):
] = None
"""HTTP request payload."""

# Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
# Workaround for Pydantic and type checkers when using Annotated with default_factory
if TYPE_CHECKING:
headers: HttpHeaders = HttpHeaders()
"""HTTP request headers."""
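The comment above refers to the same workaround used in `_types.py`: type checkers struggle with `Annotated` fields whose metadata carries a `default_factory`, so the model exposes a plain attribute under `TYPE_CHECKING` and the real `Annotated` form at runtime. A simplified, self-contained sketch of that shape (hypothetical model, not the actual `Request` class):

```python
from typing import TYPE_CHECKING, Annotated

from pydantic import BaseModel, Field


class Item(BaseModel):
    if TYPE_CHECKING:
        # Only ever seen by the type checker; never evaluated at runtime.
        tags: dict[str, str] = {}
    else:
        # What Pydantic actually sees: the default comes from the factory.
        tags: Annotated[dict[str, str], Field(default_factory=lambda: dict[str, str]())]


print(Item().tags)  # {}
```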
27 changes: 13 additions & 14 deletions src/crawlee/_types.py
@@ -62,14 +62,14 @@ class HttpHeaders(RootModel, Mapping[str, str]):

model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)

# Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
# Workaround for Pydantic and type checkers when using Annotated with default_factory
if TYPE_CHECKING:
root: dict[str, str] = {}
else:
root: Annotated[
dict[str, str],
PlainValidator(lambda value: _normalize_headers(value)),
Field(default_factory=dict),
Field(default_factory=lambda: dict[str, str]()),
]

def __getitem__(self, key: str) -> str:
@@ -91,7 +91,7 @@ def __ror__(self, other: HttpHeaders) -> HttpHeaders:
combined_headers = {**other, **self.root}
return HttpHeaders(combined_headers)

def __iter__(self) -> Iterator[str]: # type: ignore[override]
def __iter__(self) -> Iterator[str]: # ty: ignore[invalid-method-override]
yield from self.root

def __len__(self) -> int:
@@ -671,17 +671,16 @@ def create_modified_copy(
get_key_value_store: GetKeyValueStoreFromRequestHandlerFunction | None = None,
) -> Self:
"""Create a modified copy of the crawling context with specified changes."""
original_fields = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)}
modified_fields = {
key: value
for key, value in {
'push_data': push_data,
'add_requests': add_requests,
'get_key_value_store': get_key_value_store,
}.items()
if value
}
return self.__class__(**{**original_fields, **modified_fields})
modifications = dict[str, Any]()

if push_data is not None:
modifications['push_data'] = push_data
if add_requests is not None:
modifications['add_requests'] = add_requests
if get_key_value_store is not None:
modifications['get_key_value_store'] = get_key_value_store

return dataclasses.replace(self, **modifications)


class GetDataKwargs(TypedDict):
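`create_modified_copy` now collects only the overridden fields and delegates the copying to `dataclasses.replace`, instead of rebuilding the instance from every field by hand. An illustrative sketch with a simplified dataclass (not the real crawling-context class):

```python
from dataclasses import dataclass, replace


@dataclass(frozen=True)
class Context:
    push_data: str = 'default-push'
    add_requests: str = 'default-add'


def modified_copy(ctx: Context, push_data: str | None = None) -> Context:
    overrides: dict[str, str] = {}
    if push_data is not None:
        overrides['push_data'] = push_data
    # replace() copies the instance, changing only the fields supplied here.
    return replace(ctx, **overrides)


print(modified_copy(Context(), push_data='custom-push'))
```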
2 changes: 1 addition & 1 deletion src/crawlee/_utils/context.py
@@ -44,4 +44,4 @@ async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:

return await method(self, *args, **kwargs)

return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper # type: ignore[return-value]
return async_wrapper if inspect.iscoroutinefunction(method) else sync_wrapper # ty: ignore[invalid-return-type]
2 changes: 1 addition & 1 deletion src/crawlee/_utils/file.py
@@ -170,7 +170,7 @@ async def export_csv_to_stream(
if 'lineterminator' not in kwargs:
kwargs['lineterminator'] = '\n'

writer = csv.writer(dst, **kwargs) # type: ignore[arg-type]
writer = csv.writer(dst, **kwargs)
write_header = True

# Iterate over the dataset and write to CSV.
8 changes: 4 additions & 4 deletions src/crawlee/_utils/globs.py
@@ -33,12 +33,12 @@ def _translate(

HACK: This function is copied from CPython stdlib source. It will be released in Python 3.13 as `glob.translate`
"""
if not seps:
seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep
_seps = ((os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)) if seps is None else seps

escaped_seps = ''.join(map(re.escape, seps))
any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
escaped_seps = ''.join(map(re.escape, _seps))
any_sep = f'[{escaped_seps}]' if len(_seps) > 1 else escaped_seps
not_sep = f'[^{escaped_seps}]'

if include_hidden:
one_last_segment = f'{not_sep}+'
one_segment = f'{one_last_segment}{any_sep}'
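The `_translate` change binds the normalized separators to a new name and makes both branches produce a tuple, so the variable keeps a single type (`tuple[str, ...]`) instead of sometimes being a bare string. A small sketch of the same normalization (hypothetical helper, not the stdlib-derived function above):

```python
import os
import re


def escaped_separators(seps: tuple[str, ...] | None = None) -> str:
    # Both branches yield a tuple, and the parameter itself is never re-assigned,
    # so `_seps` has one well-defined type for the checker.
    _seps = ((os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)) if seps is None else seps
    return ''.join(map(re.escape, _seps))


print(escaped_separators())
print(escaped_separators(('/', '\\')))
```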
12 changes: 10 additions & 2 deletions src/crawlee/_utils/recurring_task.py
@@ -25,7 +25,11 @@ class RecurringTask:
"""

def __init__(self, func: Callable, delay: timedelta) -> None:
logger.debug(f'Calling RecurringTask.__init__(func={func.__name__}, delay={delay})...')
logger.debug(
'Calling RecurringTask.__init__(func=%s, delay=%s)...',
func.__name__ if hasattr(func, '__name__') else func.__class__.__name__,
delay,
)
self.func = func
self.delay = delay
self.task: asyncio.Task | None = None
@@ -55,7 +59,11 @@ async def _wrapper(self) -> None:

def start(self) -> None:
"""Start the recurring task execution."""
self.task = asyncio.create_task(self._wrapper(), name=f'Task-recurring-{self.func.__name__}')
name = self.func.__name__ if hasattr(self.func, '__name__') else self.func.__class__.__name__
self.task = asyncio.create_task(
self._wrapper(),
name=f'Task-recurring-{name}',
)

async def stop(self) -> None:
"""Stop the recurring task execution."""
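The updated logging defers string formatting to the logging module (%-style arguments) and falls back to the class name when the callable has no `__name__`, presumably to support callables such as `functools.partial` objects. A short sketch of both pieces (hypothetical task function):

```python
import logging
from functools import partial

logger = logging.getLogger(__name__)


def ping(host: str) -> None:
    print(f'pinging {host}')


task_callable = partial(ping, 'example.com')
# partial objects carry no __name__, hence the class-name fallback.
name = task_callable.__name__ if hasattr(task_callable, '__name__') else task_callable.__class__.__name__
# With %-style placeholders, formatting only happens if DEBUG logging is enabled.
logger.debug('Starting recurring task %s', name)
```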
17 changes: 12 additions & 5 deletions src/crawlee/_utils/sitemap.py
@@ -430,10 +430,17 @@ async def parse_sitemap(
up to the specified maximum depth.
"""
# Set default options
options = options or {}
emit_nested_sitemaps = options.get('emit_nested_sitemaps', False)
max_depth = options.get('max_depth', float('inf'))
sitemap_retries = options.get('sitemap_retries', 3)
default_timeout = timedelta(seconds=30)
if options:
emit_nested_sitemaps = options['emit_nested_sitemaps']
max_depth = options['max_depth']
sitemap_retries = options['sitemap_retries']
timeout = options.get('timeout', default_timeout)
else:
emit_nested_sitemaps = False
max_depth = float('inf')
sitemap_retries = 3
timeout = default_timeout

# Setup working state
sources = list(initial_sources)
@@ -472,7 +479,7 @@ async def parse_sitemap(
sitemap_retries,
emit_nested_sitemaps=emit_nested_sitemaps,
proxy_info=proxy_info,
timeout=options.get('timeout', timedelta(seconds=30)),
timeout=timeout,
):
yield result
else:
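`parse_sitemap` now branches once on whether an options mapping was provided and otherwise uses literal defaults, rather than chaining `.get()` calls on a possibly-empty dict. A rough sketch of that shape with a hypothetical, simplified options `TypedDict` (not the real crawlee type):

```python
from datetime import timedelta
from typing import TypedDict


class ParseOptions(TypedDict, total=False):
    max_depth: float
    timeout: timedelta


def resolve_options(options: ParseOptions | None) -> tuple[float, timedelta]:
    default_timeout = timedelta(seconds=30)
    if options:
        # Everything is read from the mapping in this branch...
        return options.get('max_depth', float('inf')), options.get('timeout', default_timeout)
    # ...and plain defaults are used in the other, keeping each value's type concrete.
    return float('inf'), default_timeout


print(resolve_options(None))
print(resolve_options({'max_depth': 2.0}))
```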