From a2528eda4bbb8c538e9b87ca2cde81679fec4ed0 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Thu, 23 Oct 2025 01:42:59 +0000 Subject: [PATCH 1/4] Use Apify-provided environment variables to obtain PPE pricing information --- src/apify/_charging.py | 132 +++++++++++++++++++++++++----------- src/apify/_configuration.py | 16 +++++ 2 files changed, 109 insertions(+), 39 deletions(-) diff --git a/src/apify/_charging.py b/src/apify/_charging.py index 4d95d9b5..d5625bb8 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -1,16 +1,24 @@ from __future__ import annotations +import json import math from dataclasses import dataclass from datetime import datetime, timezone from decimal import Decimal -from typing import TYPE_CHECKING, Protocol +from typing import TYPE_CHECKING, Any, Protocol from pydantic import TypeAdapter from crawlee._utils.context import ensure_context -from apify._models import ActorRun, PricingModel +from apify._models import ( + ActorRun, + FlatPricePerMonthActorPricingInfo, + FreeActorPricingInfo, + PayPerEventActorPricingInfo, + PricePerDatasetItemActorPricingInfo, + PricingModel, +) from apify._utils import docs_group from apify.log import logger from apify.storages import Dataset @@ -115,20 +123,12 @@ class ChargingManagerImplementation(ChargingManager): def __init__(self, configuration: Configuration, client: ApifyClientAsync) -> None: self._max_total_charge_usd = configuration.max_total_charge_usd or Decimal('inf') + self._configuration = configuration self._is_at_home = configuration.is_at_home self._actor_run_id = configuration.actor_run_id self._purge_charging_log_dataset = configuration.purge_on_start self._pricing_model: PricingModel | None = None - if configuration.test_pay_per_event: - if self._is_at_home: - raise ValueError( - 'Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported ' - 'in a local development environment' - ) - - self._pricing_model = 'PAY_PER_EVENT' - self._client = client self._charging_log_dataset: Dataset | None = None @@ -140,41 +140,47 @@ def __init__(self, configuration: Configuration, client: ApifyClientAsync) -> No async def __aenter__(self) -> None: """Initialize the charging manager - this is called by the `Actor` class and shouldn't be invoked manually.""" - self.active = True - - if self._is_at_home: - # Running on the Apify platform - fetch pricing info for the current run. - - if self._actor_run_id is None: - raise RuntimeError('Actor run ID not found even though the Actor is running on Apify') + # Validate config + if self._configuration.test_pay_per_event and self._is_at_home: + raise ValueError( + 'Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported ' + 'in a local development environment' + ) - run = run_validator.validate_python(await self._client.run(self._actor_run_id).get()) - if run is None: - raise RuntimeError('Actor run not found') + self.active = True - if run.pricing_info is not None: - self._pricing_model = run.pricing_info.pricing_model + # Retrieve pricing information from env vars or API + pricing_data = await self._fetch_pricing_info() + pricing_info = pricing_data['pricing_info'] + charged_event_counts = pricing_data['charged_event_counts'] + max_total_charge_usd = pricing_data['max_total_charge_usd'] - if run.pricing_info.pricing_model == 'PAY_PER_EVENT': - for event_name, event_pricing in run.pricing_info.pricing_per_event.actor_charge_events.items(): - self._pricing_info[event_name] = PricingInfoItem( - price=event_pricing.event_price_usd, - title=event_pricing.event_title, - ) + # Set pricing model + if self._configuration.test_pay_per_event: + self._pricing_model = 'PAY_PER_EVENT' + else: + self._pricing_model = pricing_info.pricing_model if pricing_info else None + + # Load per-event pricing information + if pricing_info and pricing_info.pricing_model == 'PAY_PER_EVENT': + for event_name, event_pricing in pricing_info.pricing_per_event.actor_charge_events.items(): + self._pricing_info[event_name] = PricingInfoItem( + price=event_pricing.event_price_usd, + title=event_pricing.event_title, + ) - self._max_total_charge_usd = run.options.max_total_charge_usd or self._max_total_charge_usd + self._max_total_charge_usd = max_total_charge_usd - for event_name, count in (run.charged_event_counts or {}).items(): - price = self._pricing_info.get(event_name, PricingInfoItem(Decimal(), title='')).price - self._charging_state[event_name] = ChargingStateItem( - charge_count=count, - total_charged_amount=count * price, - ) + # Load charged event counts + for event_name, count in charged_event_counts.items(): + price = self._pricing_info.get(event_name, PricingInfoItem(Decimal(), title='')).price + self._charging_state[event_name] = ChargingStateItem( + charge_count=count, + total_charged_amount=count * price, + ) + # Set up charging log dataset for local development if not self._is_at_home and self._pricing_model == 'PAY_PER_EVENT': - # We are not running on the Apify platform, but PPE is enabled for testing - open a dataset that - # will contain a log of all charge calls for debugging purposes. - if self._purge_charging_log_dataset: dataset = await Dataset.open(name=self.LOCAL_CHARGING_LOG_DATASET_NAME) await dataset.drop() @@ -328,6 +334,54 @@ def get_charged_event_count(self, event_name: str) -> int: def get_max_total_charge_usd(self) -> Decimal: return self._max_total_charge_usd + async def _fetch_pricing_info(self) -> dict[str, Any]: + """Fetch pricing information from environment variables or API.""" + # Check if pricing info is available via environment variables + if self._configuration.actor_pricing_info and self._configuration.charged_event_counts: + charged_counts = json.loads(self._configuration.charged_event_counts) + + # Validate pricing info with proper discriminator support + pricing_info_adapter: TypeAdapter[ + FreeActorPricingInfo + | FlatPricePerMonthActorPricingInfo + | PricePerDatasetItemActorPricingInfo + | PayPerEventActorPricingInfo + ] = TypeAdapter( + FreeActorPricingInfo + | FlatPricePerMonthActorPricingInfo + | PricePerDatasetItemActorPricingInfo + | PayPerEventActorPricingInfo + ) + pricing_info = pricing_info_adapter.validate_json(self._configuration.actor_pricing_info) + + return { + 'pricing_info': pricing_info, + 'charged_event_counts': charged_counts, + 'max_total_charge_usd': self._configuration.max_total_charge_usd or Decimal('inf'), + } + + # Fall back to API call + if self._is_at_home: + if self._actor_run_id is None: + raise RuntimeError('Actor run ID not found even though the Actor is running on Apify') + + run = run_validator.validate_python(await self._client.run(self._actor_run_id).get()) + if run is None: + raise RuntimeError('Actor run not found') + + return { + 'pricing_info': run.pricing_info, + 'charged_event_counts': run.charged_event_counts or {}, + 'max_total_charge_usd': run.options.max_total_charge_usd or Decimal('inf'), + } + + # Local development without environment variables + return { + 'pricing_info': None, + 'charged_event_counts': {}, + 'max_total_charge_usd': self._configuration.max_total_charge_usd or Decimal('inf'), + } + @dataclass class ChargingStateItem: diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index fe6e89c2..a965e770 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -409,6 +409,22 @@ class Configuration(CrawleeConfiguration): ), ] = None + actor_pricing_info: Annotated[ + str | None, + Field( + alias='apify_actor_pricing_info', + description='JSON string with prising info of the actor', + ), + ] = None + + charged_event_counts: Annotated[ + str | None, + Field( + alias='apify_charged_actor_event_counts', + description='Counts of events that were charged for the actor', + ), + ] = None + @model_validator(mode='after') def disable_browser_sandbox_on_platform(self) -> Self: """Disable the browser sandbox mode when running on the Apify platform. From f8463868e5870dc3b76d9bb9a79f7f7877996342 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Sun, 26 Oct 2025 16:47:55 +0000 Subject: [PATCH 2/4] update types --- src/apify/_charging.py | 63 ++++++++++++++++++------------------- src/apify/_configuration.py | 15 ++++++++- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/apify/_charging.py b/src/apify/_charging.py index d5625bb8..b242d106 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from datetime import datetime, timezone from decimal import Decimal -from typing import TYPE_CHECKING, Any, Protocol +from typing import TYPE_CHECKING, Protocol, TypedDict from pydantic import TypeAdapter @@ -181,6 +181,9 @@ async def __aenter__(self) -> None: # Set up charging log dataset for local development if not self._is_at_home and self._pricing_model == 'PAY_PER_EVENT': + # We are not running on the Apify platform, but PPE is enabled for testing - open a dataset that + # will contain a log of all charge calls for debugging purposes. + if self._purge_charging_log_dataset: dataset = await Dataset.open(name=self.LOCAL_CHARGING_LOG_DATASET_NAME) await dataset.drop() @@ -334,31 +337,15 @@ def get_charged_event_count(self, event_name: str) -> int: def get_max_total_charge_usd(self) -> Decimal: return self._max_total_charge_usd - async def _fetch_pricing_info(self) -> dict[str, Any]: + async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict: """Fetch pricing information from environment variables or API.""" # Check if pricing info is available via environment variables if self._configuration.actor_pricing_info and self._configuration.charged_event_counts: - charged_counts = json.loads(self._configuration.charged_event_counts) - - # Validate pricing info with proper discriminator support - pricing_info_adapter: TypeAdapter[ - FreeActorPricingInfo - | FlatPricePerMonthActorPricingInfo - | PricePerDatasetItemActorPricingInfo - | PayPerEventActorPricingInfo - ] = TypeAdapter( - FreeActorPricingInfo - | FlatPricePerMonthActorPricingInfo - | PricePerDatasetItemActorPricingInfo - | PayPerEventActorPricingInfo + return _FetchedPricingInfoDict( + pricing_info=self._configuration.actor_pricing_info, + charged_event_counts=json.loads(self._configuration.charged_event_counts), + max_total_charge_usd=self._configuration.max_total_charge_usd or Decimal('inf'), ) - pricing_info = pricing_info_adapter.validate_json(self._configuration.actor_pricing_info) - - return { - 'pricing_info': pricing_info, - 'charged_event_counts': charged_counts, - 'max_total_charge_usd': self._configuration.max_total_charge_usd or Decimal('inf'), - } # Fall back to API call if self._is_at_home: @@ -369,18 +356,18 @@ async def _fetch_pricing_info(self) -> dict[str, Any]: if run is None: raise RuntimeError('Actor run not found') - return { - 'pricing_info': run.pricing_info, - 'charged_event_counts': run.charged_event_counts or {}, - 'max_total_charge_usd': run.options.max_total_charge_usd or Decimal('inf'), - } + return _FetchedPricingInfoDict( + pricing_info=run.pricing_info, + charged_event_counts=run.charged_event_counts or {}, + max_total_charge_usd=run.options.max_total_charge_usd or Decimal('inf'), + ) # Local development without environment variables - return { - 'pricing_info': None, - 'charged_event_counts': {}, - 'max_total_charge_usd': self._configuration.max_total_charge_usd or Decimal('inf'), - } + return _FetchedPricingInfoDict( + pricing_info=None, + charged_event_counts={}, + max_total_charge_usd=self._configuration.max_total_charge_usd or Decimal('inf'), + ) @dataclass @@ -393,3 +380,15 @@ class ChargingStateItem: class PricingInfoItem: price: Decimal title: str + + +class _FetchedPricingInfoDict(TypedDict): + pricing_info: ( + FreeActorPricingInfo + | FlatPricePerMonthActorPricingInfo + | PricePerDatasetItemActorPricingInfo + | PayPerEventActorPricingInfo + | None + ) + charged_event_counts: dict[str, int] + max_total_charge_usd: Decimal diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index a965e770..8760250f 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from datetime import datetime, timedelta from decimal import Decimal from logging import getLogger @@ -14,6 +15,12 @@ from crawlee._utils.urls import validate_http_url from crawlee.configuration import Configuration as CrawleeConfiguration +from apify._models import ( + FlatPricePerMonthActorPricingInfo, + FreeActorPricingInfo, + PayPerEventActorPricingInfo, + PricePerDatasetItemActorPricingInfo, +) from apify._utils import docs_group logger = getLogger(__name__) @@ -410,11 +417,17 @@ class Configuration(CrawleeConfiguration): ] = None actor_pricing_info: Annotated[ - str | None, + FreeActorPricingInfo + | FlatPricePerMonthActorPricingInfo + | PricePerDatasetItemActorPricingInfo + | PayPerEventActorPricingInfo + | None, Field( alias='apify_actor_pricing_info', description='JSON string with prising info of the actor', + discriminator='pricing_model', ), + BeforeValidator(lambda data: json.loads(data) if data else None), ] = None charged_event_counts: Annotated[ From bacb5c534a6929bf4dccaa5b5c1968850d81b7e4 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Mon, 27 Oct 2025 15:15:34 +0000 Subject: [PATCH 3/4] fix types --- src/apify/_charging.py | 3 +-- src/apify/_configuration.py | 5 +++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/apify/_charging.py b/src/apify/_charging.py index b242d106..f6e8c275 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json import math from dataclasses import dataclass from datetime import datetime, timezone @@ -343,7 +342,7 @@ async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict: if self._configuration.actor_pricing_info and self._configuration.charged_event_counts: return _FetchedPricingInfoDict( pricing_info=self._configuration.actor_pricing_info, - charged_event_counts=json.loads(self._configuration.charged_event_counts), + charged_event_counts=self._configuration.charged_event_counts, max_total_charge_usd=self._configuration.max_total_charge_usd or Decimal('inf'), ) diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 8760250f..53d4afe8 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -427,15 +427,16 @@ class Configuration(CrawleeConfiguration): description='JSON string with prising info of the actor', discriminator='pricing_model', ), - BeforeValidator(lambda data: json.loads(data) if data else None), + BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data if data else None), ] = None charged_event_counts: Annotated[ - str | None, + dict[str, int] | None, Field( alias='apify_charged_actor_event_counts', description='Counts of events that were charged for the actor', ), + BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data if data else None), ] = None @model_validator(mode='after') From 519b4e008fdd717fde2eba80a33778a3cbf7e1bb Mon Sep 17 00:00:00 2001 From: Max Bohomolov <34358312+Mantisus@users.noreply.github.com> Date: Mon, 27 Oct 2025 17:23:47 +0200 Subject: [PATCH 4/4] Update src/apify/_charging.py Co-authored-by: Jan Buchar --- src/apify/_charging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/apify/_charging.py b/src/apify/_charging.py index f6e8c275..33cba9d3 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -339,7 +339,7 @@ def get_max_total_charge_usd(self) -> Decimal: async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict: """Fetch pricing information from environment variables or API.""" # Check if pricing info is available via environment variables - if self._configuration.actor_pricing_info and self._configuration.charged_event_counts: + if self._configuration.actor_pricing_info is not None and self._configuration.charged_event_counts is not None: return _FetchedPricingInfoDict( pricing_info=self._configuration.actor_pricing_info, charged_event_counts=self._configuration.charged_event_counts,