Skip to content

Commit 0c32f29

Browse files
Mantisus and janbuchar authored
perf: Use Apify-provided environment variables to obtain PPE pricing information (#644)
closes: #614 --------- Co-authored-by: Jan Buchar <Teyras@gmail.com>
1 parent 7aeca61 commit 0c32f29

File tree

2 files changed

+118
-36
lines changed

2 files changed

+118
-36
lines changed

src/apify/_charging.py

Lines changed: 88 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,20 @@
44
from dataclasses import dataclass
55
from datetime import datetime, timezone
66
from decimal import Decimal
7-
from typing import TYPE_CHECKING, Protocol
7+
from typing import TYPE_CHECKING, Protocol, TypedDict
88

99
from pydantic import TypeAdapter
1010

1111
from crawlee._utils.context import ensure_context
1212

13-
from apify._models import ActorRun, PricingModel
13+
from apify._models import (
14+
ActorRun,
15+
FlatPricePerMonthActorPricingInfo,
16+
FreeActorPricingInfo,
17+
PayPerEventActorPricingInfo,
18+
PricePerDatasetItemActorPricingInfo,
19+
PricingModel,
20+
)
1421
from apify._utils import docs_group
1522
from apify.log import logger
1623
from apify.storages import Dataset
@@ -115,20 +122,12 @@ class ChargingManagerImplementation(ChargingManager):
115122

116123
def __init__(self, configuration: Configuration, client: ApifyClientAsync) -> None:
117124
self._max_total_charge_usd = configuration.max_total_charge_usd or Decimal('inf')
125+
self._configuration = configuration
118126
self._is_at_home = configuration.is_at_home
119127
self._actor_run_id = configuration.actor_run_id
120128
self._purge_charging_log_dataset = configuration.purge_on_start
121129
self._pricing_model: PricingModel | None = None
122130

123-
if configuration.test_pay_per_event:
124-
if self._is_at_home:
125-
raise ValueError(
126-
'Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported '
127-
'in a local development environment'
128-
)
129-
130-
self._pricing_model = 'PAY_PER_EVENT'
131-
132131
self._client = client
133132
self._charging_log_dataset: Dataset | None = None
134133

@@ -140,37 +139,46 @@ def __init__(self, configuration: Configuration, client: ApifyClientAsync) -> No
140139

141140
async def __aenter__(self) -> None:
142141
"""Initialize the charging manager - this is called by the `Actor` class and shouldn't be invoked manually."""
143-
self.active = True
144-
145-
if self._is_at_home:
146-
# Running on the Apify platform - fetch pricing info for the current run.
147-
148-
if self._actor_run_id is None:
149-
raise RuntimeError('Actor run ID not found even though the Actor is running on Apify')
142+
# Validate config
143+
if self._configuration.test_pay_per_event and self._is_at_home:
144+
raise ValueError(
145+
'Using the ACTOR_TEST_PAY_PER_EVENT environment variable is only supported '
146+
'in a local development environment'
147+
)
150148

151-
run = run_validator.validate_python(await self._client.run(self._actor_run_id).get())
152-
if run is None:
153-
raise RuntimeError('Actor run not found')
149+
self.active = True
154150

155-
if run.pricing_info is not None:
156-
self._pricing_model = run.pricing_info.pricing_model
151+
# Retrieve pricing information from env vars or API
152+
pricing_data = await self._fetch_pricing_info()
153+
pricing_info = pricing_data['pricing_info']
154+
charged_event_counts = pricing_data['charged_event_counts']
155+
max_total_charge_usd = pricing_data['max_total_charge_usd']
157156

158-
if run.pricing_info.pricing_model == 'PAY_PER_EVENT':
159-
for event_name, event_pricing in run.pricing_info.pricing_per_event.actor_charge_events.items():
160-
self._pricing_info[event_name] = PricingInfoItem(
161-
price=event_pricing.event_price_usd,
162-
title=event_pricing.event_title,
163-
)
157+
# Set pricing model
158+
if self._configuration.test_pay_per_event:
159+
self._pricing_model = 'PAY_PER_EVENT'
160+
else:
161+
self._pricing_model = pricing_info.pricing_model if pricing_info else None
162+
163+
# Load per-event pricing information
164+
if pricing_info and pricing_info.pricing_model == 'PAY_PER_EVENT':
165+
for event_name, event_pricing in pricing_info.pricing_per_event.actor_charge_events.items():
166+
self._pricing_info[event_name] = PricingInfoItem(
167+
price=event_pricing.event_price_usd,
168+
title=event_pricing.event_title,
169+
)
164170

165-
self._max_total_charge_usd = run.options.max_total_charge_usd or self._max_total_charge_usd
171+
self._max_total_charge_usd = max_total_charge_usd
166172

167-
for event_name, count in (run.charged_event_counts or {}).items():
168-
price = self._pricing_info.get(event_name, PricingInfoItem(Decimal(), title='')).price
169-
self._charging_state[event_name] = ChargingStateItem(
170-
charge_count=count,
171-
total_charged_amount=count * price,
172-
)
173+
# Load charged event counts
174+
for event_name, count in charged_event_counts.items():
175+
price = self._pricing_info.get(event_name, PricingInfoItem(Decimal(), title='')).price
176+
self._charging_state[event_name] = ChargingStateItem(
177+
charge_count=count,
178+
total_charged_amount=count * price,
179+
)
173180

181+
# Set up charging log dataset for local development
174182
if not self._is_at_home and self._pricing_model == 'PAY_PER_EVENT':
175183
# We are not running on the Apify platform, but PPE is enabled for testing - open a dataset that
176184
# will contain a log of all charge calls for debugging purposes.
@@ -328,6 +336,38 @@ def get_charged_event_count(self, event_name: str) -> int:
328336
# Return the maximum total charge (in USD) currently stored on this charging manager.
def get_max_total_charge_usd(self) -> Decimal:
329337
return self._max_total_charge_usd
330338

339+
async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict:
    """Resolve the pricing info, charged event counts and charge limit for the current run.

    Sources are tried in this order:

    1. The configuration (populated from environment variables) - used only when both the pricing info
       and the charged event counts are present.
    2. The Apify API - used only when running on the platform.
    3. Empty defaults for local development.

    Returns:
        A dictionary with the `pricing_info`, `charged_event_counts` and `max_total_charge_usd` keys.

    Raises:
        RuntimeError: If running on the platform and the run ID is unknown or the run cannot be found.
    """
    config = self._configuration
    env_pricing_info = config.actor_pricing_info
    env_event_counts = config.charged_event_counts

    # Environment-provided values win, but only if both of them are available.
    if env_pricing_info is not None and env_event_counts is not None:
        return {
            'pricing_info': env_pricing_info,
            'charged_event_counts': env_event_counts,
            'max_total_charge_usd': config.max_total_charge_usd or Decimal('inf'),
        }

    # Off the platform there is no run to query - report "no pricing, nothing charged yet".
    if not self._is_at_home:
        return {
            'pricing_info': None,
            'charged_event_counts': {},
            'max_total_charge_usd': config.max_total_charge_usd or Decimal('inf'),
        }

    # On the platform without the environment variables - ask the API about the current run.
    run_id = self._actor_run_id
    if run_id is None:
        raise RuntimeError('Actor run ID not found even though the Actor is running on Apify')

    raw_run = await self._client.run(run_id).get()
    run = run_validator.validate_python(raw_run)
    if run is None:
        raise RuntimeError('Actor run not found')

    return {
        'pricing_info': run.pricing_info,
        'charged_event_counts': run.charged_event_counts or {},
        'max_total_charge_usd': run.options.max_total_charge_usd or Decimal('inf'),
    }
370+
331371

332372
@dataclass
333373
class ChargingStateItem:
@@ -339,3 +379,15 @@ class ChargingStateItem:
339379
class PricingInfoItem:
340380
price: Decimal
341381
title: str
382+
383+
384+
class _FetchedPricingInfoDict(TypedDict):
    """Shape of the value returned by `ChargingManagerImplementation._fetch_pricing_info`."""

    # Pricing info of the Actor in one of the supported pricing models, or `None` when it is not available.
    pricing_info: (
        FreeActorPricingInfo
        | FlatPricePerMonthActorPricingInfo
        | PricePerDatasetItemActorPricingInfo
        | PayPerEventActorPricingInfo
        | None
    )

    # Number of already charged events, keyed by event name.
    charged_event_counts: dict[str, int]

    # Upper bound of the total charge in USD.
    max_total_charge_usd: Decimal

src/apify/_configuration.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import json
34
from datetime import datetime, timedelta
45
from decimal import Decimal
56
from logging import getLogger
@@ -14,6 +15,12 @@
1415
from crawlee._utils.urls import validate_http_url
1516
from crawlee.configuration import Configuration as CrawleeConfiguration
1617

18+
from apify._models import (
19+
FlatPricePerMonthActorPricingInfo,
20+
FreeActorPricingInfo,
21+
PayPerEventActorPricingInfo,
22+
PricePerDatasetItemActorPricingInfo,
23+
)
1724
from apify._utils import docs_group
1825

1926
logger = getLogger(__name__)
@@ -409,6 +416,29 @@ class Configuration(CrawleeConfiguration):
409416
),
410417
] = None
411418

419+
# Pricing info of the Actor, read from the `apify_actor_pricing_info` alias. The value may arrive as
# a JSON string, which is decoded before validation; the concrete model is selected by `pricing_model`.
actor_pricing_info: Annotated[
    FreeActorPricingInfo
    | FlatPricePerMonthActorPricingInfo
    | PricePerDatasetItemActorPricingInfo
    | PayPerEventActorPricingInfo
    | None,
    Field(
        alias='apify_actor_pricing_info',
        description='JSON string with pricing info of the actor',
        discriminator='pricing_model',
    ),
    # Decode JSON strings, but treat an empty or blank string like an unset value - `json.loads('')`
    # would raise instead. Other falsy inputs are normalized to `None` as well.
    BeforeValidator(
        lambda data: (json.loads(data) if data.strip() else None) if isinstance(data, str) else (data or None)
    ),
] = None

# Counts of already charged events keyed by event name, read from the
# `apify_charged_actor_event_counts` alias. The value may arrive as a JSON string.
charged_event_counts: Annotated[
    dict[str, int] | None,
    Field(
        alias='apify_charged_actor_event_counts',
        description='Counts of events that were charged for the actor',
    ),
    # Same normalization as for `actor_pricing_info`: blank strings become `None` rather than a JSON error.
    BeforeValidator(
        lambda data: (json.loads(data) if data.strip() else None) if isinstance(data, str) else (data or None)
    ),
] = None
441+
412442
@model_validator(mode='after')
413443
def disable_browser_sandbox_on_platform(self) -> Self:
414444
"""Disable the browser sandbox mode when running on the Apify platform.

0 commit comments

Comments
 (0)