Skip to content
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 65 additions & 37 deletions sources/matomo/helpers/matomo_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""This module contains an implementation of a Matomo API client for python."""
from typing import Iterator, List, Union
from dlt.common.typing import DictStrAny, DictStrStr, TDataItem, TDataItems
from typing import Iterator, List

from dlt.common.typing import DictStrAny, TDataItem, TDataItems
from dlt.sources.helpers.requests import client


Expand All @@ -17,34 +18,52 @@ class MatomoAPIClient:
API client used to make requests to Matomo API.
"""

def __init__(self, api_token: str, url: str) -> None:
def __init__(self, api_token: str, url: str, call_method: str = "GET") -> None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Literal["GET", "POST"] = "GET" might be a better (narrower) option

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

"""
Initializes the client.

Args:
api_token (str): Token used to authenticate for Matomo API.
url (str): URL of the Matomo website.
call_method (str): HTTP method for API calls, related to authentication,
either "GET" or "POST". Default is "GET" to continue the support for Matomo 4 and below,
for Matomo 5, "POST" is recommended
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be great if the thing with Only allow secure requests is mentioned here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added further explanation

"""

self.base_url = url
self.auth_token = api_token
if call_method.upper() not in ["GET", "POST"]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we use Literal, we can drop the .upper() part

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

raise ValueError("call_method must be either 'GET' or 'POST'")
self.call_method = call_method

def _request(self, params: DictStrAny) -> TDataItem:
def _request(
self, base_params: DictStrAny, detailed_params: DictStrAny
) -> TDataItem:
"""
Helper method that retrieves data and returns the JSON response from the API.

Args:
params (DictStrAny): Parameters for the API request.
base_params (DictStrAny): Parameters for the API request.
detailed_params (DictStrAny): Detailed parameters for the API request.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would make sense to explain why we separate the two here (i.e. Matomo 5 requires detailed params)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking that detailed_params (and variables named the same way) is a bit vague, would there be a better option?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already added the explanation, naming convention is not really my expertise... actually I felt something off with this name too, but could not think of any better name, please feel free to recommend one


Returns:
TDataItem: JSON response from the API.
"""

# loop through all the pages
# the total number of rows is received after the first request, for the first request to be sent through, initializing the row_count to 1 would suffice
headers = {"Content-type": "application/json"}

url = f"{self.base_url}/index.php"
response = client.get(url=url, headers=headers, params=params)
if self.call_method.upper() == "POST":
headers = {"Content-type": "application/x-www-form-urlencoded"}
response = client.post(
url=url, headers=headers, data=detailed_params, params=base_params
)
else:
headers = {"Content-type": "application/json"}
final_params = base_params.copy()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wondering if there's a reason to copy the base params, maybe it's cleaner to simply

params = {**base_params, **detailed_params}
response = client.get(url=url, headers=headers, params=params)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

final_params.update(detailed_params)
response = client.get(url=url, headers=headers, params=final_params)
response.raise_for_status()
json_response = response.json()
# matomo returns error with HTTP 200
Expand Down Expand Up @@ -78,20 +97,22 @@ def get_query(
# Set up the API URL and parameters
if not extra_params:
extra_params = {}
params = {
base_params = {
"module": "API",
"method": "API.getBulkRequest",
"format": "json",
}
detailed_params = {
"token_auth": self.auth_token,
}
for i, method in enumerate(methods):
params[
detailed_params[
f"urls[{i}]"
] = f"method={method}&idSite={site_id}&period={period}&date={date}"
# Merge the additional parameters into the request parameters
params.update(extra_params)
detailed_params.update(extra_params)
# Send the API request
return self._request(params=params)
return self._request(base_params=base_params, detailed_params=detailed_params)

def get_method(
self,
Expand All @@ -112,32 +133,39 @@ def get_method(
Yields:
Iterator[TDataItems]: JSON data from the response.
"""

# Set up the API URL and parameters
if not extra_params:
extra_params = {}

filter_offset = 0
params = {

base_params = {
"module": "API",
"method": "API.getBulkRequest",
"format": "json",
"token_auth": self.auth_token,
"urls[0]": f"method={method}&idSite={site_id}&filter_limit={rows_per_page}&filter_offset={filter_offset}",
}
# Merge the additional parameters into the request parameters
params.update(extra_params)
# Send the API request
method_data = self._request(params=params)[0]
while len(method_data):
yield method_data
filter_offset += len(method_data)
params[
"urls[0]"
] = f"method={method}&idSite={site_id}&filter_limit={rows_per_page}&filter_offset={filter_offset}"
method_data = self._request(params=params)[0]

while True:
detailed_params = {
"urls[0]": f"method={method}&idSite={site_id}&filter_limit={rows_per_page}&filter_offset={filter_offset}",
"token_auth": self.auth_token,
}
detailed_params.update(extra_params)
response_data = self._request(
base_params=base_params, detailed_params=detailed_params
)
if not response_data or not isinstance(response_data, list):
break
batch = response_data[0] if len(response_data) > 0 else []
if not batch:
break
yield batch
filter_offset += len(batch)
Comment on lines +146 to +162
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wondering if keeping the previous flow an option here, since it was lightly easier to read

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

During my test, the old flow did not work


def get_visitors_batch(
self, visitor_list: List[str], site_id: int, extra_params: DictStrAny = None
self,
visitor_list: List[str],
site_id: int,
extra_params: DictStrAny = None,
) -> TDataItems:
"""
Gets visitors for Matomo.
Expand All @@ -152,19 +180,19 @@ def get_visitors_batch(
"""
if not extra_params:
extra_params = {}
params = {
base_params = {
"module": "API",
"method": "API.getBulkRequest",
"format": "json",
"site_id": site_id,
"token_auth": self.auth_token,
}
params.update(
{
f"urls[{i}]": f"method=Live.getVisitorProfile&idSite={site_id}&visitorId={visitor_list[i]}"
for i in range(len(visitor_list))
}
detailed_params = {
f"urls[{i}]": f"method=Live.getVisitorProfile&idSite={site_id}&visitorId={visitor_list[i]}"
for i in range(len(visitor_list))
}
detailed_params["token_auth"] = self.auth_token
detailed_params.update(extra_params)
method_data = self._request(
base_params=base_params, detailed_params=detailed_params
)
params.update(extra_params)
method_data = self._request(params=params)
return method_data
Loading