Fix documentation for skip_data flag (#70)

lukasbindreiter · web-flow · commit 122ca9dad6ed · 2025-07-08T16:14:31.000+02:00
diff --git a/api-reference/python/tilebox.datasets/Collection.find.mdx b/api-reference/python/tilebox.datasets/Collection.find.mdx
@@ -19,7 +19,7 @@ Find a specific datapoint in a collection by its id.
 </ParamField>
 
 <ParamField path="skip_data" type="bool">
-    Whether to skip loading the data for the datapoint. If `True`, only the metadata for the datapoint is loaded.
+    If `True`, the response contains only the ID and the timestamp for the datapoint. Defaults to `False`.
 </ParamField>
 
 ## Returns
@@ -38,7 +38,17 @@ Since it returns only a single data point, the output xarray dataset does not in
 ```python Python
 data = collection.find(
     "0186d6b6-66cc-fcfd-91df-bbbff72499c3",
-    skip_data = False,
 )
+
+
+# check if a datapoint exists
+try:
+    collection.find(
+        "0186d6b6-66cc-fcfd-91df-bbbff72499c3",
+        skip_data=True,
+    )
+    exists = True
+except NotFoundError:
+    exists = False
 ```
 </RequestExample>
diff --git a/api-reference/python/tilebox.datasets/Collection.query.mdx b/api-reference/python/tilebox.datasets/Collection.query.mdx
@@ -32,7 +32,7 @@ If no data exists for the requested time or interval, an empty `xarray.Dataset`
 </ParamField>
 
 <ParamField path="skip_data" type="bool">
-  If `True`, the response contains only the [required fields for the dataset type](/datasets/types/timeseries) without the actual dataset-specific fields. Defaults to `False`.
+  If `True`, the response contains only the ID and the timestamp for each datapoint. Defaults to `False`.
 </ParamField>
 
 <ParamField path="show_progress" type="bool">
@@ -54,7 +54,13 @@ data = collection.query(temporal_extent=time)
 
 # querying a time interval
 interval = ("2023-05-01", "2023-08-01")
-data = collection.query(temporal_extent=interval, show_progress=True)
+data = collection.query(temporal_extent=interval)
+
+# displaying a progress bar while querying
+data = collection.query(
+  temporal_extent=interval,
+  show_progress=True,
+)
 
 # querying a time interval with TimeInterval
 interval = TimeInterval(
@@ -63,11 +69,13 @@ interval = TimeInterval(
     start_exclusive=False,
     end_inclusive=False,
 )
-data = collection.query(temporal_extent=interval, show_progress=True)
+data = collection.query(temporal_extent=interval)
 
 # querying with an iterable
-meta_data = collection.query(temporal_extent=..., skip_data=True)
-first_50 = collection.query(temporal_extent=meta_data.time[:50], skip_data=False)
+datapoints = collection.query(
+  temporal_extent=interval,
+  skip_data=True,  # only fetch datapoint IDs and time
+)
+first_50 = collection.query(temporal_extent=datapoints.time[:50])
 ```
 </RequestExample>
-
diff --git a/datasets/delete.mdx b/datasets/delete.mdx
@@ -89,15 +89,15 @@ Deleted 2 data points.
 
 ## Deleting a time interval
 
-One common way to delete data is to first load it from a collection and then forward it to the `delete` method. For
-this use case it often is a good idea to query the datapoints with `skip_data=True` to avoid loading the data fields,
-since you only need the datapoint IDs. See [fetching only metadata](/datasets/query#fetching-only-metadata) for more details.
+One common way to delete all datapoints in a time interval is to first query them from a collection and then delete the
+datapoints that were found. For this use case it is often a good idea to query the datapoints with `skip_data=True` to avoid
+loading the data fields, since only the datapoint IDs are required. See [skipping data fields](/datasets/query#skipping-data-fields) for more details.
 
 <CodeGroup>
 ```python Python
 to_delete = collection.query(temporal_extent=("2023-05-01", "2023-06-01"), skip_data=True)
 
-n_deleted = collection.delete(datapoints)
+n_deleted = collection.delete(to_delete)
 print(f"Deleted {n_deleted} data points.")
 ```
 ```go Go
diff --git a/datasets/query.mdx b/datasets/query.mdx
@@ -274,9 +274,9 @@ You can specify a time interval by using an iterable of `TimeScalar`s as the `te
 <CodeGroup>
     ```python Python
     interval = ("2017-01-01", "2023-01-01")
-    meta_data = collection.query(temporal_extent=interval, skip_data=True)
+    found_datapoints = collection.query(temporal_extent=interval, skip_data=True)
 
-    first_50_data_points = collection.query(temporal_extent=meta_data.time[:50], skip_data=False)
+    first_50_data_points = collection.query(temporal_extent=found_datapoints.time[:50])
     print(first_50_data_points)
     ```
 </CodeGroup>
@@ -423,19 +423,23 @@ if err != nil {
 ```
 </CodeGroup>
 
-## Fetching only metadata
+## Skipping data fields
 
-Sometimes, it may be useful to load only dataset metadata fields without the actual data fields. This can be done by setting the `skip_data` parameter to `True`.
-For example, when only checking if a datapoint exists, you may want to use `skip_data=True` to avoid loading the data fields.
-If this flag is set, the response will only include the required fields for the given dataset type, but no custom data fields.
+Sometimes, only the ID or timestamp associated with a datapoint is required. In this case, loading the full data fields for each datapoint is not necessary and can be avoided by
+setting the `skip_data` parameter to `True`.
+
+For example, when only checking how many datapoints exist in a given time interval, you can use `skip_data=True` to avoid loading the data fields.
 
 <CodeGroup>
     ```python Python
-    data = collection.query(temporal_extent="2024-08-01 00:00:01.362", skip_data=True)
-    print(data)
+    interval = ("2023-01-01", "2023-02-01")
+    data = collection.query(temporal_extent=interval, skip_data=True)
+    print(f"Found {data.sizes['time']} data points.")
     ```
 ```go Go
-temporalExtent := query.NewPointInTime(time.Date(2024, time.August, 1, 0, 0, 1, 362000000, time.UTC))
+startDate := time.Date(2023, time.January, 1, 0, 0, 0, 0, time.UTC)
+endDate := time.Date(2023, time.February, 1, 0, 0, 0, 0, time.UTC)
+interval := query.NewTimeInterval(startDate, endDate)
 
 var datapoints []*v1.Sentinel1Sar
 err = client.Datapoints.QueryInto(ctx,
@@ -592,10 +596,15 @@ Data variables: (12/30)
 </CodeGroup>
 
 <Tip>
-  You can also set the `skip_data` parameter when calling `find` to query only the required fields of the data point, same as for `load`.
+  You can also set the `skip_data` parameter when calling `find` to query only the ID and the timestamp of the data point, same as for `query`.
 </Tip>
 
 ## Automatic pagination
 
 Querying large time intervals can return a large number of data points.
 Tilebox automatically handles pagination for you by sending paginated requests to the server.
+
+<Tip>
+When using the Python SDK in an interactive notebook environment, you can also display a
+progress bar to track the progress of the query by setting the `show_progress` parameter to `True`.
+</Tip>