diff --git a/docs/README.md b/docs/README.md
index d203759b7..390466d60 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -40,5 +40,5 @@ pip install -r requirements.txt
### 3. Build the html
```bash
-python -m sphinx -T -b html -d _build/doctrees -D language=en source html
+python3.8 -m sphinx -T -b html -d _build/doctrees -D language=en source html
```
diff --git a/docs/_static/css/pdc.css b/docs/_static/css/pdc.css
index 63a7ed5d1..3818f9e1d 100644
--- a/docs/_static/css/pdc.css
+++ b/docs/_static/css/pdc.css
@@ -62,4 +62,10 @@ div.rst-content dl dt {
background: #efe7fa;
color: #65419d;
border-left: 3px solid #65419d;
+}
+
+.bordered-image {
+ border: 2px solid #555;
+ padding: 4px;
+ border-radius: 4px;
}
\ No newline at end of file
diff --git a/docs/_static/figs/pdc-containers-objects-regions.drawio b/docs/_static/figs/pdc-containers-objects-regions.drawio
new file mode 100644
index 000000000..ce0c690fa
--- /dev/null
+++ b/docs/_static/figs/pdc-containers-objects-regions.drawio
@@ -0,0 +1,229 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/_static/figs/pdc-installation.drawio b/docs/_static/figs/pdc-installation.drawio
new file mode 100644
index 000000000..a02c85433
--- /dev/null
+++ b/docs/_static/figs/pdc-installation.drawio
@@ -0,0 +1,187 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/_static/image/pdc-containers-objects-regions.png b/docs/_static/image/pdc-containers-objects-regions.png
new file mode 100644
index 000000000..036136ca9
Binary files /dev/null and b/docs/_static/image/pdc-containers-objects-regions.png differ
diff --git a/docs/_static/image/pdc-installation.png b/docs/_static/image/pdc-installation.png
new file mode 100644
index 000000000..4734660f4
Binary files /dev/null and b/docs/_static/image/pdc-installation.png differ
diff --git a/docs/container_hashtable_checkpoint.png b/docs/container_hashtable_checkpoint.png
deleted file mode 100644
index 784a9a5e4..000000000
Binary files a/docs/container_hashtable_checkpoint.png and /dev/null differ
diff --git a/docs/data_hashtable_checkpoint.png b/docs/data_hashtable_checkpoint.png
deleted file mode 100644
index 60cda6851..000000000
Binary files a/docs/data_hashtable_checkpoint.png and /dev/null differ
diff --git a/docs/data_server_checkpoint.png b/docs/data_server_checkpoint.png
deleted file mode 100644
index 306057a8c..000000000
Binary files a/docs/data_server_checkpoint.png and /dev/null differ
diff --git a/docs/pdc_metadata_flow.png b/docs/pdc_metadata_flow.png
deleted file mode 100644
index d73aea351..000000000
Binary files a/docs/pdc_metadata_flow.png and /dev/null differ
diff --git a/docs/pdc_plots.pptx b/docs/pdc_plots.pptx
deleted file mode 100644
index 784fa8c18..000000000
Binary files a/docs/pdc_plots.pptx and /dev/null differ
diff --git a/docs/pdc_region_transfer_request_flow.png b/docs/pdc_region_transfer_request_flow.png
deleted file mode 100644
index 2031310ea..000000000
Binary files a/docs/pdc_region_transfer_request_flow.png and /dev/null differ
diff --git a/docs/source/advanced_topics.rst b/docs/source/advanced_topics.rst
new file mode 100644
index 000000000..d3e0cadd7
--- /dev/null
+++ b/docs/source/advanced_topics.rst
@@ -0,0 +1,60 @@
+.. _advanced_topics:
+
+**4.** Advanced Topics
+======================
+
+**4.1** Asynchronous I/O and Event Handling
+-------------------------------------------
+
+PDC’s architecture enables efficient asynchronous I/O operations, allowing computation and communication to overlap,
+which improves application performance especially in HPC environments.
+
+Using asynchronous APIs:
+* PDC provides non-blocking APIs for data transfer such as PDCregion_transfer_start(), which initiates a transfer without waiting for completion
+* Multiple asynchronous transfers can be launched concurrently to maximize throughput
+* Buffers remain valid until the transfer completes, so the application should avoid modifying memory before transfer completion.
+
+Monitoring events:
+* Applications can query the status of transfers using event monitoring APIs like PDCregion_transfer_wait() or polling mechanisms
+* Event callbacks can be registered to handle completion asynchronously, improving responsiveness and resource management.
+
+Waiting for events:
+* Synchronization can be achieved by explicitly waiting for event completion to ensure data consistency
+* Use blocking calls or condition variables to coordinate dependent computations after I/O completion
+
+
+**4.2** Scalability and Performance
+-----------------------------------
+
+Data placement:
+* PDC supports policy-driven data placement to optimize locality and bandwidth usage
+* Object metadata guides data distribution across different storage hierarchies
+* Applications can hint preferred storage classes or tiers to improve I/O performance.
+
+PDC server tuning:
+* Server-side parameters such as thread counts, buffer sizes, and cache policies can be tuned for target workloads and for different data types
+* Load balancing across servers ensures no single node becomes a bottleneck and that data is distributed evenly
+* Profiling server behavior helps identify hot spots or resource contention.
+
+
+
+**4.3** Integration with MPI and Libraries
+------------------------------------------
+
+Using PDC with MPI:
+
+PDC seamlessly integrates with MPI for communication in distributed-memory environments.
+MPI ranks act as PDC clients issuing data operations concurrently.
+Also, MPI synchronization primitives can coordinate phases of PDC usage.
+
+Comparison between libraries:
+
+* Unlike traditional MPI-IO, PDC offers object-based APIs with asynchronous data transfers and metadata indexing
+* Compared to HDF5 or ADIOS, PDC provides a more flexible abstraction layer, optimized for highly scalable and concurrent workloads
+* PDC’s client-server model decouples data access from storage layout, enabling adaptable backends.
+
+**4.4** PDC HDF5
+------------------------------------------
+
+TODO use cases, examples
+
diff --git a/docs/source/api.rst b/docs/source/api.rst
deleted file mode 100644
index 56de98045..000000000
--- a/docs/source/api.rst
+++ /dev/null
@@ -1,914 +0,0 @@
-==================================
-API Documentation with Examples
-==================================
-
----------------------------
-PDC general APIs
----------------------------
-
-.. function:: pdcid_t PDCinit(const char *pdc_name)
-
- :param pdc_name: Reference name for the PDC class. Recommended: "pdc".
- :returns: PDC class ID used for future reference.
-
- All PDC client applications must call ``PDCinit`` before using any PDC functionality.
- This function sets up connections from clients to servers. A valid PDC server must be running.
-
- For developers: currently implemented in `pdc.c`.
-
-.. function:: perr_t PDCclose(pdcid_t pdcid)
-
- :param pdcid: PDC class ID returned from ``PDCinit``.
- :returns: ``SUCCEED`` if no error; otherwise, ``FAIL``.
-
- This is the proper way to end a client-server connection for PDC.
- Every call to ``PDCinit`` must correspond to a call to ``PDCclose``.
-
- For developers: currently implemented in `pdc.c`.
-
-.. function:: perr_t PDC_Client_close_all_server()
-
- :returns: ``SUCCEED`` if no error; otherwise, ``FAIL``.
-
- Closes all running PDC servers.
-
- For developers: see `PDC_client_connect.c`.
-
----------------------------
-PDC container APIs
----------------------------
-
-.. function:: pdcid_t PDCcont_create(const char *cont_name, pdcid_t cont_prop_id)
-
- :param cont_name: the name of container. e.g "c1", "c2"
- :param cont_prop_id: property ID for inheriting a PDC property for container.
- :returns: pdc_id for future referencing of this container, returned from PDC servers.
-
- Create a PDC container for future use.
-
- For developers: currently implemented in `pdc_cont.c`. This function will send a name to server and receive a container id. This function will allocate necessary memories and initialize properties for a container.
-
-.. function:: pdcid_t PDCcont_create_col(const char *cont_name, pdcid_t cont_prop_id)
-
- :param cont_name: the name to be assigned to a container. e.g "c1", "c2"
- :param cont_prop_id: property ID for inheriting a PDC property for container.
- :returns: pdc_id for future referencing.
-
- Exactly the same as ``PDCcont_create``, except all processes must call this function collectively. Create a PDC container for future use collectively.
-
- For developers: currently implemented in `pdc_cont.c`.
-
-.. function:: pdcid_t PDCcont_open(const char *cont_name, pdcid_t pdc)
-
- :param cont_name: the name of container used for PDCcont_create.
- :param pdc: PDC class ID returned from PDCinit.
- :returns: error code. FAIL OR SUCCEED
-
- Open a container. Must make sure a container named ``cont_name`` is properly created (registered by PDCcont_create at remote servers).
-
- For developers: currently implemented in `pdc_cont.c`. This function will make sure the metadata for a container is returned from servers. For collective operations, rank 0 is going to broadcast this metadata ID to the rest of processes. A struct ``_pdc_cont_info`` is created locally for future reference.
-
-.. function:: perr_t PDCcont_close(pdcid_t id)
-
- :param id: container ID, returned from PDCcont_create.
- :returns: error code, SUCCEED or FAIL.
-
- Corresponds to ``PDCcont_open``. Must be called only once when a container is no longer used in the future.
-
- For developers: currently implemented in `pdc_cont.c`. The reference counter of a container is decremented. When the counter reaches zero, the memory of the container can be freed later.
-
-.. function:: struct pdc_cont_info *PDCcont_get_info(const char *cont_name)
-
- :param cont_name: name of the container
- :returns: Pointer to a new structure that contains the container information.
-
- Get container information.
-
- For developers: See `pdc_cont.c`. Use name to search for pdc_id first by linked list lookup. Make a copy of the metadata to the newly malloced structure.
-
-.. function:: perr_t PDCcont_persist(pdcid_t cont_id)
-
- :param cont_id: container ID, returned from PDCcont_create.
- :returns: error code, SUCCEED or FAIL.
-
- Make a PDC container persist.
-
- For developers: see `pdc_cont.c`. Set the container life field ``PDC_PERSIST``.
-
-.. function:: perr_t PDCprop_set_cont_lifetime(pdcid_t cont_prop, pdc_lifetime_t cont_lifetime)
-
- :param cont_prop: Container property pdc_id
- :param cont_lifetime: See container life time (Get container life time link)
- :returns: error code, SUCCEED or FAIL.
-
- Set container life time for a property.
-
- For developers: see `pdc_cont.c`.
-
-.. function:: pdcid_t PDCcont_get_id(const char *cont_name, pdcid_t pdc_id)
-
- :param cont_name: Name of the container
- :param pdc_id: PDC class ID, returned by PDCinit
- :returns: container ID
-
- Get container ID by name. This function is similar to open.
-
- For developers: see `pdc_client_connect.c`. It will query the servers for container information and create a container structure locally.
-
-.. function:: perr_t PDCcont_del(pdcid_t cont_id)
-
- :param cont_id: container ID, returned from PDCcont_create.
- :returns: error code, SUCCEED or FAIL.
-
- Delete a container.
-
- For developers: see `pdc_client_connect.c`. Need to send RPCs to servers for metadata update.
-
-.. function:: perr_t PDCcont_put_tag(pdcid_t cont_id, char *tag_name, void *tag_value, psize_t value_size)
-
- :param cont_id: Container ID, returned from PDCcont_create.
- :param tag_name: Name of the tag
- :param tag_value: Value to be written under the tag
- :param value_size: Number of bytes for the tag_value (tag_size may be more informative)
- :returns: error code, SUCCEED or FAIL.
-
- Record a tag_value under the name ``tag_name`` for the container referenced by ``cont_id``.
-
- For developers: see `pdc_client_connect.c`. Need to send RPCs to servers for metadata update.
-
-.. function:: perr_t PDCcont_get_tag(pdcid_t cont_id, char *tag_name, void **tag_value, psize_t *value_size)
-
- :param cont_id: Container ID, returned from PDCcont_create.
- :param tag_name: Name of the tag
- :param value_size: Number of bytes for the tag_value (tag_size may be more informative)
- :returns:
- * tag_value: Pointer to the value to be read under the tag
- * error code, SUCCEED or FAIL.
-
- Retrieve a tag value to the memory space pointed by the ``tag_value`` under the name ``tag_name`` for the container referenced by ``cont_id``.
-
- For developers: see `pdc_client_connect.c`. Need to send RPCs to servers for metadata retrival.
-
-.. function:: perr_t PDCcont_del_tag(pdcid_t cont_id, char *tag_name)
-
- :param cont_id: Container ID, returned from PDCcont_create.
- :param tag_name: Name of the tag
- :returns: error code, SUCCEED or FAIL.
-
- Delete a tag for a container by name.
-
- For developers: see `pdc_client_connect.c`. Need to send RPCs to servers for metadata update.
-
-.. function:: perr_t PDCcont_put_objids(pdcid_t cont_id, int nobj, pdcid_t *obj_ids)
-
- :param cont_id: Container ID, returned from PDCcont_create.
- :param nobj: Number of objects to be written
- :param obj_ids: Pointers to the object IDs
- :returns: error code, SUCCEED or FAIL.
-
- Put an array of objects to a container.
-
- For developers: see `pdc_client_connect.c`. Need to send RPCs to servers for metadata update.
-
-.. function:: perr_t PDCcont_get_objids(pdcid_t cont_id ATTRIBUTE(unused), int *nobj ATTRIBUTE(unused), pdcid_t **obj_ids ATTRIBUTE(unused))
-
- :returns: TODO
-
-.. function:: perr_t PDCcont_del_objids(pdcid_t cont_id, int nobj, pdcid_t *obj_ids)
-
- :param cont_id: Container ID, returned from PDCcont_create.
- :param nobj: Number of objects to be deleted
- :param obj_ids: Pointers to the object IDs
- :returns: error code, SUCCEED or FAIL.
-
- Delete an array of objects from a container.
-
- For developers: see `pdc_client_connect.c`. Need to send RPCs to servers for metadata update.
-
----------------------------
-PDC object APIs
----------------------------
-
-.. function:: pdcid_t PDCobj_create(pdcid_t cont_id, const char *obj_name, pdcid_t obj_prop_id)
-
- :param cont_id: Container ID, returned from ``PDCcont_create``.
- :param obj_name: Name of the object to be created.
- :param obj_prop_id: Property ID to inherit from.
- :returns: Local object ID.
-
- Create a PDC object. This function sends the object name to the servers and receives an object ID in response.
- The created object inherits attributes from its container and the specified property.
-
- For developers: see `pdc_obj.c`.
-
-.. function:: pdcid_t PDCobj_create_mpi(pdcid_t cont_id, const char *obj_name, pdcid_t obj_prop_id, int rank_id, MPI_Comm comm)
-
- :param cont_id: Container ID, returned from ``PDCcont_create``.
- :param obj_name: Name of the object to be created.
- :param obj_prop_id: Property ID to inherit from.
- :param rank_id: Rank ID where the object is placed.
- :param comm: MPI communicator.
- :returns: Local object ID.
-
- Collective operation to create a PDC object on the specified rank within the communicator.
- If `rank_id` matches the local rank, a local object is created; otherwise, a global object is created.
- The object metadata ID is broadcast to all processes using ``MPI_Bcast``.
-
- For developers: see `pdc_mpi.c`.
-
-.. function:: pdcid_t PDCobj_open(const char *obj_name, pdcid_t pdc)
-
- :param obj_name: Name of the object to open.
- :param pdc: PDC class ID, returned from ``PDCinit``.
- :returns: Local object ID.
-
- Open an existing PDC object by name. If the object has already been created or opened, the same ID is returned.
- Each call to ``PDCobj_open`` must be followed by a corresponding call to ``PDCobj_close``.
-
- For developers: see `pdc_obj.c`.
-
-.. function:: perr_t PDCobj_close(pdcid_t obj_id)
-
- :param obj_id: Local object ID to be closed.
- :returns: Error code, either ``SUCCEED`` or ``FAIL``.
-
- Close an object and release associated resources. Each open must be matched with a close.
-
- For developers: see `pdc_obj.c`.
-
-.. function:: struct pdc_obj_info *PDCobj_get_info(pdcid_t obj)
-
- :param obj: Local object ID.
- :returns: Pointer to object metadata structure.
-
- Retrieve metadata information associated with a local object.
-
- For developers: see `pdc_obj.c`.
-
-.. function:: pdcid_t PDCobj_put_data(const char *obj_name, void *data, uint64_t size, pdcid_t cont_id)
-
- :param obj_name: Name of the object.
- :param data: Pointer to the data memory.
- :param size: Size of the data in bytes.
- :param cont_id: Container ID of the object.
- :returns: Local object ID created locally with the input name.
-
- Write data to a PDC object. The operation sends RPCs to servers to perform the write.
-
- For developers: see `pdc_client_connect.c`. *(TODO: Change return value to `perr_t`.)*
-
-.. function:: perr_t PDCobj_get_data(pdcid_t obj_id, void *data, uint64_t size)
-
- :param obj_id: Local object ID.
- :param size: Size of the data to read.
- :returns: Error code, either ``SUCCEED`` or ``FAIL``.
- :param data: Pointer to memory where data will be written.
-
- Read data from a PDC object.
-
- For developers: see `pdc_client_connect.c`. Uses ``PDCobj_get_info`` to look up the name, which is then forwarded to the servers to complete the request.
-
-.. function:: perr_t PDCobj_del_data(pdcid_t obj_id)
-
- :param obj_id: Local object ID.
- :returns: Error code, either ``SUCCEED`` or ``FAIL``.
-
- Delete the data associated with a PDC object.
-
- For developers: see `pdc_client_connect.c`. Uses ``PDCobj_get_info`` to retrieve the object name, then sends the deletion request to the servers.
-
----------------------------
-PDC metadata APIs
----------------------------
-PDC maintains object metadata (obj name, dimension, create time, etc.) in a distributed hash table. Each object's metadata can be
-accessed with its object ID. Users can also issue metadata queries to retrieve the object IDs that meet the query constraints.
-
-PDC allows users to add key-value tags to each object, where key is a string and value can be a binary array of any datatype and length.
-The key-value tags are stored in an in-memory linked list by default.
-
-PDC has metadata indexing and querying support when DART is enabled. See ``DART`` section in the Developer Notes.
-
-PDC additionally supports managing the key-value tags with RocksDB and SQLite, both are considered experimental at the moment.
-Either RocksDB or SQLite can be enabled by turning on the ``PDC_ENABLE_ROCKSDB`` or ``PDC_USE_SQLITE3`` flag in CMake, setting the
-``ROCKSDB_DIR`` or ``SQLITE3_DIR`` and setting the environment variable ``PDC_USE_ROCKSDB`` or ``PDC_USE_SQLITE3`` to 1 before launching the server.
-Users can use the same PDC query APIs when RocksDB or SQLite is enabled.
-
-
-.. function:: perr_t PDCobj_put_tag(pdcid_t obj_id, char *tag_name, void *tag_value, psize_t value_size)
-
- :param obj_id: Local object ID.
- :param tag_name: Name of the tag to be set.
- :param tag_value: Pointer to the value of the tag.
- :param value_size: Size of the tag value in bytes.
- :returns: Error code, SUCCEED or FAIL.
-
- Set the tag value for a given object.
-
- For developers: see `pdc_client_connect.c`. Uses ``PDC_add_kvtag`` to send RPCs to the servers for metadata updates.
-
-.. function:: perr_t PDCobj_get_tag(pdcid_t obj_id, char *tag_name, void **tag_value, psize_t *value_size)
-
- :param obj_id: Local object ID.
- :param tag_name: Name of the tag to be retrieved.
- :param tag_value: Pointer to the buffer to receive the tag value.
- :param value_size: Pointer to the size of the tag value in bytes.
- :returns: Error code, SUCCEED or FAIL.
-
- Retrieve the value of a tag associated with an object.
-
- For developers: see `pdc_client_connect.c`. Uses ``PDC_get_kvtag`` to send RPCs to the servers for metadata retrieval.
-
-.. function:: perr_t PDCobj_del_tag(pdcid_t obj_id, char *tag_name)
-
- :param obj_id: Local object ID.
- :param tag_name: Name of the tag to be deleted.
- :returns: Error code, SUCCEED or FAIL.
-
- Delete a tag associated with an object.
-
- For developers: see `pdc_client_connect.c`. Uses ``PDCtag_delete`` to send RPCs to the servers for metadata updates.
-
----------------------------
-PDC Data query APIs
----------------------------
-
-.. function:: pdc_query_t *PDCquery_create(pdcid_t obj_id, pdc_query_op_t op, pdc_var_type_t type, void *value)
-
- :param obj_id: Local PDC object ID.
- :param op: One of the PDC query operators.
- :param type: One of the PDC basic types.
- :param value: Constraint value.
- :returns: A new query structure.
-
- Create a PDC query.
-
- For developers: see `pdc_query.c`. The constraint field of the new query structure is filled with the input arguments. Searches for the metadata ID using the object ID.
-
-.. function:: void PDCquery_free(pdc_query_t *query)
-
- :param query: PDC query from `PDCquery_create`.
-
- Free a query structure.
-
- For developers: see `pdc_client_server_common.c`.
-
-.. function:: void PDCquery_free_all(pdc_query_t *root)
-
- :param root: Root of queries to be freed.
- :returns: Error code, SUCCEED or FAIL.
-
- Free all queries from a root.
-
- For developers: see `pdc_client_server_common.c`. Recursively frees left and right branches.
-
-.. function:: pdc_query_t *PDCquery_and(pdc_query_t *q1, pdc_query_t *q2)
-
- :param q1: First query.
- :param q2: Second query.
- :returns: A new query after applying the AND operator.
-
- Perform the AND operation on two PDC queries.
-
- For developers: see `pdc_query.c`.
-
-.. function:: pdc_query_t *PDCquery_or(pdc_query_t *q1, pdc_query_t *q2)
-
- :param q1: First query.
- :param q2: Second query.
- :returns: A new query after applying the OR operator.
-
- Perform the OR operation on two PDC queries.
-
- For developers: see `pdc_query.c`.
-
-.. function:: perr_t PDCquery_sel_region(pdc_query_t *query, struct pdc_region_info *obj_region)
-
- :param query: Query to select the region.
- :param obj_region: An object region.
- :returns: Error code, SUCCEED or FAIL.
-
- Select a region for a PDC query.
-
- For developers: see `pdc_query.c`. Sets the region pointer of the query structure to `obj_region`.
-
-.. function:: perr_t PDCquery_get_selection(pdc_query_t *query, pdc_selection_t *sel)
-
- :param query: Query to get the selection.
- :param sel: Pointer to PDC selection structure.
- :returns: Error code, SUCCEED or FAIL.
-
- Get the selection information of a PDC query.
-
- For developers: see `pdc_query.c` and `PDC_send_data_query` in `pdc_client_connect.c`. Copies the selection structure received from servers to the `sel` pointer.
-
-.. function:: perr_t PDCquery_get_nhits(pdc_query_t *query, uint64_t *n)
-
- :param query: Query to calculate the number of hits.
- :param n: Pointer to number of hits.
- :returns: Error code, SUCCEED or FAIL.
-
- Get the number of hits for a PDC query.
-
- For developers: see `pdc_query.c` and `PDC_send_data_query` in `pdc_client_connect.c`. Uses the same selection mechanism as `PDCquery_get_selection`.
-
-.. function:: perr_t PDCquery_get_data(pdcid_t obj_id, pdc_selection_t *sel, void *obj_data)
-
- :param obj_id: The object for query.
- :param sel: Selection of the query (query ID is embedded).
- :param obj_data: Pointer to memory for storing query data.
- :returns: Error code, SUCCEED or FAIL.
-
- Retrieve data from a PDC query for an object.
-
- For developers: see `pdc_query.c` and `PDC_Client_get_sel_data` in `pdc_client_connect.c`.
-
-.. function:: perr_t PDCquery_get_histogram(pdcid_t obj_id)
-
- :param obj_id: The object for query.
- :returns: Error code, SUCCEED or FAIL.
-
- Retrieve histogram from a query for a PDC object.
-
- For developers: see `pdc_query.c`. This is a local operation and is currently a no-op.
-
-.. function:: void PDCselection_free(pdc_selection_t *sel)
-
- :param sel: Pointer to the selection to be freed.
-
- Free a selection structure.
-
- For developers: see `pdc_client_connect.c`. Frees the coordinates.
-
-.. function:: void PDCquery_print(pdc_query_t *query)
-
- :param query: The query to be printed.
-
- Print the details of a PDC query structure.
-
- For developers: see `pdc_client_server_common.c`.
-
-.. function:: void PDCselection_print(pdc_selection_t *sel)
-
- :param sel: The PDC selection to be printed.
-
- Print the details of a PDC selection structure.
-
- For developers: see `pdc_client_server_common.c`.
-
----------------------------
-PDC hist APIs
----------------------------
-
-.. function:: pdc_histogram_t *PDC_gen_hist(pdc_var_type_t dtype, uint64_t n, void *data)
-
- :param dtype: One of the PDC basic types. See `PDC basic types <#>`_.
- :param n: Number of values with the basic type.
- :param data: Pointer to the data buffer.
- :returns: A new PDC histogram structure. See `PDC histogram structure <#>`_.
-
- Generate a PDC histogram from data. This can be used to optimize performance.
-
- For developers: see `pdc_hist_pkg.c`.
-
-
-.. function:: pdc_histogram_t *PDC_dup_hist(pdc_histogram_t *hist)
-
- :param hist: A PDC histogram structure. See `PDC histogram structure <#>`_.
- :returns: A copied PDC histogram structure. See `PDC histogram structure <#>`_.
-
- Create a copy of an existing PDC histogram.
-
- For developers: see `pdc_hist_pkg.c`.
-
-
-.. function:: pdc_histogram_t *PDC_merge_hist(int n, pdc_histogram_t **hists)
-
- :param n: Number of histograms to merge.
- :param hists: Array of PDC histogram structures. See `PDC histogram structure <#>`_.
- :returns: A merged PDC histogram structure. See `PDC histogram structure <#>`_.
-
- Merge multiple PDC histograms into one.
-
- For developers: see `pdc_hist_pkg.c`.
-
-
-.. function:: void PDC_free_hist(pdc_histogram_t *hist)
-
- :param hist: The PDC histogram structure to be freed. See `PDC histogram structure <#>`_.
- :returns: None.
-
- Free the memory allocated for a PDC histogram.
-
- For developers: see `pdc_hist_pkg.c`. Frees the internal arrays of the structure.
-
-
-.. function:: void PDC_print_hist(pdc_histogram_t *hist)
-
- :param hist: The PDC histogram structure to be printed. See `PDC histogram structure <#>`_.
- :returns: None.
-
- Print the contents of a PDC histogram, including bin counters.
-
- For developers: see `pdc_hist_pkg.c`.
-
----------------------------
-Basic types
----------------------------
-
-.. code-block:: c
-
- typedef enum {
- PDC_UNKNOWN = -1, /* error */
- PDC_INT = 0, /* integer types (identical to int32_t) */
- PDC_FLOAT = 1, /* floating-point types */
- PDC_DOUBLE = 2, /* double types */
- PDC_CHAR = 3, /* character types */
- PDC_STRING = 4, /* string types */
- PDC_BOOLEAN = 5, /* boolean types */
- PDC_SHORT = 6, /* short types */
- PDC_UINT = 7, /* unsigned integer types (identical to uint32_t) */
- PDC_INT64 = 8, /* 64-bit integer types */
- PDC_UINT64 = 9, /* 64-bit unsigned integer types */
- PDC_INT16 = 10, /* 16-bit integer types */
- PDC_INT8 = 11, /* 8-bit integer types */
- PDC_UINT8 = 12, /* 8-bit unsigned integer types */
- PDC_UINT16 = 13, /* 16-bit unsigned integer types */
- PDC_INT32 = 14, /* 32-bit integer types */
- PDC_UINT32 = 15, /* 32-bit unsigned integer types */
- PDC_LONG = 16, /* long types */
- PDC_VOID_PTR = 17, /* void pointer type */
- PDC_SIZE_T = 18, /* size_t type */
- PDC_TYPE_COUNT = 19 /* this is the number of var types and has to be the last */
- } pdc_c_var_type_t;
-
----------------------------
-Histogram structure
----------------------------
-
-.. code-block:: c
-
- typedef struct pdc_histogram_t {
- pdc_var_type_t dtype;
- int nbin;
- double incr;
- double *range;
- uint64_t *bin;
- } pdc_histogram_t;
-
----------------------------
-Container info
----------------------------
-
-.. code-block:: c
-
- struct pdc_cont_info {
- /*Inherited from property*/
- char *name;
- /*Registered using PDC_id_register */
- pdcid_t local_id;
- /* Need to register at server using function PDC_Client_create_cont_id */
- uint64_t meta_id;
- };
-
----------------------------
-Container life time
----------------------------
-
-.. code-block:: c
-
- typedef enum {
- PDC_PERSIST,
- PDC_TRANSIENT
- } pdc_lifetime_t;
-
----------------------------
-Object property public
----------------------------
-
-.. code-block:: c
-
- struct pdc_obj_prop *obj_prop_pub {
- /* This ID is the one returned from PDC_id_register . This is a property ID*/
- pdcid_t obj_prop_id;
- /* object dimensions */
- size_t ndim;
- uint64_t *dims;
- pdc_var_type_t type;
- };
-
----------------------------
-Object property
----------------------------
-
-.. code-block:: c
-
- struct _pdc_obj_prop {
- /* Suffix _pub probably means public attributes to be accessed. */
- struct pdc_obj_prop *obj_prop_pub {
- /* This ID is the one returned from PDC_id_register . This is a property ID*/
- pdcid_t obj_prop_id;
- /* object dimensions */
- size_t ndim;
- uint64_t *dims;
- pdc_var_type_t type;
- };
- /* This ID is returned from PDC_find_id with an input of ID returned from PDC init.
- * This is true for both object and container.
- * I think it is referencing the global PDC engine through its ID (or name). */
- struct _pdc_class *pdc{
- char *name;
- pdcid_t local_id;
- };
- /* The following are created with NULL values in the PDC_obj_create function. */
- uint32_t user_id;
- char *app_name;
- uint32_t time_step;
- char *data_loc;
- char *tags;
- void *buf;
- pdc_kvtag_t *kvtag;
-
- /* The following have been added to support of PDC analysis and transforms.
- Will add meanings to them later, they are not critical. */
- size_t type_extent;
- uint64_t locus;
- uint32_t data_state;
- struct _pdc_transform_state transform_prop{
- _pdc_major_type_t storage_order;
- pdc_var_type_t dtype;
- size_t ndim;
- uint64_t dims[4];
- int meta_index; /* transform to this state */
- };
- };
-
----------------------------
-Object info
----------------------------
-
-.. code-block:: c
-
- struct pdc_obj_info {
- /* Directly coped from user argument at object creation. */
- char *name;
- /* 0 for location = PDC_OBJ_LOAL.
- * When PDC_OBJ_GLOBAL = 1, use PDC_Client_send_name_recv_id to retrieve ID. */
- pdcid_t meta_id;
- /* Registered using PDC_id_register */
- pdcid_t local_id;
- /* Set to 0 at creation time. *
- int server_id;
- /* Object property. Directly copy from user argument at object creation. */
- struct pdc_obj_prop *obj_pt;
- };
-
----------------------------
-Object structure
----------------------------
-
-.. code-block:: c
-
- struct _pdc_obj_info {
- /* Public properties */
- struct pdc_obj_info *obj_info_pub {
- /* Directly copied from user argument at object creation. */
- char *name;
- /* 0 for location = PDC_OBJ_LOAL.
- * When PDC_OBJ_GLOBAL = 1, use PDC_Client_send_name_recv_id to retrieve ID. */
- pdcid_t meta_id;
- /* Registered using PDC_id_register */
- pdcid_t local_id;
- /* Set to 0 at creation time. *
- int server_id;
- /* Object property. Directly copy from user argument at object creation. */
- struct pdc_obj_prop *obj_pt;
- };
- /* Argument passed to obj create*/
- _pdc_obj_location_t location enum {
- /* Either local or global */
- PDC_OBJ_GLOBAL,
- PDC_OBJ_LOCAL
- }
- /* May be used or not used depending on which creation function called. */
- void *metadata;
- /* The container pointer this object sits in. Copied*/
- struct _pdc_cont_info *cont;
- /* Pointer to object property. Copied*/
- struct _pdc_obj_prop *obj_pt;
- /* Linked list for region, initialized with NULL at create time.*/
- struct region_map_list *region_list_head {
- pdcid_t orig_reg_id;
- pdcid_t des_obj_id;
- pdcid_t des_reg_id;
- /* Double linked list usage*/
- struct region_map_list *prev;
- struct region_map_list *next;
- };
- };
-
-
----------------------------
-Region info
----------------------------
-
-.. code-block:: c
-
- struct pdc_region_info {
- pdcid_t local_id;
- struct _pdc_obj_info *obj;
- size_t ndim;
- uint64_t *offset;
- uint64_t *size;
- bool mapping;
- int registered_op;
- void *buf;
- };
-
----------------------------
-Access type
----------------------------
-
-.. code-block:: c
-
- typedef enum { PDC_NA=0, PDC_READ=1, PDC_WRITE=2 }
-
----------------------------
-Query operators
----------------------------
-
-.. code-block:: c
-
- typedef enum {
- PDC_OP_NONE = 0,
- PDC_GT = 1,
- PDC_LT = 2,
- PDC_GTE = 3,
- PDC_LTE = 4,
- PDC_EQ = 5
- } pdc_query_op_t;
-
----------------------------
-Query structures
----------------------------
-
-.. code-block:: c
-
- typedef struct pdc_query_t {
- pdc_query_constraint_t *constraint{
- pdcid_t obj_id;
- pdc_query_op_t op;
- pdc_var_type_t type;
- double value; // Use it as a generic 64bit value
- pdc_histogram_t *hist;
-
- int is_range;
- pdc_query_op_t op2;
- double value2;
-
- void *storage_region_list_head;
- pdcid_t origin_server;
- int n_sent;
- int n_recv;
- }
- struct pdc_query_t *left;
- struct pdc_query_t *right;
- pdc_query_combine_op_t combine_op;
- struct pdc_region_info *region; // used only on client
- void *region_constraint; // used only on server
- pdc_selection_t *sel;
- } pdc_query_t;
-
----------------------------
-Selection structure
----------------------------
-
-.. code-block:: c
-
- typedef struct pdcquery_selection_t {
- pdcid_t query_id;
- size_t ndim;
- uint64_t nhits;
- uint64_t *coords;
- uint64_t coords_alloc;
- } pdc_selection_t;
-
----------------------------
-Developers notes
----------------------------
-
-* This note is for developers. It helps developers to understand the code structure of PDC code as fast as possible.
-* PDC internal data structure
-
- * Linkedlist
- * Linkedlist is an important data structure for managing PDC IDs.
- * Overall. An PDC instance after PDC_Init() has a global variable pdc_id_list_g. See pdc_interface.h
-
- .. code-block:: c
-
- struct PDC_id_type {
- PDC_free_t free_func; /* Free function for object's of this type */
- PDC_type_t type_id; /* Class ID for the type */
- // const PDCID_class_t *cls;/* Pointer to ID class */
- unsigned init_count; /* # of times this type has been initialized */
- unsigned id_count; /* Current number of IDs held */
- pdcid_t nextid; /* ID to use for the next atom */
- DC_LIST_HEAD(_pdc_id_info) ids; /* Head of list of IDs */
- };
-
- struct pdc_id_list {
- struct PDC_id_type *PDC_id_type_list_g[PDC_MAX_NUM_TYPES];
- };
- struct pdc_id_list *pdc_id_list_g;
-
- * pdc_id_list_g is an array that stores the head of linked list for each types.
- * The _pdc_id_info is defined as the followng in pdc_id_pkg.h.
-
- .. code-block:: c
-
- struct _pdc_id_info {
- pdcid_t id; /* ID for this info */
- hg_atomic_int32_t count; /* ref. count for this atom */
- void *obj_ptr; /* pointer associated with the atom */
- PDC_LIST_ENTRY(_pdc_id_info) entry;
- };
-
- * obj_ptr is the pointer to the item the ID refers to.
- * See pdc_linkedlist.h for implementations of search, insert, remove etc. operations
-
- * ID
- * ID is important for managing different data structures in PDC.
- * e.g Creating objects or containers will return IDs for them
-
- * pdcid_t PDC_id_register(PDC_type_t type, void *object)
- * This function maintains a linked list. Entries of the linked list is going to be the pointers to the objects. Every time we create an object ID for object using some magics. Then the linked list entry is going to be put to the beginning of the linked list.
- * type: One of the followings
-
- .. code-block:: c
-
- typedef enum {
- PDC_BADID = -1, /* invalid Type */
- PDC_CLASS = 1, /* type ID for PDC */
- PDC_CONT_PROP = 2, /* type ID for container property */
- PDC_OBJ_PROP = 3, /* type ID for object property */
- PDC_CONT = 4, /* type ID for container */
- PDC_OBJ = 5, /* type ID for object */
- PDC_REGION = 6, /* type ID for region */
- PDC_NTYPES = 7 /* number of library types, MUST BE LAST! */
- } PDC_type_t;
-
- * Object: Pointer to the class instance created (bad naming, not necessarily a PDC object).
-
-
- * struct _pdc_id_info *PDC_find_id(pdcid_t idid);
- * Use ID to get struct _pdc_id_info. For most of the times, we want to locate the object pointer inside the structure. This is linear search in the linked list.
- * idid: ID you want to search.
-
-* PDC core classes.
-
- * Property
- * Property in PDC serves as hint and metadata storage purposes.
- * Different types of object has different classes (struct) of properties.
- * See pdc_prop.c, pdc_prop.h and pdc_prop_pkg.h for details.
- * Container
- * Container property
-
- .. code-block:: c
-
- struct _pdc_cont_prop {
- /* This class ID is returned from PDC_find_id with an input of ID returned from PDC init. This is true for both object and container.
- *I think it is referencing the global PDC engine through its ID (or name). */
- struct _pdc_class *pdc{
- /* PDC class instance name*/
- char *name;
- /* PDC class instance ID. For most of the times, we only have 1 PDC class instance. This is like a global variable everywhere.*/
- pdcid_t local_id;
- };
- /* This ID is the one returned from PDC_id_register . This is a property ID type.
- * Some kind of hashing algorithm is used to generate it at property create time*/
- pdcid_t cont_prop_id;
- /* Not very important */ pdc_lifetime_t cont_life;
- };
-
- * Container structure (pdc_cont_pkg.h and pdc_cont.h)
-
- .. code-block:: c
-
- struct _pdc_cont_info {
- struct pdc_cont_info *cont_info_pub {
- /*Inherited from property*/
- char *name;
- /*Registered using PDC_id_register */
- pdcid_t local_id;
- /* Need to register at server using function PDC_Client_create_cont_id */
- uint64_t meta_id;
- };
- /* Pointer to container property.
- * This struct is copied at create time.*/
- struct _pdc_cont_prop *cont_pt;
- };
-
-
- * Object
-
- * Object property See `Object Property `_
- * Object structure (pdc_obj_pkg.h and pdc_obj.h) See `Object Structure `_
diff --git a/docs/source/appendix.rst b/docs/source/appendix.rst
new file mode 100644
index 000000000..f71bda51c
--- /dev/null
+++ b/docs/source/appendix.rst
@@ -0,0 +1,8 @@
+.. _appendix:
+
+**9.** Appendix
+===============
+
+Appendix A: Image 1
+Appendix B: Image 2
+Appendix C: API Overview
\ No newline at end of file
diff --git a/docs/source/client_api.rst b/docs/source/client_api.rst
new file mode 100644
index 000000000..26e376602
--- /dev/null
+++ b/docs/source/client_api.rst
@@ -0,0 +1,201 @@
+.. _client_api:
+
+**5.** Client API
+=================
+
+This section documents the main Client API for PDC.
+It includes the types, core layer functions, properties, containers,
+objects, and region management functions. Use the links below to
+quickly navigate to each subsection:
+
+- :ref:`client_api_types` - Types used in the Client API
+- :ref:`client_api_layer` - Initialization and shutdown functions for the PDC layer
+- :ref:`client_api_properties` - Functions for creating and closing properties
+- :ref:`client_api_containers` - Functions for creating and managing containers
+- :ref:`client_api_objects` - Functions for creating and managing objects
+- :ref:`client_api_object_tags` - Functions for creating and managing object tags
+- :ref:`client_api_regions` - Functions for creating and managing regions
+- :ref:`client_api_object_data_transfers` - Functions for object data transfers
+
+.. _client_api_types:
+
+**5.1.** Types
+--------------
+
+.. doxygentypedef:: pdcid_t
+ :project: PDC
+
+.. doxygentypedef:: perr_t
+ :project: PDC
+
+.. doxygentypedef:: pdc_var_type_t
+ :project: PDC
+
+.. doxygenenum:: pdc_prop_type_t
+ :project: PDC
+
+.. doxygenenum:: pdc_region_partition_t
+ :project: PDC
+
+.. doxygenenum:: pdc_lifetime_t
+ :project: PDC
+
+.. doxygenenum:: pdc_prop_type_t
+ :project: PDC
+
+.. doxygenenum:: pdc_consistency_t
+ :project: PDC
+
+.. _client_api_layer:
+
+**5.2.** PDC Layer
+------------------
+
+.. doxygenfunction:: PDCinit
+ :project: PDC
+
+.. doxygenfunction:: PDCclose
+ :project: PDC
+
+.. _client_api_properties:
+
+**5.3.** Properties
+-------------------
+
+.. doxygenfunction:: PDCprop_create
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_close
+ :project: PDC
+
+.. _client_api_containers:
+
+**5.4.** Containers
+-------------------
+
+.. doxygenfunction:: PDCcont_create
+ :project: PDC
+
+.. doxygenfunction:: PDCcont_create_col
+ :project: PDC
+
+.. doxygenfunction:: PDCcont_open
+ :project: PDC
+
+.. doxygenfunction:: PDCcont_open_col
+ :project: PDC
+
+.. doxygenfunction:: PDCcont_close
+ :project: PDC
+
+.. doxygenfunction:: PDCcont_persist
+ :project: PDC
+
+.. _client_api_objects:
+
+**5.5.** Objects
+----------------
+
+.. doxygenfunction:: PDCobj_create
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_create_mpi
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_open
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_open_col
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_close
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_get_info
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_obj_dup
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_set_obj_type
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_set_obj_dims
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_set_obj_user_id
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_set_obj_time_step
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_set_obj_app_name
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_set_obj_tags
+ :project: PDC
+
+.. doxygenfunction:: PDCprop_set_obj_transfer_region_type
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_iter_start
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_iter_null
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_iter_get_info
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_iter_next
+ :project: PDC
+
+.. _client_api_object_tags:
+
+**5.6.** Object Tags
+--------------------
+
+.. doxygenfunction:: PDCprop_set_obj_tags
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_put_tag
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_get_tag
+ :project: PDC
+.. _client_api_regions:
+
+**5.7.** Regions
+----------------
+
+.. doxygenfunction:: PDCregion_create
+ :project: PDC
+
+.. doxygenfunction:: PDCregion_close
+ :project: PDC
+
+.. _client_api_object_data_transfers:
+
+**5.8.** Object Data Transfers
+------------------------------
+
+.. doxygenfunction:: PDCregion_transfer_start
+ :project: PDC
+
+.. doxygenfunction:: PDCregion_transfer_wait
+ :project: PDC
+
+.. doxygenfunction:: PDCregion_transfer_close
+ :project: PDC
+
+.. doxygenfunction:: PDCregion_transfer_start_all
+ :project: PDC
+
+.. doxygenfunction:: PDCregion_transfer_wait_all
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_put_data
+ :project: PDC
+
+.. doxygenfunction:: PDCobj_get_data
+ :project: PDC
\ No newline at end of file
diff --git a/docs/source/client_tools.rst b/docs/source/client_tools.rst
new file mode 100644
index 000000000..9692d4a0c
--- /dev/null
+++ b/docs/source/client_tools.rst
@@ -0,0 +1,267 @@
+.. _client_tools:
+
+**6.** Client Tools
+===================
+
+The PDC Client Tools provide a set of command-line utilities for interacting with
+PDC metadata and datasets. These tools allow users to inspect metadata checkpoints,
+import data from HDF5 files into PDC, and export PDC checkpoints to external formats.
+They are intended to simplify common client-side operations and to provide insight
+into the contents and organization of PDC-managed objects. This section describes
+how to build the tools and details the primary commands available to users.
+
+**6.1** Building PDC Tools
+--------------------------
+
+.. code-block:: Bash
+
+ $ cd tools
+ $ cmake .
+ $ make
+
+**6.2** PDC Tool Commands
+-------------------------
+
+- :ref:`pdc_ls` - List objects in a PDC metadata checkpoint or directory of checkpoints.
+- :ref:`pdc_import` - Import HDF5 files into PDC by converting them to a PDC checkpoint.
+- :ref:`pdc_export` - Export a PDC metadata checkpoint to a specified file format (currently HDF5).
+
+pdc_ls
+~~~~~~
+
+.. _pdc_ls:
+
+Takes in a directory containing PDC metadata checkpoints or an individual metadata checkpoint file and outputs information on objects saved in the checkpoint(s).
+
+Usage: :code:`./pdc_ls <checkpoint_dir_or_file>`
+
+Arguments:
+
+- :code:`-json <file_name>`: Save the output to the specified file <file_name> in JSON format.
+- :code:`-n <object_name>`: Only display objects with a specific object name <object_name>. Regex matching of object names is supported.
+- :code:`-i <object_id>`: Only display objects with a specific object ID <object_id>. Regex matching of object IDs is supported.
+- :code:`-ln`: List all object names as an additional field in the output.
+- :code:`-li`: List all object IDs as an additional field in the output.
+- :code:`-s`: Display summary statistics (number of objects, containers, regions) as an additional field in the output.
+
+Examples:
+
+.. code-block:: Bash
+
+ $ ./pdc_ls pdc_tmp -n id.*
+ [INFO] File [pdc_tmp/metadata_checkpoint.0] last modified at: Fri Mar 11 14:19:13 2022
+
+ {
+ "cont_id: 1000000": [
+ {
+ "obj_id": 1000007,
+ "app_name": "VPICIO",
+ "obj_name": "id11",
+ "user_id": 0,
+ "tags": "ag0=1",
+ "data_type": "PDC_INT",
+ "num_dims": 1,
+ "dims": [8388608],
+ "time_step": 0,
+ "region_list_info": [
+ {
+ "storage_loc": "/user/pdc_data/1000007/server0/s0000.bin",
+ "offset": 33554432,
+ "num_dims": 1,
+ "start": [0],
+ "count": [8388608],
+ "unit_size": 4,
+ "data_loc_type": "PDC_NONE"
+ }
+ ]
+ },
+ {
+ "obj_id": 1000008,
+ "app_name": "VPICIO",
+ "obj_name": "id22",
+ "user_id": 0,
+ "tags": "ag0=1",
+ "data_type": "PDC_INT",
+ "num_dims": 1,
+ "dims": [8388608],
+ "time_step": 0,
+ "region_list_info": [
+ {
+ "storage_loc": "/user/pdc_data/1000008/server0/s0000.bin",
+ "offset": 33554432,
+ "num_dims": 1,
+ "start": [0],
+ "count": [8388608],
+ "unit_size": 4,
+ "data_loc_type": "PDC_NONE"
+ }
+ ]
+ }
+ ]
+ }
+
+.. code-block:: Bash
+
+ $ ./pdc_ls pdc_tmp -n obj-var-p.* -ln -li
+ [INFO] File [pdc_tmp/metadata_checkpoint.0] last modified at: Fri Mar 11 14:19:13 2022
+
+ {
+ "cont_id: 1000000": [
+ {
+ "obj_id": 1000004,
+ "app_name": "VPICIO",
+ "obj_name": "obj-var-pxx",
+ "user_id": 0,
+ "tags": "ag0=1",
+ "data_type": "PDC_FLOAT",
+ "num_dims": 1,
+ "dims": [8388608],
+ "time_step": 0,
+ "region_list_info": [
+ {
+ "storage_loc": "/user/pdc_data/1000004/server0/s0000.bin",
+ "offset": 33554432,
+ "num_dims": 1,
+ "start": [0],
+ "count": [8388608],
+ "unit_size": 4,
+ "data_loc_type": "PDC_NONE"
+ }
+ ]
+ },
+ {
+ "obj_id": 1000005,
+ "app_name": "VPICIO",
+ "obj_name": "obj-var-pyy",
+ "user_id": 0,
+ "tags": "ag0=1",
+ "data_type": "PDC_FLOAT",
+ "num_dims": 1,
+ "dims": [8388608],
+ "time_step": 0,
+ "region_list_info": [
+ {
+ "storage_loc": "/user/pdc_data/1000005/server0/s0000.bin",
+ "offset": 33554432,
+ "num_dims": 1,
+ "start": [0],
+ "count": [8388608],
+ "unit_size": 4,
+ "data_loc_type": "PDC_NONE"
+ }
+ ]
+ },
+ {
+ "obj_id": 1000006,
+ "app_name": "VPICIO",
+ "obj_name": "obj-var-pzz",
+ "user_id": 0,
+ "tags": "ag0=1",
+ "data_type": "PDC_FLOAT",
+ "num_dims": 1,
+ "dims": [8388608],
+ "time_step": 0,
+ "region_list_info": [
+ {
+ "storage_loc": "/user/pdc_data/1000006/server0/s0000.bin",
+ "offset": 33554432,
+ "num_dims": 1,
+ "start": [0],
+ "count": [8388608],
+ "unit_size": 4,
+ "data_loc_type": "PDC_NONE"
+ }
+ ]
+ }
+ ],
+ "all_obj_names": ["obj-var-pxx", "obj-var-pyy", "obj-var-pzz"],
+ "all_obj_ids": [1000004, 1000005, 1000006]
+ }
+
+pdc_import
+~~~~~~~~~~
+
+.. _pdc_import:
+
+Takes in a file containing line-separated paths to HDF5 files and converts those HDF5 files to a PDC checkpoint.
+
+Usage: :code:`./pdc_import <file_name_list>`
+
+Arguments:
+
+- :code:`-a <app_name>`: Uses the specified <app_name> as the application name when creating PDC objects.
+- :code:`-o`: Specifies whether or not to overwrite pre-existing PDC objects when writing a PDC object that already exists.
+
+Examples:
+
+.. code-block:: Bash
+
+ $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap /path/to/pdc_server &
+ ==PDC_SERVER[0]: using [./pdc_tmp/] as tmp dir, 1 OSTs, 1 OSTs per data file, 0% to BB
+ ==PDC_SERVER[0]: using ofi+tcp
+ ==PDC_SERVER[0]: without multi-thread!
+ ==PDC_SERVER[0]: Read cache enabled!
+ ==PDC_SERVER[0]: Successfully established connection to 0 other PDC servers
+ ==PDC_SERVER[0]: Server ready!
+
+ $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap ./pdc_import file_names_list
+ ==PDC_CLIENT: PDC_DEBUG set to 0!
+ ==PDC_CLIENT[0]: Found 1 PDC Metadata servers, running with 1 PDC clients
+ ==PDC_CLIENT: using ofi+tcp
+ ==PDC_CLIENT[0]: Client lookup all servers at start time!
+ ==PDC_CLIENT[0]: using [./pdc_tmp] as tmp dir, 1 clients per server
+ Running with 1 clients, 1 files
+ Importer 0: I will import 1 files
+ Importer 0: [../../test.h5]
+ Importer 0: processing [../../test.h5]
+ Importer 0: Created container [/]
+
+ ==PDC_SERVER[0]: Checkpoint file [./pdc_tmp/metadata_checkpoint.0]
+ Import 8 datasets with 1 ranks took 0.93 seconds.
+
+pdc_export
+~~~~~~~~~~
+
+.. _pdc_export:
+
+Converts PDC metadata checkpoint to a file of specified format. Currently only HDF5 is supported.
+
+Usage: :code:`./pdc_export <checkpoint_dir_or_file>`
+
+Arguments:
+
+- :code:`-f <format>`: Uses the specified export <format>. Currently only HDF5 exports are supported.
+
+Examples:
+
+.. code-block:: Bash
+
+ $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap /path/to/pdc_server &
+ ==PDC_SERVER[0]: using [./pdc_tmp/] as tmp dir, 1 OSTs, 1 OSTs per data file, 0% to BB
+ ==PDC_SERVER[0]: using ofi+tcp
+ ==PDC_SERVER[0]: without multi-thread!
+ ==PDC_SERVER[0]: Read cache enabled!
+ ==PDC_SERVER[0]: Successfully established connection to 0 other PDC servers
+ ==PDC_SERVER[0]: Server ready!
+
+ $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap ./pdc_export pdc_tmp
+ ==PDC_CLIENT: PDC_DEBUG set to 0!
+ ==PDC_CLIENT[0]: Found 1 PDC Metadata servers, running with 1 PDC clients
+ ==PDC_CLIENT: using ofi+tcp
+ ==PDC_CLIENT[0]: Client lookup all servers at start time!
+ ==PDC_CLIENT[0]: using [./pdc_tmp] as tmp dir, 1 clients per server
+ [INFO] File [pdc_tmp/metadata_checkpoint.0] last modified at: Mon May 9 06:17:18 2022
+
+ POSIX read from file offset 117478480, region start = 0, region size = 8388608
+ POSIX read from file offset 130024208, region start = 0, region size = 8388608
+ POSIX read from file offset 130057104, region start = 0, region size = 8388608
+ POSIX read from file offset 130056720, region start = 0, region size = 8388608
+ POSIX read from file offset 130023696, region start = 0, region size = 8388608
+ POSIX read from file offset 130056592, region start = 0, region size = 8388608
+ POSIX read from file offset 130056720, region start = 0, region size = 8388608
+ POSIX read from file offset 130023696, region start = 0, region size = 8388608
+
+.. warning::
+
+ PDC tools currently do not support compound data types and will have unexpected
+ behavior when attempting to work with compound data types.
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
new file mode 100644
index 000000000..64ccaa3aa
--- /dev/null
+++ b/docs/source/contributing.rst
@@ -0,0 +1,144 @@
+.. _contributing:
+
+**8.** Contributing
+===================
+
+This page contains the standards for the PDC projects, covering coding, repository management, pull requests, documentation, and tests.
+
+**8.1.** Coding Standards
+-------------------------
+
+- PDC uses the ``clang-format`` check for all ``.c``, ``.h``, ``.cpp``, ``.hpp`` files.
+- PDC CI will ensure the code follows that format and push any changes that can be automatically fixed during this process.
+- Novel features must be accompanied by their respective tests:
+
+ - Unit tests
+ - Functionality and integration tests
+ - Examples
+ - Check if any of the new tests should be run as part of the CI
+ - Specification for the CI checklist integration
+ - Should be looked at in the review process
+ - Include as a checklist on the PR template
+
+- Novel features must be accompanied by their respective in-code documentation:
+
+ - Include as a checklist on the PR template
+ - **L1 (mandatory)**: function, short summary, params, return values (exceptions), TODO/future improvements
+ - **L2 (if needed)**: algorithm description, data structure description, complex code, optimizations
+ - Should be looked at in the review process
+
+**8.2.** Repository
+-------------------
+
+Branches Policy
+~~~~~~~~~~~~~~~
+
+- PDC officially has two main branches: ``stable`` and ``develop``.
+- To include your contributions in PDC, a few scenarios might arise:
+
+ - Everything has to go through an issue, and issue approval will determine if it goes into a fork or main repository (branch) (internal)
+ - Branch creation in the main repository should be discussed and approved
+ - If changes are in a cloned repository, after a while, if relevant, discuss moving to main repository in a specific branch
+
+- **Approved Overall process** (not the main repository):
+
+ 1. Create your own clone; **do NOT** create additional branches in the main repository for this
+ 2. Create a new branch based on the ``develop`` branch
+ 3. Make your changes
+ 4. Open a pull request to the official PDC repository into the ``develop`` branch
+
+- **All PRs must**:
+
+ - Have a clear description of the changes
+ - Have all discussions resolved
+ - Pass all tests in the public CI
+ - Branch should be updated with ``develop``
+ - Pass the NERSC CI test, after approval from a repository maintainer
+
+ .. note::
+ Whoever starts/approves this CI to start is responsible for checking the code to ensure it does not include any malicious code, especially from first-time contributors.
+
+ - Include documentation changes (if needed)
+ - Include tests (for new features)
+
+- Everything must first be merged into ``develop`` branch, and once a new release is made, that branch is synchronized with the ``stable`` one.
+- Release train, plan for the monthly meeting for this.
+
+**PR merge process approval:**
+
+- Two reviewers for ``develop``
+- Two reviewers for ``stable``
+- No bypass of the merging rules
+- Squash and Merge by default; exceptions may be requested
+- For PRs in ``develop`` branch, at least one approval is required to merge
+- For PRs in the ``stable`` branch, two approvals are required to merge
+
+Labels Policy
+~~~~~~~~~~~~~
+
+- PDC has a list of labels to be applied to issues and PRs:
+ https://github.com/hpc-io/pdc/labels
+
+- Update labels:
+
+ - Include type prefix for: ``bug``, ``CI``, ``documentation``, ``enhancement``, ``new feature``, ``question``, ``tests``
+ - Include decision prefix for: ``duplicate``, ``help wanted``, ``invalid``, ``wontfix``
+
+- **Issue Title Format**::
+
+ [Proposed Due Date][Priority][Type][Composer ID][Title][Related PR#]
+
+ Example:
+ [2023/06/23][Medium][DOC][wzhang] Dart Integration - missing documentation [PR#49]
+ [2023/06/23][Low][DOC][wzhang] Periodical Format Check [N/A]
+
+- Most metadata goes into labels; other information, such as related PRs and issues, should be disclosed by explicit reference.
+- Before working on an issue, make sure it has the appropriate labels.
+- Every change should start with an issue.
+- When opening a PR and reviewing one, make sure labels are there and correctly reflect the content.
+
+Pull Request Template
+---------------------
+
+.. code-block:: md
+
+ # Related Issues / Pull Requests
+
+ List all related issues and/or pull requests if there are any.
+
+ # Description
+
+ Include a brief summary of the proposed changes.
+
+ # What changes are proposed in this pull request?
+
+ - [ ] Bug fix
+ - [ ] New feature
+ - [ ] Breaking change
+ - [ ] Documentation update
+
+ # Checklist:
+
+ - [ ] My code modifies existing public API, or introduces new public API, and I updated or wrote docstrings
+ - [ ] I have commented my code
+ - [ ] My code requires documentation updates, and I have made corresponding changes
+ - [ ] I have added tests
+ - [ ] All unit tests pass locally with my changes
+
+Issues
+------
+
+- **Implemented**: PDC has four templates to help create issues. Users can still open blank issues.
+
+**Bug Report template:**
+
+.. code-block:: md
+
+ **Bug Report**
+ A clear and concise description of what the bug is.
+
+ **To Reproduce**
+ How are you building/running PDC?
+
+ ```bash
+ ...
\ No newline at end of file
diff --git a/docs/source/core_concepts.rst b/docs/source/core_concepts.rst
new file mode 100644
index 000000000..1cbb70b54
--- /dev/null
+++ b/docs/source/core_concepts.rst
@@ -0,0 +1,181 @@
+.. _core_concepts:
+
+**2.** Core Concepts
+====================
+
+**2.1.** Architecture of PDC
+----------------------------
+
+PDC is built on a distributed client-server architecture optimized for
+high-performance computing (HPC) environments. In this model, clients
+are user processes that interact with the PDC client library and API
+to initiate data creation, movement, querying, and transformation.
+Servers are background processes that carry out these operations as
+requested by the clients. Communication between clients and servers
+is handled via Mercury RPCs and, when enabled, MPI. This architecture
+supports scalable, asynchronous, and metadata-rich operations that decouple
+the data model from physical data location.
+
+Data Management and Movement
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+PDC manages both data and metadata in a way that optimizes movement across
+deep memory hierarchies. Data is stored in objects and moved asynchronously
+through region-based APIs, while metadata is distributed and indexed to support
+scalable querying. Key features of this model include asynchronous data transfers,
+region-aware memory binding, and automatic coordination between memory and
+storage locations.
+
+**2.2.** PDC abstractions
+-------------------------
+
+PDC provides several core abstractions for modeling and managing data.
+
+
+.. figure:: ../_static/image/pdc-containers-objects-regions.png
+ :alt: PDC containers, objects, and regions
+ :align: center
+ :class: bordered-image
+ :scale: 70%
+
+ Relationship between PDC containers, objects, and regions.
+
+Containers
+~~~~~~~~~~
+
+- Logical groupings of related data objects, similar to folders or directories
+- Associated with metadata such as creation time and persistence
+
+Objects
+~~~~~~~
+
+- Represent a combination of raw data and descriptive metadata
+- Structured as multidimensional arrays (e.g., 1D, 2D, or 3D) to support scientific data layouts
+- Can be queried, updated, and transferred without referencing physical storage directly
+
+Regions
+~~~~~~~
+
+- Represent the fundamental unit of data access in PDC
+- Defined as multidimensional sub-sections within an object
+- Multiple regions can be created for batch or parallel operations
+- Used to read or write portions of data during transfers
+
+.. note::
+ PDC currently supports a maximum of 3 dimensions for both objects and regions.
+
+**2.3.** Properties
+-------------------
+
+Properties determine how objects, containers, etc., behave.
+A property list is created using ``PDCprop_create(x)``, where ``x`` is
+the entity type (``PDC_CONTAINER``, ``PDC_OBJ``, etc.). Once created, the
+property list can be configured through additional function calls that
+append or modify properties. These customized property lists can then be
+used in later calls to control the behavior of the associated entities. Some
+examples of configurable properties are shown below.
+
+Container Properties
+~~~~~~~~~~~~~~~~~~~~
+
+Container properties define key attributes that determine the behavior and
+lifecycle of containers within the system. These properties are typically
+specified at container creation time and can be queried or modified via
+container-related functions.
+
+- **Lifetime**
+ Containers can be created with a specified lifetime, such as *persistent* or *transient*.
+ Persistent containers remain accessible across multiple sessions, whereas transient containers
+ exist only for the duration of a program's execution.
+
+- **Creation and Opening**
+ Containers can be created and opened either individually or collectively (across multiple ranks),
+ enabling both independent and coordinated container management.
+
+- **Information and Iteration**
+ Once created or opened, container properties and metadata can be retrieved through information
+ query functions. Containers can also be iterated over to discover all containers within a
+ given context.
+
+- **Persistence Control**
+ Transient containers can be explicitly persisted to extend their lifetime beyond the current execution.
+
+- **Initialization and Finalization**
+ The container subsystem provides explicit initialization and finalization calls to manage resources properly.
+
+Object Properties
+~~~~~~~~~~~~~~~~~
+
+Object properties characterize the essential attributes and behavior of objects managed within containers.
+These properties define the shape, type, location, and metadata associated with an object, enabling
+precise control over how the object is created, accessed, and managed.
+
+- **Initialization and Creation**
+ Objects are initialized within a container context and can be created either locally or collectively,
+ allowing flexibility in parallel or distributed environments.
+
+- **Data Type and Dimensionality**
+ Properties specify the variable type of the object data (such as integer or float) as well as its
+ dimensions, which can include fixed sizes or support for unlimited dimensions.
+
+- **Metadata and Tags**
+ Objects can carry associated metadata such as user IDs, application names, time steps, data
+ location paths, and user-defined tags to facilitate identification and management.
+
+- **Consistency and Partitioning**
+ Properties include options to define consistency semantics and data transfer partitioning
+ strategies, helping optimize performance and correctness in concurrent access scenarios.
+
+- **Buffers and Caching**
+ Objects can be linked with data buffers, and explicit control over cache flushing is
+ supported to ensure data integrity and synchronization.
+
+- **Lifecycle and Management**
+ Object properties support lifecycle operations such as opening, closing, iterating over
+ multiple objects within a container, and deleting objects when no longer needed.
+
+- **Query and Modification**
+ Functions allow querying and modifying object properties and dimensions dynamically, supporting
+ evolving data and usage patterns.
+
+**2.4.** Data Access Lifecycle
+------------------------------
+
+The typical workflow for interacting with PDC objects is described below.
+
+Object Creation
+~~~~~~~~~~~~~~~
+
+1. Define object properties (datatype, size, etc.).
+2. Create or select a container.
+3. Call PDCobj_create() to allocate the object with metadata.
+
+Allocation of Regions & Containers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. Use PDCregion_create() to define the region to write or read from.
+2. The region is tied to an object and a memory location.
+3. Multiple regions can be created for batch or parallel operations.
+
+Asynchronous & Synchronous Data Transfers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Asynchronous**
+
+1. Initiated using PDCbuf_obj_map() or PDCregion_transfer_start()
+2. Completion checked with PDCregion_transfer_wait()
+3. Enables overlapping of computation and communication
+
+**Synchronous**
+
+1. Blocking operations that ensure data is transferred before proceeding
+2. Useful for simple or sequential workflows
+
+Finalization
+~~~~~~~~~~~~
+
+Once all of the operations are completed:
+
+1. Call PDCobj_close() and PDCcont_close() to release all handles
+2. Use PDCclose() to clean up the PDC environment
+3. Ensure that all resources have been flushed and deallocated
\ No newline at end of file
diff --git a/docs/source/developer-notes.rst b/docs/source/developer-notes.rst
deleted file mode 100644
index a5bace3ae..000000000
--- a/docs/source/developer-notes.rst
+++ /dev/null
@@ -1,615 +0,0 @@
-================================
-Developer Notes
-================================
-
-+++++++++++++++++++++++++++++++++++++++++++++
-PDC Logging
-+++++++++++++++++++++++++++++++++++++++++++++
-
-The following macros are defined in the header file `src/commons/logging/include/pdc_logger.h` and should be used instead of `printf` for logging.
-
-The available macros, which follow a similar usage to `printf`, are:
-
-.. code-block:: C
-
- LOG_ERROR
- LOG_WARNING
- LOG_INFO
- LOG_DEBUG
- LOG_JUST_PRINT
-
-Each of these macros automatically prepends a timestamp and log level prefix to the output:
-
-.. code-block:: C
-
- [year-month-day hour:minute:second] [ERROR]
- [year-month-day hour:minute:second] [WARNING]
- [year-month-day hour:minute:second] [INFO]
- [year-month-day hour:minute:second] [DEBUG]
-
-`LOG_JUST_PRINT` does not print a prefix and outputs the message as-is. This macro is useful when you need to print raw messages without log metadata.
-
-+++++++++++++++++++++++++++++++++++++++++++++
-PDC Server Metadata Management
-+++++++++++++++++++++++++++++++++++++++++++++
-
-PDC metadata servers, a subset of PDC servers, store metadata for PDC classes such as objects and containers. PDC data server, also a subset of PDC servers (potentially overlapping with PDC metadata server), manages data from users. Such management includes server local caching and I/O to the file system. Both PDC metadata and data servers have some local metadata.
-
----------------------------------------------
-PDC Metadata Structure
----------------------------------------------
-
-PDC metadata is held in server memories. When servers are closed, metadata will be checkpointed into the local file system. Details about the checkpoint will be discussed in the metadata implementation section.
-
-PDC metadata consists of three major parts at the moment:
-
-- Metadata stored in the hash tables at the metadata server: stores persistent properties for PDC containers and PDC objects. When objects are created, these metadata are registered at the metadata server using mercury RPCs.
-
-- Metadata query class at the metadata server: maps an object region to a data server, so clients can query for this information to access the corresponding data server. It is only used by dynamic region partition strategy
-
-- Object regions stored at the data server: this includes file names and region chunking information inside the object file on the file system.
-
----------------------------------------------
-Metadata Operations at Client Side
----------------------------------------------
-
-In general, PDC object metadata is initialized when an object is created. The metadata stored at the metadata server is permanent. When clients create the objects, a PDC property is used as one of the arguments for the object creation function. Metadata for the object is set by using PDC property APIs. Most of the metadata are not subject to any changes. Currently, we support setting/getting object dimensions using object API.
-
----------------------------------------------
-PDC Metadata Management Strategy
----------------------------------------------
-
-This section discusses the metadata management approaches of PDC. First, we briefly summarize how PDC managed metadata in the past. Then, we propose new infrastructures for metadata management.
-
-
-Managing Metadata and Data by the Same Server
----------------------------------------------
-
-Historically, a PDC server manages both metadata and data for objects it is responsible for. A client forwards I/O requests to the server computed based on MPI ranks statically. If a server is located on the same node as the client, the server will be chosen with a higher priority. This design can achieve high I/O parallelism if the I/O workloads from all clients are well-balanced. In addition, communication contention is minimized because servers are dedicated to serving disjoint subsets of clients.
-
-However, this design has two potential drawbacks. The first disadvantage is supporting general I/O access. For clients served by different PDC servers, accessing overlapping regions is infeasible. Therefore, this design is specialized in applications with a non-overlapping I/O pattern. The second disadvantage is a lack of dynamic load-balancing mechanisms. For example, some applications use a subset of processes for processing I/O. A subset of servers may stay idle because the clients mapped to them are not sending I/O requests.
-
-
-Separate Metadata Server from Data Server
----------------------------------------------
-
-Metadata service processes are required for distributed I/O applications with a one-sided communication design. When a client attempts to modify or access an object, metadata provides essential information such as object dimensions and the data server rank that contains the regions of interest. A PDC client generally does not have the runtime global metadata information. As a result, the first task is to obtain the essential metadata of the object from the correct metadata server.
-
-Instead of managing metadata and data server together, we can separate the metadata management from the region I/O. A metadata server stores and manages all attributes related to a subset of PDC objects. A PDC server can be both a metadata and data server. However, the metadata and data can refer to different sets of objects.
-
-This approach's main advantage is that the object regions' assignment to data servers becomes flexible. When an object is created, the name of the object maps to a unique metadata server. In our implementation, we adopt string hash values for object names and modulus operations to achieve this goal. The metadata information will be registered at the metadata server. Later, when other clients open the object, they can use the object's name to locate the same metadata server.
-
-When a client accesses regions of an object, the metadata server informs the client of the corresponding data servers it should transfer its I/O requests. Metadata servers can map object regions to data servers in a few different methods.
-
----------------------------------------------
-PDC Metadata Management Implementation
----------------------------------------------
-
-This section discusses how object metadata is implemented in the PDC production library. The following figure illustrates the flow of object metadata for different object operations. We label the 4 types of metadata in bold.
-
-Create Metadata
----------------------------------------------
-
-Metadata for an object is created by using a PDC property. PDC property is created using client API ``PDCprop_create(pdc_prop_type_t type, pdcid_t pdc_id)``. After a property instance is created, it is possible to set elements in this property using object property APIs. An alternative way is to use ``pdcid_t PDCprop_obj_dup(pdcid_t prop_id)``, which copies all the existing entries in a property to a new object instance.
-
-Binding Metadata to Object
----------------------------------------------
-
-Metadata is attached to an object at the object creation time. ``PDCobj_create(pdcid_t cont_id, const char *obj_name, pdcid_t obj_prop_id)`` is the prototype for binding an object property when an object is created.
-
-Register Object Metadata at Metadata Server
----------------------------------------------
-
-Once an object is created locally at a client, the object metadata is sent to the corresponding metadata server based on the hash value computed from the object name. Internally, search for ``typedef struct pdc_metadata_t {...} pdc_metadata_t;`` in the ``pdc_client_server_common.h`` file. This data structure contains essential metadata about the object, such as its dimension and name.
-
-Retrieve Metadata from Metadata Server
----------------------------------------------
-
-Object metadata can be obtained from the metadata server when clients open an object using the prototype ``pdcid_t PDCobj_open(const char *obj_name, pdcid_t pdc)``. The client contacts the corresponding metadata server to retrieve data from the data type ``pdc_metadata_t`` stored at the server.
-
-Object Metadata at Client
----------------------------------------------
-
-The current implementation stores metadata at the client in two separate places due to historical reasons. Both places can be accessed from the data type ``struct _pdc_obj_info*``, which is a data type defined in ``pdc_obj_pkg.h``.
-
-We can generally use ``struct _pdc_id_info *PDC_find_id(pdcid_t obj_id)`` to locate the object info pointer ``obj``. Then, ``(struct _pdc_obj_info )(obj->obj_ptr)`` allows use to obtain the ``struct _pdc_obj_info`` structure. We call this pointer ``obj_info_ptr``.
-The first piece of local metadata, denoted as metadata buffer, is stored in ``obj_info_ptr->metadata``. This value is a pointer that represents ``pdc_metadata_t``. Its content matches the values stored at the metadata server side exactly. For object create, we copy the data from the pointer to the server memory using mercury RPCs. For object open, we copy from server memory to client memory.
-
-The second piece of local metadata, denoted as object public property, is stored in ``obj_info_ptr->obj_pt``, which has type ``struct pdc_obj_prop`` defined in the ``pdc_prop.h`` file. The values in this data type are copied from the first piece. This metadata data type contains essential information, such as object dims and region partition types.
-
-Metadata at Data Server
----------------------------------------------
-
-Details about the data server will not be discussed in this section. In general, a data server takes inputs (both metadata and data for an object) from clients and processes them accordingly. It is not supposed to store metadata information for objects. However, it is responsible for storing the locations of data in the file system, including path and offset for regions.
-
-If server cache is enabled, object dimension is stored by the server cache infrastructure when an object is registered for the first time. Object dimension is not used anywhere unless the I/O mode is set to be canonical file order storage. Currently, this mode does not allow clients to change object dimension, so it is not subject to metadata update, which is discussed in the following subsection.
-
-Object Metadata Update
----------------------------------------------
-
-Object metadata is defined before creating an object. At the early stage of PDC, we did not plan to change any of the metadata after an object was created. However, it may be necessary to do this in the future. For example, sometimes applications want to change the sizes of PDC objects along different dimensions. An example is implemented as ``perr_t PDCobj_set_dims(pdcid_t obj_id, int ndim, uint64_t *dims)``. This function can change object dimensions in runtime. As mentioned earlier, we need to update the metadata in three places. Two places are at the client side, and the other place is at the metadata server.
-
-Object Region Metadata
----------------------------------------------
-
-Region metadata is required for dynamic region partitioning. Dynamic region partitioning strategy at the metadata server assigns data server IDs for regions in runtime. The file ``pdc_server_region_transfer_metadata_query.c`` implements the assignments of data server ID for individual regions. For dynamic region partition and local region partition strategies, a metadata server receives client region transfer requests. The metadata server returns a data server ID to the client so the client can send data to the corresponding data server. Details about how the client connects to the metadata server will be discussed in the implementation of the region transfer request.
-
-Metadata Checkpoint
----------------------------------------------
-
-When PDC servers are closed, metadata stored by metadata servers is saved to the file system. Later, when users restart the servers, essential metadata are read back to the memory of the metadata server. In general, client applications should not be aware of any changes if servers are closed and restarted. This subsection layout the data format of PDC metadata when they are checkpointed.
-
-Implementation of server checkpoint is in the function ``PDC_Server_checkpoint``, and the corresponding restart is in the function ``PDC_Server_restart(char *filename)``. The source file is ``pdc_server.c``.
-
-There are four categories of metadata to be checkpointed. One category is concatenated after another seamlessly. We demonstrate the first three categories of metadata in the following figures. Before each bracket, an integer value will indicate the number of repetitions for contents in the brackets. Contents after the bracket will start from the next byte after the last repetition for contents in the bracket. The last category is managed by an independent module ``pdc_server_region_transfer_metadata_query.c``. The content of the metadata is subject to future changes.
-
-!!!!!
-
-Region metadata checkpoint is placed at the end of the server checkpoint file, right after the last byte of data server region. Function ``transfer_request_metadata_query_checkpoint(char **checkpoint, uint64_t *checkpoint_size)`` in ``pdc_server_region_transfer_metadata_query.c`` file handles the wrapping of region metadata.
-
----------------------------------------------
-Metadata Search and Its Implementation
----------------------------------------------
-
-For Metadata search, we current provide no-index approaches and index-facilitated approaches.
-For either of these approaches, we consider two types of communication model : point-to-point and collective.
-
-Point-to-point communication model is for distributed applications where each single client may not follow the exact same workflows, and the timing for them to trigger a metadata search function call can be really random. In this case, each client contacts one or more metadata servers and get the complete result.
-Collective communication model applies when a typical application is running. In such an application, each rank follows the exact same workflow and they may trigger a metadata search function call at the same time and the metadata search requests are sent from these clients collectively. In this case, each rank contacts one metadata server and retrieves partial result. Then these clients have to communicate with each other to get the complete result.
-
-No-index Approach
----------------------------------------------
-
-For No-index approach, here are the APIs you can call for different communication models:
- * PDC_Client_query_kvtag (point-to-point)
- * PDC_Client_query_kvtag_mpi (collective)
-
-The default PDC kvtags are stored within each object's metadata as a linked list, and any query involves traversing the list in memory.
-
-We have additional support to manage the kvtags with RocksDB and SQLite. With this approach, each PDC server creates and accesses its own RocksDB and SQLite database file, which is stored as an in-memory file in /tmp directory. When RocksDB or SQLite is enabled with setting the environment variable ``PDC_USE_ROCKSDB=1`` or ``PDC_USE_SQLITE3=1``.
-With the RocksDB implementation, each kvtag is stored as a RocksDB key-value pair. To differenciate the kvtags for different objects, we encode the object ID to the key string used for the RocksDB, and store the value as the RocksDB value. As a result, the value can be retrieved directly when its object ID and key string is known. Otherwise we must iterate over the entire DB to search for an kvtag.
-With the SQLite3 implementation, each kvtag is inserted as a row in a SQLite3 table. Currently, the table has the following columns and SQLite3 datatypes: objid (INTEGER), name (TEXT), value_text(TEXT), value_int(INTEGER), value_float(REAL), value_double(REAL), value_blob(BLOB). We create a SQL SELECT statement automatically on the server when receiving a query request from the PDC client. Currently this implementation is focused on supporting string/text affix search and integer/float (single) value match search.
-Currently, both the RocksDB and the SQLite implementation are developed for benchmarking purpose, the database files are removed at server finalization time, and restart is not supported.
-
-Index-facilitated Approach
----------------------------------------------
-
-For index-facilitated approach, here are the APIs you can call for different communication models:
- * PDC_Client_search_obj_ref_through_dart (point-to-point)
- * PDC_Client_search_obj_ref_through_dart_mpi (collective)
-
-Before using these APIs, you need to create your index first, so please remember to call `PDC_Client_insert_obj_ref_into_dart` right after a successful function call of `PDCobj_put_tag`.
-
-Note for the query string:
-
- * For String Queries:
- * The query string can be of the following format:
- * String Queries:
- * 1. Exact: key=\"value\"
- * 2. Prefix: key*=\"value*\"
- * 3. Suffix: *key=\"*value\"
- * 4. Infix: *key*=\"*value*\"
- *
- * For Number Queries:
- * 1. Exact: key=value
- * 2. Range: key=value1|~|value2 (inclusive on both ends, '|' stands for inclusion)
- * 3. Range: key=value1|~ (inclusive on the lower end)
- * 4. Range: key=~|value2 (inclusive on the upper end)
- * 5. Range: key=value1~value2 (exclusive on both ends)
- * 6. Range: key=~value2 (exclusive on the upper end)
- * 7. Range: key=value1~ (exclusive on the lower end)
-
-See the API documentation for more details about the usage of these APIs.
-
-
-------------------------------------------------------------
-DART Suffix Tree Mode
-------------------------------------------------------------
-
-In DART, to support efficient infix search, we can enable the suffix tree mode,
-where suffix search becomes an exact search and infix search becomes a prefix search,
-at the cost of indexing every possible suffix of indexed keywords.
-
-To enable the suffix tree mode, you can turn on/off this switch in CMakeLists.txt:
-`PDC_DART_SUFFIX_TREE_MODE`
-
--------------------------------------------------------------------
-BULKI for dynamic data structure and serialization/deserialization
--------------------------------------------------------------------
-
-In `src/commons/serde`, we implement the `BULKI` module for serialization and deserialization.
-
-BULKI is a data serialization and deserialization framework that supports dynamic and nested data structure.
-Every BULKI is considered a container of KEY-VALUE pairs, where every key/value is a BULKI_Entity
-BULKI_Entity is a container of one of the following:
-
-1. singleton of primitive data types, e.g. int, float, double, etc.
-2. array of primitive data types, e.g. int[3], float[32], double[22], etc.
-3. singleton of BULKI_Entity
-4. array of BULKI_Entity
-
-For more details, please refer to the `BULKI` API documentation and `bulki_serde_test.c` in `src/commons/serde`.
-
-+++++++++++++++++++++++++++++++++++++++++++++
-Object and Region Management
-+++++++++++++++++++++++++++++++++++++++++++++
-
-This section discusses how PDC manages objects and regions.
-
----------------------------------------------
-Static Object Region Mappings
----------------------------------------------
-
-A metadata server can partition the object space evenly among all data servers. For high-dimensional objects, it is possible to define block partitioning methods similar to HDF5s's chunking strategies.
-
-The static object region partitioning can theoretically achieve optimal parallel performance for applications with a balanced workload. In addition, static partitioning determines the mapping from object regions to data servers at object create/open time. No additional metadata management is required.
-
----------------------------------------------
-Dynamic Object Region Mappings
----------------------------------------------
-
-For applications that access a subset of regions for different objects, some data servers can stay idle while the rest are busy fetching or storing data for these regions concentrated around coordinates of interest. Dynamic object partitioning allows metadata servers to balance data server workloads in runtime. The mapping from object regions to the data server is determined at the time of starting region transfer request time.
-Partitioning object regions dynamically increases the complexity of metadata management. For example, a read from one client 0 after a write from another client 1 on overlapping regions demands metadata support. Client 0 has to locate the data server to which client 1 writes the region data using information from the metadata server. As a result, metadata servers must maintain up-to-date metadata of the objects they manage. There are a few options we can implement this feature.
-
-*Option 1*: When a client attempts to modify object regions, the client can also send the metadata of this transfer request to the metadata server. Consequently, the metadata server serving for the modified objects always has the most up-to-date metadata.
-
-Advantage: No need to perform communications between the servers (current strategy)
-Disadvantage: The metadata server can be a bottleneck because the number of clients accessing the server may scale up quickly.
-
-*Option 2*: When a data server receives region transfer requests from any client, the data server forwards the corresponding metadata to the metadata server of the object.
-
-Advantage: The number of servers is less than the number of clients, so we are reducing the chance of communication contention
-Disadvantage: Server-to-server RPC infrastructures need to be put in place.
-
-*Option 3*: Similar to Option 2, but the data server will modify a metadata file. Later, a metadata server always checks the metadata file for metadata information updates.
-
-Advantage: No communications are required if a metadata file is used.
-Disadvantage: Reading metadata files may take some time. If multiple servers are modifying the same metadata file, how should we proceed?
-
-The following table summarizes the communication of the three mapping methods from clients to types of PDC servers when different PDC functions are called.
-
-+-------------------------------+---------------------------------------------+---------------------------------------------------+---------------------------------------------------+
-| | Static Object Mapping | Dynamic Object Mapping & Static Region Mapping | Dynamic Object Mapping & Dynamic Region Mapping |
-+===============================+=============================================+===================================================+===================================================+
-| ``PDC_obj_create`` | Client - Metadata Server | Client - Metadata Server | Client - Metadata Server |
-+-------------------------------+---------------------------------------------+---------------------------------------------------+---------------------------------------------------+
-| ``PDC_obj_open`` | Client - Metadata Server | Client - Metadata Server | Client - Metadata Server |
-+-------------------------------+---------------------------------------------+---------------------------------------------------+---------------------------------------------------+
-| ``PDC_region_transfer_start`` | Client - Data Server | Client - Data Server | Client - Data Server |
-+-------------------------------+---------------------------------------------+---------------------------------------------------+---------------------------------------------------+
-| ``PDC_region_transfer_start`` | Client - Data Server | Client - Data Server | Client - Metadata Server (Option 1) |
-+-------------------------------+---------------------------------------------+---------------------------------------------------+---------------------------------------------------+
-| ``PDC_region_transfer_start`` | Client - Data Server | Client - Data Server | Data Server - Metadata Server (Option 2) |
-+-------------------------------+---------------------------------------------+---------------------------------------------------+---------------------------------------------------+
-| ``PDC_region_transfer_wait`` | Data Server - Client (PDC_READ) | Data Server - Client (PDC_READ) | Data Server - Client (PDC_READ) |
-+-------------------------------+---------------------------------------------+---------------------------------------------------+---------------------------------------------------+
-
-
----------------------------------------------
-Region Transfer Request at Client
----------------------------------------------
-!!!!!
-
-This section describes how the region transfer request module in PDC works. The region transfer request module is the core of PDC I/O. From the client's point of view, some data is written to regions of objects through transfer request APIs. PDC region transfer request module arranges how data is transferred from clients to servers and how data is stored at servers.
-
-PDC region: A PDC object abstracts a multi-dimensional array. The current implementation supports up to 3D. A PDC region can be used to access a subarray of the object. A PDC region describes the offsets and lengths to access a multi-dimensional array. Its prototype for creation is ``PDCregion_create(psize_t ndims, uint64_t *offset, uint64_t *size)``. The input values to this create function will be copied into PDC internal memories, so it is safe to free the pointers later.
-
-Region Transfer Request Create and Close
----------------------------------------------
-
-Region transfer request create function has prototype ``PDCregion_transfer_create(void *buf, pdc_access_t access_type, pdcid_t obj_id, pdcid_t local_reg, pdcid_t remote_reg)``. The function takes a contiguous data buffer as input. Content in this data buffer will be stored in the region described by ``remote_reg`` for objects with ``obj_id``. Therefore, ``remote_reg`` has to be contained in the dimension boundaries of the object. The transfer request create function copies the region information into the transfer request's memory, so it is safe to immediately close both ``local_reg`` and ``remote_reg`` after the create function is called.
-
-``local_reg`` describes the shape of the data buffer, aligning to the object's dimensions. For example, if ``"local_reg`` is a 1D region, the start index of the buf to be stored begins at the ``offset[0]`` of the ``local_reg``, with a size of ``size[0]``. Recall that ``offset`` and ``size`` are the input argument. If ``local_reg`` has dimensions larger than 1, then the shape of the data buffer is a subarray described by ``local_reg`` that aligns with the boundaries of object dimensions. In summary, ``local_reg`` is analogous to HDF5's memory space. ``remote_reg`` is parallel to HDF5's data space for data set I/O operations.
-
-``PDCregion_transfer_close(pdcid_t transfer_request_id)`` is used to clean up the internal memories associated with the ``"transfer_request_id``.
-
-Both create and close functions are local memory operations, so no mercury modules will be involved.
-
-Region Transfer Request Start
----------------------------------------------
-
-Starting a region transfer request function will trigger the I/O operation. Data will be transferred from client to server using the ``pdc_client_connect`` module. ``pdc_client_connect`` module is a middleware layer that transfers client data to a designated server and triggers a corresponding RPC at the server side. In addition, the RPC transfer also allows data transfer by argument. Variables transferred by argument are fixed-sized. For variable-sized variables, mercury bulk transfer is used to transfer a contiguous memory buffer. Region transfer request start APIs: To transfer metadata and data with the pdc_client_connect module, the ``region_transfer_request.c`` file contains mechanisms to wrap request data into a contiguous buffer. There are two ways to start a transfer request. The first prototype is ``PDCregion_transfer_start(pdcid_t transfer_request_id)``. This function starts a single transfer request specified by its ID. The second way is to use the aggregated prototype ``PDCregion_transfer_start_all(pdcid_t *transfer_request_id, int size)``. This function can start multiple transfer requests. It is recommended to use the aggregated version when multiple requests can start together because it allows both client and server to aggregate the requests and achieve better performance.
-
-For the 1D local region, ``PDCregion_transfer_start`` passes the pointer pointing to the ``offset[0] * unit`` location of the input buffer to the ``pdc_client_connect`` module. User data will be copied to a new contiguous buffer for higher dimensions using subregion copy based on local region shape. This implementation is in the static function ``pack_region_buffer``. The new memory buffer will be passed to the ``pdc_client_conenct`` module.
-
-This memory buffer passed to the ``pdc_client_connect`` module is registered with mercury bulk transfer. If it is a read operation, the bulk transfer is a pull operation. Otherwise, it is a push operation. Remote region information and some other relevant metadata are transferred using mercury RPC arguments. Once the ``pdc_client_connect`` module receives a return code and remote transfer request ID from the designated data server, ``PDCregion_transfer_start`` will cache the remote transfer request ID and exit.
-
-``PDCregion_transfer_start`` can be interpreted as ``PDCregion_transfer_start_all`` with the size argument set to 1, though the implementation is optimized. ``PDCregion_transfer_start_all`` performs aggregation of mercury bulk transfer whenever it is possible. Firstly, the function splits the read and write requests. Write requests are handled before the read requests. Wrapping region transfer requests to internal transfer packages: For each of the write requests, it is converted into one or more instances of the structure described by ``pdc_transfer_request_start_all_pkg`` defined in ``pdc_region_transfer.c``. This structure contains the data buffer to be transferred, remote region shapes, and a data server rank to be transferred to. ``PDCregion_transfer_start_all`` implements the package translation in the static function ``prepare_start_all_requests``.
-
-As mentioned earlier in the metadata implementation, an abstract region for an object can be partitioned in different ways. There are four types of partitions: Object static partitioning, region static partitioning, region dynamic partitioning, and node-local region placement. ``PDCprop_set_obj_transfer_region_type(pdcid_t obj_prop, pdc_region_partition_t region_partition)`` allows users to set the partition method before creating an object on the client side. Different partitioning strategies have differences in the target data server rank when a transfer request is packed into ``pdc_transfer_request_start_all_pkg`` (s). We describe them separately.
-
-For the object static partitioning strategy, the input transfer request is directly packed into ``pdc_transfer_request_start_all_pkg`` using a one-to-one mapping. The data server rank is determined at the object create/open time.
-
-For dynamic region partitioning or node-local placement, the static function ``static perr_t register_metadata`` (in ``pdc_region_transfer.c``) contacts the metadata server. The metadata server dynamically selects a data server for the input region transfer request based on the current system status. If local region placement is selected, metadata servers choose the data server on the same node (or as close as possible) of the client rank that transferred this request. If dynamic region partitioning is selected, the metadata server picks the data server currently holding the minimum number of bytes of data. The metadata server holds the region to data server mapping in its metadata region query system ``pdc_server_region_transfer_metadata_query.c``. Metadata held by this module will be permanently stored in the file system as part of the metadata checkpoint file at the PDC server close time. After retrieving the correct data server ID, one ``pdc_transfer_request_start_all_pkg`` is created. The only difference in creating ``pdc_transfer_request_start_all_pkg`` compared with the object static partitioning strategy is how the data server ID is retrieved.
-
-For the static region partitioning strategy, a region is equally partitioned across all data servers. As a result, one region transfer request generates the number of ``pdc_transfer_request_start_all_pkg`` equal to the total number of PDC servers. This implementation is in the static function ``static_region_partition`` in the ``pdc_region_transfer_request.c`` file.
-
-Sending internal transfer request packages from client to server: For an aggregated region transfer request start all function call, two arrays of ``pdc_transfer_request_start_all_pkg`` are created as described in the previous subsection depending on the partitioning strategies. One is for ``PDC_WRITE``, and the other is for ``PDC_READ``. This section describes how ``pdc_region_transfer_request.c`` implements the communication from client to server. The core implementation is in the static function ``PDC_Client_start_all_requests``.
-
-Firstly, an array of ``pdc_transfer_request_start_all_pkg`` is sorted based on the target data server ID. Then, for adjacent ``pdc_transfer_request_start_all_pkg`` that send to the same data server ID, these packages are packed into a single contiguous memory buffer using the static function ``PDC_Client_pack_all_requests``. This memory buffer is passed to the ``pdc_client_connect`` layer for mercury transfer.
-
-Region transfer request wait: Region transfer request start does not guarantee the finish of data communication or I/O at the server by default. To make sure the input memory buffer is reusable or deletable, a wait function can be used. The wait function is also called implicitly when the object is closed, or special POSIX semantics is set ahead of time when the object is created.
-
-Region Transfer Request Wait
----------------------------------------------
-
-Similar to the start case, the wait API has single and aggregated versions ``PDCregion_transfer_wait`` and ``PDCregion_transfer_wait_all``. It is possible to wait for more than one request using the aggregated version.
-
-The implementation of the wait all operation is similar to the implementation of the start all request. Firstly, packages defined by the structure ``PDCregion_transfer_wait_all`` are created. ``PDCregion_transfer_wait_all`` only contains the remote region transfer request ID and data server ID. These packages are sorted based on the data server ID. Region transfer requests to the same data server are packed into a contiguous buffer and sent through the PDC client connect module.
-
-Region transfer request wait client control: As mentioned earlier, the region transfer request start all function packs many data packages into the same contiguous buffer, and passes this buffer to the PDC client connect layer for mercury transfer. This buffer can be shared by more than one region transfer request. This buffer can only be freed once wait operations are called on all these requests (not necessarily in a single wait operation call).
-
-When a wait operation is called on a subset of these requests, we reduce the reference counter of the buffer. This reference counter is a pointer stored by the structure ``pdc_transfer_request``. In terms of implementation, ``pdc_transfer_request`` stores an array of reference counter pointers and an array of data buffer pointers. Both arrays have the same size, forming a one-to-one mapping. Each of the data buffer pointers points to an aggregated memory buffer that this region transfer request packs some of its metadata/data into. When the aggregated buffer is created, the corresponding reference counter is set to be the number of region transfer requests that store the reference counter/data buffer pointers. As a result, when all of these region transfer requests have waited, the reference counter becomes zero, and the data buffer can be freed.
-
----------------------------------------------
-Region Transfer Request at Server
----------------------------------------------
-
-The region transfer request module at the server side is implemented in the ``server/pdc_server_region`` directory. This section describes how a data server is implemented at the server side.
-
-Server Region Transfer Request RPC
----------------------------------------------
-
-At the PDC server side, ``pdc_client_server_common.c`` contains all the RPCs' entrances from client calls. ``pdc_server_region_request_handler.h`` contains all the RPCs related to region transfer requests. The source code is directly included in the ``pdc_client_server_common.c``. ``HG_TEST_RPC_CB(transfer_request, handle)`` and ``HG_TEST_RPC_CB(transfer_request_all, handle)`` are the server RPCs for region transfer request start and region transfer request start all functions called at the client side. ``HG_TEST_RPC_CB(transfer_request_wait, handle)`` and ``HG_TEST_RPC_CB(transfer_request_wait_all, handle)`` are the server RPCs for region transfer request wait and region transfer request wait all.
-
-All functions containing ``cb`` at the end refer to the mercury bulk transfer callback functions. Mercury bulk transfer is used for transferring variable-sized data from client to server. The bulk transfer argument is passed through a mercury RPC argument when the server RPC is triggered. This argument is used by ``HG_Bulk_create`` and ``HG_Bulk_transfer`` to initiate data transfer from client to server. Once the transfer is finished, the mercury bulk transfer function triggers the call back function (one of the arguments passed to ``HG_Bulk_transfer``) and processes the data received (or sent to the client).
-
-Server Nonblocking Control
----------------------------------------------
-
-By design, the region transfer request start does not guarantee the finish of data transfer or server I/O. In fact, this function should return to the application as soon as possible. Data transfer and server I/O can occur in the background so that client applications can take advantage of overlapping timings between application computations and PDC data management.
-
-Server Data Cache
----------------------------------------------
-
-PDC supports server-side write data cache and is enabled in the CMake option ``PDC_SERVER_CACHE`` by default. Each time the server receives a region write request, it will cache the data in the server's memory without writing it to the file system. The server monitors both the total amount of cached data and how long it has not received any I/O requests to determine when to flush the data from cache to the file system. Two additional CMake options ``PDC_SERVER_CACHE_MAX_GB`` and ``PDC_SERVER_IDLE_CACHE_FLUSH_TIME`` can be set to affect the cache flush behavior. When the cached data size reaches the limit or the server is idle longer than the idle time, the flush operation is triggered. With the idle time trigger, when a new I/O request is received during the flush, PDC will stop flushing the next region and reset the timer to avoid interfering with the client's I/O. Setting ``export PDC_SERVER_CACHE_NO_FLUSH=0`` can disable the flush operation and keep the data in cache.
-
-Server Region Transfer Request Start
----------------------------------------------
-
-When server RPC for region transfer request start is triggered, it immediately starts the bulk transfer by calling the mercury bulk transfer functions.
-In addition, the region transfer request received by the data server triggers a register function ``PDC_transfer_request_id_register`` implemented in ``pdc_server_region_transfer.c``. This function returns a unique remote region transfer ID. This remote ID is returned to the client for future reference, so the wait operation can know which region transfer request should be finished on the data server side.
-
-Then, ``PDC_commit_request`` is called for request registration. This operation pushes the metadata for the region transfer request to the end of the data server's linked list for temporary storage.
-
-Finally, the server RPC returns a finished code to the client so that the client can return to the application immediately.
-
-Server Region Transfer Data Sieving
----------------------------------------------
-When reading a 2D/3D region, PDC server uses data sieving if a subset of a storage region is requested, which would improve the read performance. The entire region is read as a contiguous chunk and the request subset will be extracted before sending the data to the client. Setting ``export PDC_DATA_SIEVING=0`` before running the server will disable this feature.
-
-
-Server Region Transfer Request Wait
----------------------------------------------
-
-The request wait RPC on the server side receives a client's remote region transfer request ID. The RPC returns to the client when this request's local data server I/O is finished.
-
-The implementation uses the ``PDC_check_request`` function in the ``pdc_server_region_transfer.c`` file. This function returns two possibilities. One possible return value is ``PDC_TRANSFER_STATUS_COMPLETE``. In this case, the wait function can immediately return to the client. Another possibility is ``PDC_TRANSFER_STATUS_PENDING``. This flag means that the local server I/O has not finished yet, so this RPC function will not return to the client. Instead, the mercury handle is bound to the structure ``pdc_transfer_request_status`` (defined in ``pdc_server_region_transfer.h``) that stores the metadata of the region transfer request (search by its ID) within the function ``PDC_check_request``.
-
-When the region transfer request callback function for this region transfer is triggered, and the I/O operations are finished, the callback function calls ``PDC_finish_request`` to trigger the return of the wait mercury handle bound to the region transfer request. If a mercury handler is not found, ``PDC_finish_request`` sets the flag of ``pdc_transfer_request_status`` for the region transfer request to be ``PDC_TRANSFER_STATUS_COMPLETE``, so a wait request called later can immediately return as described before. Server region transfer request aggregated mode: the server acquired a contiguous memory buffer through mercury bulk transfer for aggregated region transfer request start and wait calls. This contiguous memory buffer contains packed request metadata/data from the client side. These requests are parsed. For each of the requests, we process them one at a time. The processing method is described in the previous section.
-
----------------------------------------------
-Server Region Storage
----------------------------------------------
-
-PDC is a data management library. I/O is part of its service. Therefore, I/O operation is critical for data persistence. The function ``PDC_Server_transfer_request_io`` in the ``pdc_server_region_transfer.c`` file implements the core I/O function. There are two I/O modes for PDC.
-In general, one PDC object is stored in one file per data server.
-
-Storage by File Offset
----------------------------------------------
-
-I/O by file only works for objects with fixed dimensions. Clients are not allowed to modify object dimensions by any means. When a region is written to an object, the region is translated into arrays of offsets and offset lengths based on the region shape using list I/O. Therefore, a region has fixed offsets to be placed on the file.
-
-Storage by Region
----------------------------------------------
-
-I/O by region is a special feature of the PDC I/O management system. Writing a region to an object will append the region to the end of a file. If the same region is read back again sometime later, it only takes a single POSIX ``lseek`` and I/O operation to complete either write or read.
-
-However, when a new region is written to an object, it is necessary to scan all the previously written regions to check for overlapping. The overlapping areas must be updated accordingly. If the new region is fully contained in any previously stored regions, it is unnecessary to append it to the end of the file.
-
-I/O by region will store repeated bytes when write requests contain overlapping parts. In addition, the region update mechanism generates extra I/O operations. This is one of its disadvantages. Optimization for region search (as R trees) in the future can relieve this problem.
-
-Storage Compression (Prototype)
----------------------------------------------
-
-PDC has partial support for storing the compressed data for each storage region with the ZFP compression library. Currently the compression is hard-coded to the ZFP accuracy mode.
-
-+++++++++++++++++++++++++++++++++++++++++++++
-Contributing to PDC project
-+++++++++++++++++++++++++++++++++++++++++++++
-
-In this section, we will offer you some helpful technical guidance on how to contribute to the PDC project. These 'HowTos' will help you when implementing new features or fixing bugs.
-
-
----------------------------------------------
-How to set up code formatter for PDC on Mac?
----------------------------------------------
-
-1. PDC project uses clang-format v10 for code formatting and style check.
- 1. However, on MacOS, the only available clang-format versions are v8 and v11 if you try to install it via Homebrew.
- 2. To install v10, you need to download it from: https://releases.llvm.org/download.html (https://github.com/llvm/llvm-project/releases/download/llvmorg-10.0.1/llvm-project-10.0.1.tar.xz)
- 3. Then follow instruction here to install clang-format: https://clang.llvm.org/get_started.html. I would suggest you do the following (suppose if you already have homebrew installed)
-
- .. code-block:: Bash
-
- cd $LLVM_SRC_ROOT
- mkdir build
- cd build
- cmake -G 'Unix Makefiles' -DCMAKE_INSTALL_PREFIX=/opt/llvm/v10 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DLLVM_ENABLE_PROJECTS=clang ../llvm
- make -j 128
- sudo make install
- sudo ln -s /opt/llvm/v10/bin/clang-format /opt/homebrew/bin/clang-format-v10
-
-
- 4. To format all your source code, do the following
-
- .. code-block:: Bash
-
- cd pdc
- clang-format-v10 -i -style=file src/*
- find src -iname *.h -o -iname *.c | xargs clang-format-v10 -i -style=file
-
-
- 5. You can also configure clang-format to be your default C/C++ formatting tool in VSCode, and the automatic code formatter is really convenient to use.
-
----------------------------------------------
-How to implement an RPC?
----------------------------------------------
-
-This section covers how to implement a simple RPC from client to server. If you call an RPC on the client side, the server should be able to get the argument you passed from the client and execute the corresponding server RPC function.
-
-A concrete example is ``PDC_region_transfer_wait_all``. Mercury transfers at the client side are implemented in ``pdc_client_connect.c``. The name of the function we are using in this example is ``transfer_request_wait_all``. For each component mentioned next, replace ``transfer_request_wait_all`` with your function name. This section will not discuss the design of ``transfer_request_wait_all`` but rather point out where the Mercury components are and how they interact.
-
-Firstly, in ``pdc_client_connect.c``, search for ``transfer_request_wait_all_register_id_g``. Create another variable by replacing ``transfer_request_wait_all`` with your function name. Secondly, search for ``client_send_transfer_request_wait_all_rpc_cb``, and do the same text copy and replacement. This is the callback function on the client side when the RPC is finished on the server side. For most cases, this function loads the server return arguments to a structure and returns the values to the client RPC function. There is also some error checking. Then, search for ``PDC_transfer_request_wait_all_register(*hg_class)`` and ``PDC_Client_transfer_request_wait_all``, and do text copy and replacement for both. This function is the entry point of the mercury RPC call. It contains argument loading, which has the variable name ``in``. This RPC creates a mercury bulk transfer inside it. ``HG_Create`` and ``HG_Bulk_create`` are unnecessary if your mercury transfer does not transfer variable-sized data. ``HG_Forward`` has an argument ``client_send_transfer_request_wait_all_rpc_cb``. The return values from the callback function are placed in ``transfer_args``.
-
-In file ``pdc_client_connect.h``, search for ``_pdc_transfer_request_wait_all_args``, do the text copy and replacement. This structure is the structure for returning values from client call back function ``client_send_transfer_request_wait_all_rpc_cb`` to client RPC function ``PDC_Client_transfer_request_wait_all``. For most cases, an error code is sufficient. For other cases, like creating some object IDs, you must define the structure accordingly. Do not forget to load data in ``_pdc_transfer_request_wait_all_args``. Search for ``PDC_Client_transfer_request_wait_all``, and make sure you register your client connect entry function in the same way.
-
-In file ``pdc_server.c``, search for ``PDC_transfer_request_wait_all_register(hg_class_g);``, make a copy, and replace the ``transfer_request_wait_all`` part with your function name (your function name has to be defined and used consistently throughout all these copy and replacement).
-In the file ``pdc_client_server_common.h``, search for ``typedef struct transfer_request_wait_all_in_t``. This is the structure used by a client passing its argument to the server side. You can define whatever you want that is fixed-sized inside this structure. If you have variable-sized data, it can be passed through mercury bulk transfer. The handle is ``hg_bulk_t local_bulk_handle``. ``typedef struct transfer_request_wait_all_out_t`` is the return argument from the server to the client after the server RPC is finished. Next, search for ``hg_proc_transfer_request_wait_all_in_t``. This function defines how arguments are transferred through mercury.
-Similarly, ``hg_proc_transfer_request_wait_all_in_t`` is the other way around. Next, search for ``struct transfer_request_wait_all_local_bulk_args``. This structure is useful when a bulk transfer is used. Using this function, the server passes its variables from the RPC call to the bulk transfer callback function. Finally, search for ``PDC_transfer_request_wait_all_register``. For all these structures and functions, you should copy and replace ``transfer_request_wait_all`` with your own function name.
-
-In file ``pdc_client_server_common.c``, search for ``PDC_FUNC_DECLARE_REGISTER(transfer_request_wait_all)`` and ``HG_TEST_THREAD_CB(transfer_request_wait_all)``, do text copy and function name replacement. ``pdc_server_region_request_handler.h`` is included directly in ``pdc_client_server_common.c``. The server RPC of ``transfer_request_wait_all`` is implemented in ``pdc_server_region_request_handler.h``. However, it is possible to put it directly in the ``pdc_client_server_common.c``.
-
-Let us open ``pdc_server_region_request_handler.h``. Search for ``HG_TEST_RPC_CB(transfer_request_wait_all, handle)``. This function is the entry point for the server RPC function call. ``transfer_request_wait_all_in_t`` contains the arguments you loaded previously from the client side. If you want to add more arguments, return to ``pdc_client_server_common.h`` and modify it correctly. ``HG_Bulk_create`` and ``HG_Bulk_transfer`` are the mercury bulk function calls. When the bulk transfer is finished, ``transfer_request_wait_all_bulk_transfer_cb`` is called.
-
-After a walk-through of ``transfer_request_wait_all``, you should have learned where different components of a mercury RPC should be placed and how they interact with each other. You can trace other RPC by searching their function names. If you miss things that are not optional, the program will likely hang there forever or run into segmentation faults.
-
-
----------------------------------------------
-Julia Support for tests
----------------------------------------------
-Currently, we add all Julia helper functions to `src/tests/helper/JuliaHelper.jl`
-
-Once you implement your own Julia function, you can use the bridging functions (named with prefix `run_jl_*`) defined in `src/tests/helper/include/julia_helper_loader.h` to call your Julia functions. If the current bridging functions are not sufficient for interacting with your Julia functions, you can add your own bridging functions in `src/tests/helper/include/julia_helper_loader.h` and implement it in `src/tests/helper/include/julia_helper_loader.c`.
-
-When calling your bridging functions, the best example you can follow is `src/tests/dart_attr_dist_test.c`.
-
-Remember, you must include all your bridging function calls inside the following code blocks, so that the process can have its own Julia runtime loaded.
-
-.. code-block:: C
-
- jl_module_list_t modules = {.julia_modules = (char *[]){JULIA_HELPER_NAME}, .num_modules = 1};
- init_julia(&modules);
- ......
- ... call your bridging functions
- ......
- close_julia();
-
-Also, to make sure your code with Julia function calls doesn't get compiled when Julia support is not there, you can add your new test to the list of `ENHANCED_PROGRAMS` in `src/tests/CMakeLists.txt`.
-
-For more info on embedded Julia support, please visit: `Embedded Julia <https://docs.julialang.org/en/v1/manual/embedding/>`_.
-
----------------------------------------------
-Docker Support
----------------------------------------------
-
-Sometimes, you may want to have a development or testing environment to work on PDC.
-
-We provide docker support for PDC for this purpose.
-
-To build the docker image, you can run the following command in the root directory of PDC project:
-
-.. code-block:: Bash
-
- .docker/run_dev_base.sh
-
-This will mount your PDC project directory to `/workspaces/pdc` directory in the docker container and an initial step will be performed once you attach to the container.
-The experience will be pretty much similar to the Github Codespace.
-
-
--------------------------------------------------
-Github Codespace && VSCode Dev Container Support
--------------------------------------------------
-
-Now the PDC project can be built and run in Github Codespace. For more information on how to create Github Codespace, please refer to `Github Codespace Documentation `_
-
-You can also use VSCode Dev Container to develop PDC as long as you have VSCode and Docker installed on you local computer. For more information on VSCode dev container, please refer to `Developing inside a Container `_ .
-
-When you create a code space, you can find your PDC project in `/workspaces/pdc` directory.
-And, you can find your PDC project and its dependencies in `/home/project` directory, you will see the same directory structure there as described in our standalone installation guide.
-
-Since you are using the same PDC dev_base docker image, everything should be the same as in the docker support described above.
-
-
-------------------------------------------------
-Maintaining Docker Image
-------------------------------------------------
-
-We currently only support two architectures, amd64 and arm64v8.
-To build the architecture-specific docker image on the machine with specific CPU architecture, you can run the following command in the root directory of PDC project:
-
-.. code-block:: Bash
-
- .docker/publish_dev_base.sh
-
-If you run the above command on an ARM64v8 CPU (say, Apple Silicon Mac), it will generate an image named '/pdc_dev_base:-arm64v8'.
-If you run the above command on any Intel X64/AMD x64 CPU (say, Microsoft surface or Apple Intel Mac, or an Intel CPU VM from AWS/Azure/GCP/OCI), it will generate an image named '/pdc_dev_base:-amd64'.
-Once the above is done, you can pick the image build machine with the fastest network and run the following
-
-.. code-block:: Bash
-
- .docker/publish_dev_base.sh 1
-
-This will create a multi-arch image with both amd64 and arm64v8 architectures in your registry under your namespace.
-Both architecture-specific images will be linked to a manifest in your docker registry named '/pdc_dev_base:latest'.
-
-------------------------------------------------------------
-Tracking your memory consumption with each memory allocation
-------------------------------------------------------------
-
-Now, you can use the APIs in `src/commons/utils/pdc_malloc.c` to allocate memory when needed.
-Using these APIs and macros will allow you to track your memory consumption with each memory allocation.
-You can get the total memory consumption anytime by calling `PDC_get_global_mem_usage()`.
-
-Also, the root CMakeLists.txt file will automatically detect if HAVE_MALLOC_USABLE_SIZE is available.
-If so, the memory consumption will be more accurate (summation of both allocation and freeing). Otherwise, it will be less accurate but still usable (only measure the total memory ever allocated).
-
-
-
------------------------------------------------------------
-Debugging PDC on Perlmutter with LinaroForge
------------------------------------------------------------
-
-First, you need to start an interactive session on Perlmutter:
-
-.. code-block:: Bash
-
- salloc -N 1 -C cpu -q interactive -t 1:00:00
-
-Then, you can load the ``forge`` module:
-
-.. code-block:: Bash
-
- module load forge
- module list
-
-You should see the forge module loaded and the version of it.
-
-Make sure you have the compatible version LinaroForge client installed.
-
-Go to this website https://www.linaroforge.com/download-documentation and find the compatible version of LinaroForge client for your architecture.
-
-To run a test, let's launch pdc_server with 4 cores:
-
-.. code-block:: Bash
-
- cd $PDC_HOME/build
- rm -rf ./pdc_tmp # optional if you need to clean up the PDC tmp directory
- srun -N 1 -n 4 -c 2 --mem=25600 --cpu_bind=cores ./bin/pdc_server &
-
-To debug the client, you can run the following command:
-
-.. code-block:: Bash
-
- cd $PDC_HOME/build
- ddt --connect srun -N 1 -n 4 -c 2 --mem=25600 --cpu_bind=cores ./bin/pdc_client.exe
-
-But if you need to debug the server, you can prepend ``srun`` with ``ddt --connect``:
-
-.. code-block:: Bash
-
- cd $PDC_HOME/build
- rm -rf ./pdc_tmp # optional if you need to clean up the PDC tmp directory
- ddt --connect srun -N 1 -n 4 -c 2 --mem=25600 --cpu_bind=cores ./bin/pdc_server &
-
-We recommend using 1 node when debugging PDC, but if memory is not sufficient, you can use more nodes.
diff --git a/docs/source/developer_guide.rst b/docs/source/developer_guide.rst
new file mode 100644
index 000000000..39c548d8d
--- /dev/null
+++ b/docs/source/developer_guide.rst
@@ -0,0 +1,59 @@
+.. _developer_guide:
+
+**7.** Developer Guide
+======================
+
+**7.1** Asynchronous Input/Output and Event Handling
+----------------------------------------------------
+
+PDC’s architecture enables efficient asynchronous I/O operations, allowing computation and communication to overlap, which improves application performance especially in HPC environments.
+
+
+Using asynchronous APIs:
+
+* PDC provides non-blocking APIs for data transfer such as PDCregion_transfer_start() which initiates a transfer without waiting for completion
+* Multiple asynchronous transfers can be launched concurrently to maximize throughput
+* Buffers remain valid until the transfer completes, so the application should avoid modifying memory before transfer completion.
+
+Monitoring events:
+
+* Applications can query the status of transfers using event monitoring APIs like PDCregion_transfer_wait() or polling mechanisms
+* Event callbacks can be registered to handle completion asynchronously, improving responsiveness and resource management.
+
+Waiting for events:
+
+* Synchronization can be achieved by explicitly waiting for event completion to ensure data consistency
+* Use blocking calls or condition variables to coordinate dependent computations after I/O completion
+
+
+**7.2** Scalability and Performance
+-----------------------------------
+
+Data placement:
+
+* PDC supports policy-driven data placement to optimize locality and bandwidth usage
+* Object metadata guides data distribution across different storage hierarchies
+* Applications can hint preferred storage classes or tiers to improve I/O performance.
+
+PDC server tuning:
+
+* Server-side parameters such as thread counts, buffer sizes, and cache policies can be tuned for target workloads and for different data types
+* Load balancing between servers ensures no single node becomes a bottleneck, and that the data is balanced properly
+* Profiling server behavior helps identify hot spots or resource contention.
+
+
+**7.3** Integration with MPI and Libraries
+------------------------------------------
+
+Using PDC with MPI:
+
+PDC seamlessly integrates with MPI for communication in distributed-memory environments. MPI ranks act as PDC clients issuing data operations concurrently. Also, MPI synchronization primitives can coordinate phases of PDC usage.
+
+Comparison between libraries:
+
+* Unlike traditional MPI-IO, PDC offers object-based APIs with asynchronous data transfers and metadata indexing
+* Compared to HDF5 or ADIOS, PDC provides a more flexible abstraction layer, optimized for highly scalable and concurrent workloads
+* PDC’s client-server model decouples data access from storage layout, enabling adaptable backends.
+
+
+PDC Tools
diff --git a/docs/source/documentation/analysis.rst b/docs/source/documentation/analysis.rst
deleted file mode 100644
index b2cfdf1a2..000000000
--- a/docs/source/documentation/analysis.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-================================
-Analysis
-================================
-
-.. doxygenfile:: pdc_analysis.c
-.. doxygenfile:: pdc_analysis_common.c
-.. doxygenfile:: pdc_analysis_and_transforms_connect.c
-.. doxygenfile:: pdc_hist_pkg.c
diff --git a/docs/source/documentation/objects.rst b/docs/source/documentation/objects.rst
deleted file mode 100644
index e35878cc1..000000000
--- a/docs/source/documentation/objects.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-================================
-Containers
-================================
-
-.. doxygenfile:: pdc_cont.c
-
-================================
-Objects
-================================
-
-.. doxygenfile:: pdc_obj.c
-.. doxygenfile:: pdc_mpi.c
-
-================================
-Preperties
-================================
-
-.. doxygenfile:: pdc_prop.c
\ No newline at end of file
diff --git a/docs/source/documentation/query.rst b/docs/source/documentation/query.rst
deleted file mode 100644
index 12d24ebcb..000000000
--- a/docs/source/documentation/query.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-================================
-Query
-================================
-
-.. doxygenfile:: pdc_query.c
\ No newline at end of file
diff --git a/docs/source/documentation/regions.rst b/docs/source/documentation/regions.rst
deleted file mode 100644
index 94cfdabc7..000000000
--- a/docs/source/documentation/regions.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-================================
-Regions
-================================
-
-.. doxygenfile:: pdc_region.c
-.. doxygenfile:: pdc_region_transfer.c
diff --git a/docs/source/documentation/transformation.rst b/docs/source/documentation/transformation.rst
deleted file mode 100644
index c43df32ec..000000000
--- a/docs/source/documentation/transformation.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-================================
-Transformation
-================================
-
-.. doxygenfile:: pdc_transform.c
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
deleted file mode 100644
index fdb85cdd7..000000000
--- a/docs/source/examples.rst
+++ /dev/null
@@ -1,123 +0,0 @@
-================================
-Examples
-================================
-
-* PDC regression tests can be found in https://github.com/hpc-io/pdc/tree/stable/src/tests.
-* Please follow the instructions for PDC installations.
-* PDC programs start with PDC servers running in the background.
-* Client programs uses PDC APIs to forward requests to PDC servers.
-* Scripts run_test.sh, mpi_test.sh, and run_multiple_tests.sh automatically run start and close PDC servers
-
-Usage:
-
-.. code-block:: Bash
-
- ./run_test.sh ./pdc_client_application arg1 arg2 .....
- ./mpi_test.sh ./pdc_client_application MPIRUN_CMD number_of_servers number_of_clients arg1 arg2 ....
- ./run_multiple_test.sh ./pdc_client_application_1 ./pdc_client_application_2 ......
-
----------------------------
-PDC Hello World
----------------------------
-
-* pdc_init.c
-* A PDC program starts with PDCinit and finishes with PDCclose.
-* To a simple hello world program for PDC, use the following command.
-
-.. code-block:: Bash
-
- make pdc_init
- ./run_test.sh ./pdc_init
-
-* The script "run_test.sh" starts a server first. Then program "obj_get_data" is executed. Finally, the PDC servers are closed.
-* Alternatively, the following command can be used for multile MPI processes.
-
-
-.. code-block:: Bash
-
- make pdc_init
- ./mpi_test.sh ./pdc_init mpiexec 2 4
-
-* The above command will start a server with 2 processes. Then it will start the application program with 4 processes. Finally, all servers are closed.
-* On supercomputers, "mpiexec" can be replaced with "srun", "jsrun" or "aprun".
-
----------------------------
-Simple I-O
----------------------------
-
-* This example provides a easy way for PDC beginners to write and read data with PDC servers. It can be found in obj_get_data.c
-* Functions PDCobj_put_data and PDCobj_get_data are the easist way to write/read data from/to a contiguous memory buffer.
-* This example writes different size of data to two objects. It then read back the data to check whether the data is correct or not.
-* To run this example, use the following command lines.
-
-.. code-block:: Bash
-
- make obj_get_data
- ./run_test.sh ./obj_get_data
- ./mpi_test.sh ./obj_get_data mpiexec 2 4
-
----------------------------
-I-O with region mapping
----------------------------
-
-* The simple I/O can only handles 1D data that is contiguous. PDC supports data dimension up to 3. Simple I/O functions PDCobj_put_data and PDCobj_get_data are wrappers for object create, region mapping, I/O, and object close. The examples in this section breakdowns the wrappers, which allows more flexibility.
-* Check region_obj_map_2D.c and region_obj_map_3D.c for how to write 2D and 3D data.
-* Generally, PDC perform I/O with the PDCbuf_obj_map, PDCreg_obtain_lock, PDCreg_release_lock, and PDCbuf_obj_unmap. The logic is similar to HDF5 dataspace and memory space. In PDC language, they are remote region and local region. The lock functions for remote regions allow PDC servers to handle concurrent requests from different clients without undefined behaviors.
-* To run thie example, use the following command lines.
-
-.. code-block:: Bash
-
- make
- ./run_test.sh ./region_obj_map_2D
- ./mpi_test.sh ./region_obj_map_2D mpiexec 2 4
- ./run_test.sh ./region_obj_map_3D
- ./mpi_test.sh ./region_obj_map_3D mpiexec 2 4
-
----------------------------
-VPIC-IO and BD-CATS-IO
----------------------------
-
-* VPIC is a particle simulation code developed at Los Alamos National Laboratory (LANL). VPIC-IO benchmark is an I/O kernel representing the I/O pattern of a space weather simulation exploring the magnetic reconnection phenomenon. More details of the simulation itself can be found at vpic.pdf.
-* BD-CATS is a Big Data clustering (DBSCAN) algorithm that uses HPC systems to analyze trillions of particles. BD-CATS typically analyze data produced by simulations such as VPIC. BD-CATS-IO represents the I/O kernel of the clustering algorithm. More details of BD-CATS can be found at https://sdm.lbl.gov/~sbyna/research/papers/201511-SC15-BD-CATS.pdf
-* To run VPIC-IO and BD-CATS-IO together: Go to the bin folder first after make. Then type
-
-.. code-block:: Bash
-
- ./run_multiple_test.sh ./vpicio ./bdcats
-
-* VPIC-IO:
- * vpicio.c
- * VPIC I/O is an example for writing multiple objects using PDC, where each object is a variable of particles.
- * We collectively create containers and objects. PDC region map is used to write data to individual objects.
-* BD-CATS-IO:
- * bdcats.c
- * BD-CATS-IO is an example for reading data written by VIPIC I/O.
-* To run this example
-
-.. code-block:: Bash
-
- cd make
- ./run_multiple_test.sh ./vpicio ./bdcats
-
----------------------------
-HACC-IO
----------------------------
-
-* The purpose of this benchmark is to evaluate the performance of the I/O system for the Hardware Accelerated Cosmology Code (HACC) simulation. The HACC framework uses N-body techniques to simulate the formation of structure in collisionless fluids under the influence of gravity in an expanding universe.
-* Each MPI rank writes 9 variables (with different data types) per particle for a total of 38 bytes.
-
-.. code-block:: Bash
-
- ./mpi_test.sh ./haccio mpiexec 1 num-procs num-particles
-
----------------------------
-Tile-IO
----------------------------
-
-* In Tile-IO, esach MPI rank writes one tile of a desen 2D data grid. The number of tiles in each dimension and the number of particles hold by each tile can be set through the command line arguments.
-* The total data size is: (is x-tiles * y-tiles) * (x-elements-per-tile * y-elements-per-tile).
-* Currently, the number of processes should be equal to (x-tiles * y-tiles). i.e., one tile per MPI rank. We plan to lift this restriction later and support overlap between tiles.
-
-.. code-block:: Bash
-
- ./mpi_test.sh ./tileio mpiexec num-servers num-procs x-tiles y-tiles x-elements-per-tile y-elements-per-tile
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
deleted file mode 100644
index 4892e04bd..000000000
--- a/docs/source/getting_started.rst
+++ /dev/null
@@ -1,300 +0,0 @@
-================================
-Getting Started
-================================
-
-++++++++++++++++++++++++++++++++++
-Installing PDC with Spack
-++++++++++++++++++++++++++++++++++
-
-Spack is a package manager for supercomputers, Linux, and macOS.
-More information about Spack can be found at: https://spack.io
-PDC and its dependent libraries can be installed with spack:
-
-.. code-block:: Bash
-
- # Clone Spack
- git clone -c feature.manyFiles=true https://github.com/spack/spack.git
- # Install the latest PDC release version with Spack
- ./spack/bin/spack install pdc
-
-If you run into issues with ``libfabric`` on macOS and some Linux distributions, you can enable all fabrics by installing PDC using:
-
-.. code-block:: Bash
-
- spack install pdc ^libfabric fabrics=sockets,tcp,udp,rxm
-
-++++++++++++++++++++++++++++++++++
-Installing PDC from source code
-++++++++++++++++++++++++++++++++++
-We recommend using GCC 7 or a later version. Intel and Cray compilers also work.
-
----------------------------
-Dependencies
----------------------------
-The following dependencies need to be installed:
-
-* MPI
-* libfabric
-* Mercury
-
-PDC can use either MPICH or OpenMPI as the MPI library, if your system doesn't have one installed, follow `MPICH Installers’ Guide `_ or `Installing Open MPI `_
-
-We provide detailed instructions for installing libfabric, Mercury, and PDC below.
-
-.. attention::
-
- Following the instructions below will record all the environmental variables needed to run PDC in the ``$WORK_SPACE/pdc_env.sh`` file, which can be used for future PDC runs with ``source $WORK_SPACE/pdc_env.sh``.
-
-
-Prepare Work Space and download source codes
---------------------------------------------
-Before installing the dependencies and downloading the code repository, we assume there is a directory created for your installation already, e.g. `$WORK_SPACE` and now you are in `$WORK_SPACE`.
-
-.. code-block:: Bash
- :emphasize-lines: 1
-
- export WORK_SPACE=/path/to/your/work/space
- mkdir -p $WORK_SPACE/source
- mkdir -p $WORK_SPACE/install
-
- cd $WORK_SPACE/source
- git clone https://github.com/ofiwg/libfabric
- git clone https://github.com/mercury-hpc/mercury --recursive
- git clone https://github.com/hpc-io/pdc
-
- export LIBFABRIC_SRC_DIR=$WORK_SPACE/source/libfabric
- export MERCURY_SRC_DIR=$WORK_SPACE/source/mercury
- export PDC_SRC_DIR=$WORK_SPACE/source/pdc
-
- export LIBFABRIC_DIR=$WORK_SPACE/install/libfabric
- export MERCURY_DIR=$WORK_SPACE/install/mercury
- export PDC_DIR=$WORK_SPACE/install/pdc
-
- mkdir -p $LIBFABRIC_SRC_DIR
- mkdir -p $MERCURY_SRC_DIR
- mkdir -p $PDC_SRC_DIR
-
- mkdir -p $LIBFABRIC_DIR
- mkdir -p $MERCURY_DIR
- mkdir -p $PDC_DIR
-
- # Save the environment variables to a file
- echo "export LIBFABRIC_SRC_DIR=$LIBFABRIC_SRC_DIR" > $WORK_SPACE/pdc_env.sh
- echo "export MERCURY_SRC_DIR=$MERCURY_SRC_DIR" >> $WORK_SPACE/pdc_env.sh
- echo "export PDC_SRC_DIR=$PDC_SRC_DIR" >> $WORK_SPACE/pdc_env.sh
- echo "export LIBFABRIC_DIR=$LIBFABRIC_DIR" >> $WORK_SPACE/pdc_env.sh
- echo "export MERCURY_DIR=$MERCURY_DIR" >> $WORK_SPACE/pdc_env.sh
- echo "export PDC_DIR=$PDC_DIR" >> $WORK_SPACE/pdc_env.sh
-
-
-From now on you can simply run the following commands to set the environment variables:
-
-.. code-block:: Bash
- :emphasize-lines: 1
-
- export WORK_SPACE=/path/to/your/work/space
- source $WORK_SPACE/pdc_env.sh
-
-
-
-Install libfabric
------------------
-
-.. code-block:: Bash
-
- cd $LIBFABRIC_SRC_DIR
- git checkout v1.18.0
- ./autogen.sh
- ./configure --prefix=$LIBFABRIC_DIR CC=mpicc CFLAG="-O2"
- make -j && make install
-
- # Test the installation
- make check
-
- # Set the environment variables
- export LD_LIBRARY_PATH="$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH"
- export PATH="$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH"
- echo 'export LD_LIBRARY_PATH=$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh
- echo 'export PATH=$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh
-
-
-.. note::
-
- ``CC=mpicc`` may need to be changed to the corresponding compiler in your system, e.g. ``CC=cc`` or ``CC=gcc``.
- On Perlmutter@NERSC, ``--disable-efa --disable-sockets`` should be added to the ``./configure`` command when compiling on login nodes.
-
-.. attention::
-
- When installing on MacOS, make sure to enable ``sockets`` with the following configure command:
- ``./configure CFLAG=-O2 --enable-sockets=yes --enable-tcp=yes --enable-udp=yes --enable-rxm=yes``
-
-
-Install Mercury
----------------
-
-.. code-block:: Bash
-
- cd $MERCURY_SRC_DIR
- # Checkout a release version
- git checkout v2.2.0
- mkdir build
- cd build
- cmake -DCMAKE_INSTALL_PREFIX=$MERCURY_DIR -DCMAKE_C_COMPILER=mpicc -DBUILD_SHARED_LIBS=ON \
- -DBUILD_TESTING=ON -DNA_USE_OFI=ON -DNA_USE_SM=OFF -DNA_OFI_TESTING_PROTOCOL=tcp ../
- make -j && make install
-
- # Test the installation
- ctest
-
- # Set the environment variables
- export LD_LIBRARY_PATH="$MERCURY_DIR/lib:$LD_LIBRARY_PATH"
- export PATH="$MERCURY_DIR/include:$MERCURY_DIR/lib:$PATH"
- echo 'export LD_LIBRARY_PATH=$MERCURY_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh
- echo 'export PATH=$MERCURY_DIR/include:$MERCURY_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh
-
-.. note::
-
- ``CC=mpicc`` may need to be changed to the corresponding compiler in your system, e.g. ``-DCMAKE_C_COMPILER=cc`` or ``-DCMAKE_C_COMPILER=gcc``.
- Make sure the ctest passes. PDC may not work without passing all the tests of Mercury.
-
-.. attention::
-
- When installing on MacOS, specify the ``sockets`` protocol used by Mercury by replacing the cmake command from ``-DNA_OFI_TESTING_PROTOCOL=tcp`` to ``-DNA_OFI_TESTING_PROTOCOL=sockets``
-
-
-Install PDC
------------
-
-.. code-block:: Bash
-
- cd $PDC_SRC_DIR
- git checkout develop
- mkdir build
- cd build
- cmake -DBUILD_MPI_TESTING=ON -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX=$PDC_DIR \
- -DPDC_ENABLE_MPI=ON -DMERCURY_DIR=$MERCURY_DIR -DCMAKE_C_COMPILER=mpicc -DMPI_RUN_CMD=mpiexec ../
- make -j && make install
-
- # Set the environment variables
- export LD_LIBRARY_PATH="$PDC_DIR/lib:$LD_LIBRARY_PATH"
- export PATH="$PDC_DIR/include:$PDC_DIR/lib:$PATH"
- echo 'export LD_LIBRARY_PATH=$PDC_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh
- echo 'export PATH=$PDC_DIR/include:$PDC_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh
-
-.. note::
-
- ``-DCMAKE_C_COMPILER=mpicc -DMPI_RUN_CMD=mpiexec`` may need to be changed to ``-DCMAKE_C_COMPILER=cc -DMPI_RUN_CMD=srun`` depending on your system environment.
-
-.. note::
-
- If you are trying to compile PDC on MacOS, ``LibUUID`` needs to be installed on your MacOS first. Simple use ``brew install ossp-uuid`` to install it.
- If you are trying to compile PDC on Linux, you should also make sure ``LibUUID`` is installed on your system. If not, you can install it with ``sudo apt-get install uuid-dev`` on Ubuntu or ``yum install libuuid-devel`` on CentOS.
-
- In MacOS you also need to export the following environment variable so PDC (i.e., Mercury) uses the ``socket`` protocol, the only one supported in MacOS: ``export HG_TRANSPORT="sockets"``
-
-
-Test Your PDC Installation
---------------------------
-PDC's ``ctest`` contains both sequential and parallel/MPI tests, and can be run with the following in the `build` directory.
-
-.. code-block:: Bash
-
- ctest
-
-You can also specify a timeout (e.g., 2 minutes) for the tests by specifying the ``timeout`` parameter when calling ``ctest``:
-
-.. code-block:: Bash
-
- ctest --timeout 120
-
-.. note::
-
- If you are using PDC on an HPC system, e.g. Perlmutter@NERSC, ``ctest`` should be run on a compute node, you can submit an interactive job on Perlmutter: ``salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=mxxxx``
-
-
-Install/Configure Julia Support for PDC
----------------------------------------
-Currently, in ``src/tests``, we experimentally added support for Julia programming language. With this support, you can either hand over the data to a Julia-based function to process, or you can use the result returned by a Julia-based function in which a complex logic is implemented (such as complex algebra operations or connecting to databases/cloud storages, etc).
-To know how to add a Julia function, please see Developer Notes.
-
-But before adding any of your function, we need to enable the Julia support first, and here is how:
-
-Prerequisite
-^^^^^^^^^^^^
-
-Make sure you have Julia-lang installed. You can check with your system administrator to see if you already have Julia-lang installed. If not, you can either ask your system administrator to install it for you or you can install it yourself if permitted. On macOS, the best way to install Julia is via `Homebrew `_. You may also refer to `Julia Download Page `_ for instructions on installing Julia.
-Once you installed Julia, you can set ``JULIA_HOME`` to be where Julia-lang is installed.
-
-.. code-block:: Bash
-
- export JULIA_HOME=/path/to/julia/install/directory
-
-.. note::
-
- On Perlmutter You can ``module load julia`` to load the Julia-lang module and set ``$JULIA_HOME`` with: ``export JULIA_HOME=$(dirname $(dirname $(which julia)))``
-
-
-Enabling Julia Support for PDC Tests
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Once the Prerequisite is satisfied, you can enable Julia support by adding ``--DPDC_ENABLE_JULIA_SUPPORT=ON`` to your cmake command and re-run it.
-Then you can compile your PDC project with Julia support.
-
-Now, see Developer Notes to know how you can add your own Julia functions to enhance your test cases in PDC.
-
-
-Build PDC in a Docker Container
----------------------------------------------------
-Simply run the following command from the project root directory to build PDC in a Docker container:
-
-.. code-block:: Bash
-
- .docker/run_dev_base.sh
-
-Build PDC in Github Codespace
----------------------------------------------------
-Simply start a Github Codespace from the targeting branch of your PDC project, and enjoy.
-For more information on how to create Github Codespace, please refer to `Github Codespace Documentation `_
-
-
----------------------------
-Running PDC
----------------------------
-
-If you have followed all the previous steps, ``$WORK_SPACE/pdc_env.sh`` sets all the environment variables needed to run PDC, and you only need to do the following once in each terminal session before running PDC.
-
-.. code-block:: Bash
-
- export WORK_SPACE=/path/to/your/work/space
- source $WORK_SPACE/pdc_env.sh
-
-PDC is a typical client-server application.
-To run PDC, one needs to start the server processes first, and then the clients can be started and connected to the PDC servers automatically.
-
-On Linux
---------
-Run 2 server processes in the background
-
-.. code-block:: Bash
-
- mpiexec -np 2 $PDC_DIR/bin/pdc_server &
-
-Run 4 client processes that concurrently create 1000 objects and then create and query 1000 tags:
-
-.. code-block:: Bash
-
- mpiexec -np 4 $PDC_DIR/share/test/bin/kvtag_add_get_scale 1000 1000 1000
-
-
-On Perlmutter
--------------
-Run 4 server processes, each on one compute node in the background:
-
-.. code-block:: Bash
-
- srun -N 4 -n 4 -c 2 --mem=25600 --cpu_bind=cores $PDC_DIR/bin/pdc_server &
-
-Run 64 client processes that concurrently create 1000 objects and then create and query 100000 tags:
-
-.. code-block:: Bash
-
- srun -N 4 -n 64 -c 2 --mem=25600 --cpu_bind=cores $PDC_DIR/share/test/bin/kvtag_add_get_scale 100000 100000 100000
diff --git a/docs/source/hdf5vol.rst b/docs/source/hdf5vol.rst
deleted file mode 100644
index bd0b036cf..000000000
--- a/docs/source/hdf5vol.rst
+++ /dev/null
@@ -1,93 +0,0 @@
-================================
-HDF5 VOL for PDC
-================================
-
-++++++++++++++++++++++++++++++++++
-Installing HDF5 VOL for PDC from source
-++++++++++++++++++++++++++++++++++
-
-The following instructions are for installing PDC on Linux and Cray machines. These instructions assume that PDC and its dependencies have all already been installed from source (libfabric and Mercury).
-
-Install HDF5
----------------------------
-
-If a local version of HDF5 is to be used, then the following can be used to install HDF5.
-
-.. code-block:: Bash
-
- $ wget "https://www.hdfgroup.org/package/hdf5-1-12-1-tar-gz/?wpdmdl=15727&refresh=612559667d6521629837670"
- $ mv index.html?wpdmdl=15727&refresh=612559667d6521629837670 hdf5-1.12.1.tar.gz
- $ tar zxf hdf5-1.12.1.tar.gz
- $ cd hdf5-1.12.1
- $ ./configure --prefix=$HDF5_DIR
- $ make
- $ make check
- $ make install
- $ make check-install
-
-Building VOL-PDC
----------------------------
-
-.. code-block:: Bash
-
- $ git clone https://github.com/hpc-io/vol-pdc.git
- $ cd vol-pdc
- $ mkdir build
- $ cd build
- $ cmake ../ -DHDF5_INCLUDE_DIR=$HDF5_INCLUDE_DIR -DHDF5_LIBRARY=$HDF5_LIBRARY -DBUILD_SHARED_LIBS=ON -DHDF5_DIR=$HDF5_DIR
- $ make
- $ make install
-
-To compile and run examples:
-
-.. code-block:: Bash
-
- $ cd vol-pdc/examples
- $ cmake .
- $ make
- $ mpirun -N 1 -n 1 -c 1 /bin/pdc_server &
- $ mpirun -N 1 -n 1 -c 1 ./h5pdc_vpicio test
- $ mpirun -N 1 -n 1 -c 1 /bin/close_server
-
-
-++++++++++++++++++++++++++++++++++
-VOL Function Notes
-++++++++++++++++++++++++++++++++++
-
-The following functions have been properly defined:
-
-* H5VL_pdc_info_copy
-* H5VL_pdc_info_cmp
-* H5VL_pdc_info_free
-* H5VL_pdc_info_to_str
-* H5VL_pdc_str_to_info
-
-* H5VL_pdc_file_create
-* H5VL_pdc_file_open
-* H5VL_pdc_file_close
-* H5VL_pdc_file_specific
-
-* H5VL_pdc_dataset_create
-* H5VL_pdc_dataset_open
-* H5VL_pdc_dataset_write
-* H5VL_pdc_dataset_read
-* H5VL_pdc_dataset_get
-* H5VL_pdc_dataset_close
-
-* H5VL_pdc_introspect_get_conn_cls
-* H5VL_pdc_introspect_get_cap_flags
-
-* H5VL_pdc_group_create
-* H5VL_pdc_group_open
-
-* H5VL_pdc_attr_create
-* H5VL_pdc_attr_open
-* H5VL_pdc_attr_read
-* H5VL_pdc_attr_write
-* H5VL_pdc_attr_get
-
-* H5VL_pdc_object_open
-* H5VL_pdc_object_get
-* H5VL_pdc_object_specific
-
-Any function not listed above is either not currently used by the VOL and therefore does nothing or is called by the VOL, but doesn't need to do anything relevant to the VOL and therefore does nothing.
diff --git a/docs/source/hellopdcexample.rst b/docs/source/hellopdcexample.rst
deleted file mode 100644
index 1242caf50..000000000
--- a/docs/source/hellopdcexample.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-================================
-Hello PDC Example
-================================
-
----------------------------
-PDC Hello World
----------------------------
-
-* pdc_init.c
-* A PDC program starts with PDCinit and finishes with PDCclose.
-* To a simple hello world program for PDC, use the following command.
-
-.. code-block:: Bash
-
- make pdc_init
- ./run_test.sh ./pdc_init
-
-* The script "run_test.sh" starts a server first. Then program "obj_get_data" is executed. Finally, the PDC servers are closed.
-* Alternatively, the following command can be used for multile MPI processes.
-
-
-.. code-block:: Bash
-
- make pdc_init
- ./mpi_test.sh ./pdc_init mpiexec 2 4
-
-* The above command will start a server with 2 processes. Then it will start the application program with 4 processes. Finally, all servers are closed.
-* On supercomputers, "mpiexec" can be replaced with "srun", "jsrun" or "aprun".
\ No newline at end of file
diff --git a/docs/source/images/.DS_Store b/docs/source/images/.DS_Store
deleted file mode 100644
index 44e11dc0c..000000000
Binary files a/docs/source/images/.DS_Store and /dev/null differ
diff --git a/docs/source/images/pdc.png b/docs/source/images/pdc.png
deleted file mode 100644
index c5918f471..000000000
Binary files a/docs/source/images/pdc.png and /dev/null differ
diff --git a/docs/source/images/pdclogo.png b/docs/source/images/pdclogo.png
deleted file mode 100644
index 7789690fc..000000000
Binary files a/docs/source/images/pdclogo.png and /dev/null differ
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 13f3fbf96..700ef41f4 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,57 +1,44 @@
-.. PDC documentation master file, created by
- sphinx-quickstart on Thu Apr 15 14:28:56 2021.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
Proactive Data Containers (PDC)
===============================
-Proactive Data Containers (PDC) software provides an object-focused data management API, a runtime system with a set of scalable data object management services, and tools for managing data objects stored in the PDC system. The PDC API allows efficient and transparent data movement in complex memory and storage hierarchy. The PDC runtime system performs data movement asynchronously and provides scalable metadata operations to find and manipulate data objects. PDC revolutionizes how data is managed and accessed by using object-centric abstractions to represent data that moves in the high-performance computing (HPC) memory and storage subsystems. PDC manages extensive metadata to describe data objects to find desired data efficiently as well as to store information in the data objects.
+Welcome to the documentation for **Proactive Data Containers (PDC)** —
+a high-performance data management framework designed to support flexible, scalable,
+and efficient data movement and access across diverse memory and storage systems.
+
+Whether you're a new user, a developer integrating PDC into your application, or a
+contributor to the project, this documentation provides the resources you need to get started and dive deeper.
-More information and publications of PDC is available at https://sdm.lbl.gov/pdc
+Here’s how to navigate:
-If you use PDC in your research, please use the following citation:
+- **Getting Started**
+ Learn the basics of PDC, including core concepts, architecture, and key services. Start here if you're new to PDC.
+- **Developer & API Guide**
+ Explore the client API, internal design considerations, and tools to help you develop and debug PDC-based applications.
+- **Community and Contributions**
+ Find guidelines for contributing to PDC and additional reference material in the appendix.
-Byna, Suren, Dong, Bin, Tang, Houjun, Koziol, Quincey, Mu, Jingqing, Soumagne, Jerome, Vishwanath, Venkat, Warren, Richard, and Tessier, François. Proactive Data Containers (PDC) v0.1. Computer Software. https://github.com/hpc-io/pdc. USDOE. 11 May. 2017. Web. doi:10.11578/dc.20210325.1.
+Use the table of contents below to explore the sections in more detail.
.. toctree::
:maxdepth: 2
:caption: Getting Started
- getting_started
-
-.. toctree::
- :maxdepth: 2
- :caption: Overview
-
introduction
- hdf5vol
+ core_concepts
+ using_pdc
+ advanced_topics
.. toctree::
:maxdepth: 2
- :caption: Resources
-
- hellopdcexample
- examples
- api
- tools
+ :caption: Client API/Tools & Develop Guide
+ client_api
+ client_tools
+ developer_guide
.. toctree::
- :maxdepth: 2
- :caption: Developer Documentation
-
- developer-notes
- documentation/objects
- documentation/regions
- documentation/query
- documentation/analysis
- documentation/transformation
-
-
-Indices and tables
-==================
+ :maxdepth: 1
+ :caption: Community and Contributions
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+ contributing
+ appendix
diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
index c2a9f5564..2cc6078e9 100644
--- a/docs/source/introduction.rst
+++ b/docs/source/introduction.rst
@@ -1,14 +1,759 @@
-================================
-Introduction
-================================
+.. _introduction:
-Emerging high performance computing (HPC) systems are expected to be deployed with an unprecedented level of complexity, due to a very deep system memory/storage hierarchy. This hierarchy is expected to range from CPU cache through several levels of volatile memory to non-volatile memory, traditional hard disks, and tape. Simple and efficient methods of data management and movement through this hierarchy is critical for scientific applications using exascale systems. Existing storage system and I/O (SSIO) technologies face severe challenges in dealing with these requirements. POSIX and MPI I/O standards that are the basis for existing I/O libraries and parallel file systems present fundamental challenges in the areas of scalable metadata operations, semantics-based data movement performance tuning, asynchronous operation, and support for scalable consistency of distributed operations.
+**1.** Introduction
+===================
-Moving toward new paradigms for SSIO in the extreme-scale era, we propose to investigate novel object- based data abstractions and storage mechanisms that take advantage of the deep storage hierarchy and enable proactive automated performance tuning. In order to achieve these overarching goals, we propose a fundamental new data abstraction, called Proactive Data Containers (PDC). A PDC is a container within a locus of storage (memory, NVRAM, disk, etc.) that stores science data in an object-oriented manner. Managing data as objects enables powerful optimization opportunities for data movement and transformations. In this project, we will research: 1) formulation of object-oriented PDCs and their mapping in different levels of the exascale storage hierarchy; 2) efficient strategies for moving data in deep storage hierarchies using PDCs; 3) techniques for transforming and reorganizing data based on application requirements; and 4) novel analysis paradigms for enabling data transformations and user-defined analysis on data in PDCs. The intent of our research is to move the field of HPC SSIO in a direction where it may ultimately be possible to develop scientific applications without the need to perform cumbersome and inefficient tuning to optimize data movement on every system the application runs on.
+**1.1.** What is PDC
+--------------------
-.. image:: ../source/images/pdc.png
- :width: 600
- :align: center
- :alt: An overview of Proactive Data Container structures across multiple storage layers (or loci).
+Proactive Data Containers (PDC) software provides an object-focused data management API,
+a runtime system with a set of scalable data object management services, and tools for
+managing data objects stored in the PDC system. The PDC API allows efficient and
+transparent data movement in complex memory and storage hierarchy. The PDC runtime
+system performs data movement asynchronously and provides scalable metadata operations
+to find and manipulate data objects. PDC revolutionizes how data is managed and accessed
+by using object-centric abstractions to represent data that moves in the high-performance
+computing (HPC) memory and storage subsystems. PDC manages extensive metadata to describe
+data objects to find desired data efficiently as well as to store information in the data objects.
-PDCs will have an impact in many science areas, given the importance of the data management and I/O software stack in achieving science discoveries at scale. The foundations of the novel data management and storage paradigm approaches and formalisms proposed in this research are expected to be applicable to a broad range of scientific and engineering problems that utilize computational and experimental facilities for predictive understanding of physical processes through data analytics and visualization. The proposed techniques are expected to accelerate the crucial process of data-driven exploration and knowledge discovery. While we will work closely with a set of key DOE science applications in the areas of cosmology, climate, genomics, and high-energy density physics to evaluate our research, the proposed new I/O paradigm will be broadly applicable to all users of DOE HPC facilities.
\ No newline at end of file
+More information and publications about PDC are available at https://sdm.lbl.gov/pdc.
+
+If you use PDC in your research, please cite the following:
+
+Byna, Suren, Dong, Bin, Tang, Houjun, Koziol, Quincey, Mu, Jingqing,
+Soumagne, Jerome, Vishwanath, Venkat, Warren, Richard, and Tessier, François.
+*Proactive Data Containers (PDC) v0.1*. Computer Software. https://github.com/hpc-io/pdc.
+USDOE. 11 May. 2017. Web. doi:`10.11578/dc.20210325.1 <https://doi.org/10.11578/dc.20210325.1>`_
+
+**1.2.** Installation
+---------------------
+
+PDC offers the following methods for installing core dependencies:
+
+1. :ref:`Spack <link_spack>`
+2. :ref:`PDC Source <pdc-source>`
+
+PDC offers the following installation targets:
+
+1. :ref:`C API <c-api>`
+2. :ref:`Python API (PDCpy) <python-api-pdc-py>`
+3. :ref:`HDF5 VOL Connector (VOL-PDC) <hdf5-vol-connector>`
+
+.. figure:: ../_static/image/pdc-installation.png
+ :alt: PDC Installation Diagram
+ :align: center
+ :class: bordered-image
+
+ Installation workflow to install the client targets offered by PDC.
+
+.. note::
+
+ All installation targets require the PDC core dependencies to be installed
+ either via spack or directly from the compiled source code.
+
+.. _link_spack:
+
+Spack
+~~~~~
+
+Spack is a package manager for supercomputers, Linux, and macOS.
+It makes installing scientific software easy.
+More information about Spack can be found at: https://spack.io.
+PDC and its dependencies can be installed with spack:
+
+.. code-block:: bash
+
+ # Clone the Spack repository
+ git clone -c feature.manyFiles=true https://github.com/spack/spack.git
+
+ # Source the Spack setup script
+ . ./spack/share/spack/setup-env.sh
+
+ # Create a new environment for PDC
+ spack env create pdc-env
+ spack env activate pdc-env
+
+ # Add PDC to the environment with tests enabled
+ HG_HOST=eth0 spack install --add --test=root --verbose pdc ^libfabric fabrics=tcp,rxm
+
+ # Load PDC to verify the installation
+ spack load pdc
+ pdc --version
+
+.. _pdc-source:
+
+.. note::
+
+ To view an exhaustive list of compile-time options please see :ref:`compile_time_options`.
+
+PDC Source
+~~~~~~~~~~
+
+We recommend using GCC version 7 or later. Intel and Cray compilers also work.
+
+When building PDC from source, either MPICH or OpenMPI can be used as the MPI library, if your system
+doesn't have one installed, follow `MPICH Installers Guide <https://www.mpich.org/documentation/guides/>`_
+or `Installing Open MPI <https://www.open-mpi.org/faq/?category=building>`_.
+
+We provide detailed instructions for installing libfabric, Mercury, and the PDC library below.
+
+.. attention::
+
+ Following the instructions below will record all the environmental variables
+ needed to run PDC in the ``$WORK_SPACE/pdc_env.sh`` file, which can be used for
+ future PDC runs with ``source $WORK_SPACE/pdc_env.sh``.
+
+Prepare Work Space
+~~~~~~~~~~~~~~~~~~
+
+Before installing the dependencies and downloading the code repositories, we assume
+there is a directory created for your installation already, e.g. ``$WORK_SPACE`` and
+that you are in the ``$WORK_SPACE`` directory.
+
+.. code-block:: Bash
+
+ export WORK_SPACE=/path/to/your/work/space
+ mkdir -p $WORK_SPACE/source
+ mkdir -p $WORK_SPACE/install
+
+ cd $WORK_SPACE/source
+ git clone https://github.com/ofiwg/libfabric
+ git clone https://github.com/mercury-hpc/mercury --recursive
+ git clone https://github.com/hpc-io/pdc
+
+ export LIBFABRIC_SRC_DIR=$WORK_SPACE/source/libfabric
+ export MERCURY_SRC_DIR=$WORK_SPACE/source/mercury
+ export PDC_SRC_DIR=$WORK_SPACE/source/pdc
+
+ export LIBFABRIC_DIR=$WORK_SPACE/install/libfabric
+ export MERCURY_DIR=$WORK_SPACE/install/mercury
+ export PDC_DIR=$WORK_SPACE/install/pdc
+
+ mkdir -p $LIBFABRIC_SRC_DIR
+ mkdir -p $MERCURY_SRC_DIR
+ mkdir -p $PDC_SRC_DIR
+
+ mkdir -p $LIBFABRIC_DIR
+ mkdir -p $MERCURY_DIR
+ mkdir -p $PDC_DIR
+
+ # Save the environment variables to a file
+ echo "export LIBFABRIC_SRC_DIR=$LIBFABRIC_SRC_DIR" > $WORK_SPACE/pdc_env.sh
+ echo "export MERCURY_SRC_DIR=$MERCURY_SRC_DIR" >> $WORK_SPACE/pdc_env.sh
+ echo "export PDC_SRC_DIR=$PDC_SRC_DIR" >> $WORK_SPACE/pdc_env.sh
+ echo "export LIBFABRIC_DIR=$LIBFABRIC_DIR" >> $WORK_SPACE/pdc_env.sh
+ echo "export MERCURY_DIR=$MERCURY_DIR" >> $WORK_SPACE/pdc_env.sh
+ echo "export PDC_DIR=$PDC_DIR" >> $WORK_SPACE/pdc_env.sh
+
+From now on you can simply run the following commands to set the environment variables:
+
+.. code-block:: Bash
+
+ export WORK_SPACE=/path/to/your/work/space
+ source $WORK_SPACE/pdc_env.sh
+
+Install libfabric
+~~~~~~~~~~~~~~~~~
+
+.. code-block:: Bash
+
+ cd $LIBFABRIC_SRC_DIR
+ git checkout v1.18.0
+ ./autogen.sh
+ ./configure --prefix=$LIBFABRIC_DIR CC=mpicc CFLAG="-O2"
+ make -j && make install
+
+ # Test the installation
+ make check
+
+ # Set the environment variables
+ export LD_LIBRARY_PATH="$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH"
+ export PATH="$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH"
+ echo 'export LD_LIBRARY_PATH=$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh
+ echo 'export PATH=$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh
+
+.. note::
+
+ ``CC=mpicc`` may need to be changed to the corresponding compiler
+ in your system, e.g. ``CC=cc`` or ``CC=gcc``.
+ On Perlmutter@NERSC, ``--disable-efa --disable-sockets`` should be
+ added to the ``./configure`` command when compiling on login nodes.
+
+.. attention::
+
+ When installing on MacOS, make sure to enable ``sockets`` with the following configure command:
+ ``./configure CFLAG=-O2 --enable-sockets=yes --enable-tcp=yes --enable-udp=yes --enable-rxm=yes``
+
+Install Mercury
+~~~~~~~~~~~~~~~
+
+.. code-block:: Bash
+
+ cd $MERCURY_SRC_DIR
+
+ # Checkout a release version
+ git checkout v2.2.0
+ mkdir build
+ cd build
+ cmake -DCMAKE_INSTALL_PREFIX=$MERCURY_DIR -DCMAKE_C_COMPILER=mpicc -DBUILD_SHARED_LIBS=ON \
+ -DBUILD_TESTING=ON -DNA_USE_OFI=ON -DNA_USE_SM=OFF -DNA_OFI_TESTING_PROTOCOL=tcp ../
+ make -j && make install
+
+ # Test the installation
+ ctest
+
+ # Set the environment variables
+ export LD_LIBRARY_PATH="$MERCURY_DIR/lib:$LD_LIBRARY_PATH"
+ export PATH="$MERCURY_DIR/include:$MERCURY_DIR/lib:$PATH"
+ echo 'export LD_LIBRARY_PATH=$MERCURY_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh
+ echo 'export PATH=$MERCURY_DIR/include:$MERCURY_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh
+
+.. note::
+
+ ``CC=mpicc`` may need to be changed to the corresponding compiler in your system, e.g.
+ ``-DCMAKE_C_COMPILER=cc`` or ``-DCMAKE_C_COMPILER=gcc``.
+ Make sure the ctest passes. PDC may not work without passing
+ all the tests of Mercury.
+
+.. attention::
+
+ When installing on MacOS, specify the ``sockets`` protocol used by Mercury by replacing
+ the cmake command from ``-DNA_OFI_TESTING_PROTOCOL=tcp`` to ``-DNA_OFI_TESTING_PROTOCOL=sockets``
+
+Install PDC Source
+~~~~~~~~~~~~~~~~~~
+
+.. code-block:: Bash
+
+ cd $PDC_SRC_DIR
+ git checkout develop
+ mkdir build
+ cd build
+ cmake -DBUILD_MPI_TESTING=ON -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DCMAKE_INSTALL_PREFIX=$PDC_DIR \
+ -DPDC_ENABLE_MPI=ON -DMERCURY_DIR=$MERCURY_DIR -DCMAKE_C_COMPILER=mpicc -DMPI_RUN_CMD=mpiexec ../
+ make -j && make install
+
+ # Set the environment variables
+ export LD_LIBRARY_PATH="$PDC_DIR/lib:$LD_LIBRARY_PATH"
+ export PATH="$PDC_DIR/include:$PDC_DIR/lib:$PATH"
+ echo 'export LD_LIBRARY_PATH=$PDC_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh
+ echo 'export PATH=$PDC_DIR/include:$PDC_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh
+
+.. _compile_time_options:
+
+Compile-Time Options
+~~~~~~~~~~~~~~~~~~~~
+
+The following table lists all available compile-time options for PDC, along with a description of each and their current support status:
+
+.. list-table:: Compile-Time Macros
+ :header-rows: 1
+ :widths: 30 10 50 10
+
+ * - Option Name
+ - Default
+ - Description
+ - Support
+ * - BUILD_MPI_TESTING
+ - ON
+ - Build MPI testing.
+ - 🟢
+ * - BUILD_SHARED_LIBS
+ - ON
+ - Build with shared libraries.
+ - 🟢
+ * - BUILD_TESTING
+ - ON
+ - Build the testing tree.
+ - 🟢
+ * - BUILD_TOOLS
+ - OFF
+ - Build tools.
+ - 🟢
+ * - PDC_DART_SUFFIX_TREE_MODE
+ - ON
+ - Enable DART Suffix Tree mode.
+ - 🟢
+ * - PDC_ENABLE_APP_CLOSE_SERVER
+ - OFF
+ - Close PDC server at the end of the application.
+ - 🟢
+ * - PDC_ENABLE_CHECKPOINT
+ - ON
+ - Enable checkpointing.
+ - 🟢
+ * - PDC_ENABLE_FASTBIT
+ - OFF
+ - Enable FastBit.
+ - 🟢
+ * - PDC_ENABLE_JULIA_SUPPORT
+ - OFF
+ - Enable Julia support.
+ - 🟢
+ * - PDC_ENABLE_LUSTRE
+ - OFF
+ - Enable Lustre.
+ - 🟢
+ * - PDC_ENABLE_MPI
+ - ON
+ - Enable MPI.
+ - 🟢
+ * - PDC_ENABLE_MULTITHREAD
+ - OFF
+ - Enable multithreading.
+ - 🟡
+ * - PDC_ENABLE_PROFILING
+ - OFF
+ - Enable profiling.
+ - 🔴
+ * - PDC_ENABLE_ROCKSDB
+ - OFF
+ - Enable RocksDB (experimental).
+ - 🟢
+ * - PDC_ENABLE_SQLITE3
+ - OFF
+ - Enable SQLite3 (experimental).
+ - 🟢
+ * - PDC_ENABLE_TF_ZFP_COMPRESSION
+ - ON
+ - TensorFlow + ZFP compression (no inline help).
+ - 🟡
+ * - PDC_ENABLE_WAIT_DATA
+ - OFF
+ - Wait for data finalized in FS when object unmap is called.
+ - 🟢
+ * - PDC_ENABLE_ZFP
+ - OFF
+ - Enable ZFP.
+ - 🟡
+ * - PDC_HAVE_ATTRIBUTE_UNUSED
+ - ON
+ - Use compiler attribute for unused variables.
+ - 🟢
+ * - PDC_SERVER_CACHE
+ - ON
+ - Enable server caching.
+ - 🟢
+ * - PDC_TIMING
+ - OFF
+ - Enable timing.
+ - 🟡
+ * - PDC_USE_CRAY_DRC
+ - OFF
+ - Use Cray DRC to allow multi-job communication.
+ - 🟢
+ * - PDC_USE_SHARED_SERVER
+ - OFF
+ - Use shared server with client mode.
+ - 🟢
+
+Legend:
+
+- 🟢 = Fully supported
+- 🟡 = Partially/experimentally supported
+- 🔴 = Not supported or currently disabled
+
+.. note::
+
+ ``-DCMAKE_C_COMPILER=mpicc -DMPI_RUN_CMD=mpiexec`` may need to be
+ changed to ``-DCMAKE_C_COMPILER=cc -DMPI_RUN_CMD=srun`` depending on your system environment.
+
+ If you are trying to compile PDC on MacOS, ``LibUUID`` needs to be installed
+ on your MacOS first. Simply use ``brew install ossp-uuid`` to install it.
+ If you are trying to compile PDC on Linux, you should also make sure ``LibUUID``
+ is installed on your system. If not, you can install it with
+ ``sudo apt-get install uuid-dev`` on Ubuntu or ``yum install libuuid-devel`` on CentOS.
+
+ In MacOS you also need to export the following environment variable so PDC
+ (i.e., Mercury) uses the ``socket`` protocol, the only one supported in
+ MacOS: ``export HG_TRANSPORT="sockets"``.
+
+Test Your PDC Source Installation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+PDC has both sequential and parallel (MPI) tests which can be run
+using the following command in the ``build`` directory.
+
+.. code-block:: Bash
+
+ ctest
+
+You can also specify a timeout (e.g., 2 minutes) for the tests by specifying
+the ``timeout`` parameter when calling ``ctest``:
+
+.. code-block:: Bash
+
+ ctest --timeout 120
+
+If PDC was built without support for MPI, you can run only the sequential (non-MPI) tests
+using the ``-L serial`` parameter:
+
+.. code-block:: Bash
+
+ ctest -L serial
+
+.. note::
+
+ If you are using PDC on an HPC system, e.g. Perlmutter@NERSC, ``ctest`` should be run
+ on a compute node, you can submit an interactive job on Perlmutter:
+ ``salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=mxxxx``
+
+.. _c-api:
+
+**1.3.** C API
+--------------
+
+.. _python-api-pdc-py:
+
+**1.4.** Python API (PDCpy)
+---------------------------------
+
+Due to the rise of Python in the HPC community, PDC provides Python
+bindings, which allows users to interact with PDC using Python.
+The repository for PDCpy can be found at `PDCpy GitHub Repository <https://github.com/hpc-io/PDCpy>`_.
+The documentation for PDCpy's API is available at `PDCpy Documentation `_.
+
+PDCpy is compatible with OpenMPI and MPICH. If neither MPI library is installed,
+it will attempt to compile without MPI support, which will fail if you compile
+PDC with MPI support.
+
+Dependencies
+~~~~~~~~~~~~
+
+First ensure PDC is installed either compiled directly from the
+source code or via spack (see instructions above).
+
+.. note::
+
+ The Python interface currently only works with the `develop`
+ branch of PDC.
+
+Then clone the PDCpy repository:
+
+.. code-block:: Bash
+
+ git clone https://github.com/hpc-io/PDCpy.git
+
+Installation
+~~~~~~~~~~~~
+
+Make sure the following environment variables are correct:
+
+1. `PDC_DIR`: path to PDC installation
+2. `MERCURY_DIR`: path to mercury installation
+3. `LD_LIBRARY_PATH`: contains path to `libpdc.so`
+
+.. code-block:: Bash
+
+ pip install PDCpy
+
+
+.. _hdf5-vol-connector:
+
+**1.5.** HDF5 VOL Connector (VOL-PDC)
+-------------------------------------
+
+The following instructions are for installing PDC on Linux and Cray machines.
+These instructions assume that PDC and its dependencies have all already been
+installed from source (libfabric and Mercury).
+
+Building HDF5
+~~~~~~~~~~~~~
+
+First set ``HDF5_DIR`` to the directory where you want to install HDF5, e.g. ``$WORK_SPACE/install/hdf5``.
+
+.. code-block:: bash
+
+ wget "https://www.hdfgroup.org/package/hdf5-1-12-1-tar-gz/?wpdmdl=15727&refresh=612559667d6521629837670"
+ mv 'index.html?wpdmdl=15727&refresh=612559667d6521629837670' hdf5-1.12.1.tar.gz
+ tar zxf hdf5-1.12.1.tar.gz
+ cd hdf5-1.12.1
+ ./configure --prefix=$HDF5_DIR
+ make
+ make check
+ make install
+ make check-install
+
+Building VOL-PDC
+~~~~~~~~~~~~~~~~
+
+First set ``HDF5_INCLUDE_DIR``, ``HDF5_LIBRARY``, and ``HDF5_DIR`` to the
+appropriate paths where HDF5 is installed, e.g. ``$HDF5_DIR/include``,
+``$HDF5_DIR/lib``, and ``$HDF5_DIR`` respectively.
+
+.. code-block:: bash
+
+ git clone https://github.com/hpc-io/vol-pdc.git
+ cd vol-pdc
+ mkdir build
+ cd build
+ cmake ../ -DHDF5_INCLUDE_DIR=$HDF5_INCLUDE_DIR -DHDF5_LIBRARY=$HDF5_LIBRARY -DBUILD_SHARED_LIBS=ON -DHDF5_DIR=$HDF5_DIR
+ make
+ make install
+
+Building and Running VOL-PDC Examples
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The VOL-PDC examples can be built with the following commands:
+
+.. code-block:: bash
+
+ cd vol-pdc/examples
+ cmake .
+ make
+
+The following assumes ``PDC_BIN_DIR`` is set to the directory
+where the PDC binaries are installed, e.g. ``$PDC_DIR/bin``.
+Then, to run any of the examples, you first start the PDC server(s).
+You can then launch the example. Finally, you must close the PDC server(s).
+For instance, to run the ``h5pdc_vpicio`` example, you can use the following commands:
+
+.. code-block:: bash
+
+ # Start the PDC server(s) in the background
+ mpirun -N 1 -n 1 -c 1 $PDC_BIN_DIR/pdc_server &
+ # Run the example
+ mpirun -N 1 -n 1 -c 1 ./h5pdc_vpicio test
+ # Close the PDC server(s)
+ mpirun -N 1 -n 1 -c 1 $PDC_BIN_DIR/close_server
+
+**1.6.** Managing PDC Server(s)
+-------------------------------
+
+PDC works in a client-server architecture, therefore, before running any PDC
+client application, you need to start the PDC server(s) first.
+First ensure that the PDC server is built and installed correctly,
+then you can start a single PDC server instance with the following command:
+
+.. code-block:: bash
+
+ pdc_server
+
+You can also start multiple PDC server instances on different nodes,
+for example, you can start 4 PDC servers using the following command:
+
+.. code-block:: bash
+
+ mpirun -np 4 pdc_server
+
+The following command shows how to close a PDC server:
+
+.. code-block:: bash
+
+ close_server
+
+If multiple PDC servers were launched using ``mpirun`` they can be closed with the following command:
+
+.. code-block:: bash
+
+ mpirun -np 4 close_server
+
+If there is pre-existing data that needs to be loaded, then ``pdc_server``
+must be launched with the ``restart`` parameter as shown below:
+
+.. code-block:: bash
+
+ mpirun -np 4 pdc_server restart
+
+.. important::
+
+ If ``pdc_server`` is not launched with the ``restart`` command it will not
+ load the pre-existing data.
+
+**1.7.** First PDC Program
+--------------------------
+
+This section offers the following examples for different PDC target installations:
+
+1. :ref:`C API First Program <c-api-first-program>`
+2. :ref:`PDCpy First Program <pdcpy-first-program>`
+3. :ref:`VOL-PDC First Program <vol-pdc-first-program>`
+
+.. note::
+
+ All examples omit detailed error checking for clarity. In practice, always check the return values of PDC API calls.
+ See the section TODO_FIX_REFERENCE for more information on detecting and handling PDC errors.
+
+.. _c-api-first-program:
+
+C API First Program
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+ :linenos:
+
+ #include "pdc.h"
+
+ int main() {
+ // Initialize PDC runtime environment
+ pdcid_t pdc_id = PDCinit("pdc");
+
+ // Create container
+ pdcid_t cont_id = PDCcont_create(pdc_id, "my_container", PDC_CONT_CREATE_DEFAULT);
+
+ // Define object dimensions and properties
+ int region_size = 64;
+ uint64_t dims[1] = {region_size};
+ pdcid_t obj_prop = PDCprop_create(PDC_OBJ_CREATE, pdc_id);
+ PDCprop_set_obj_type(obj_prop, PDC_DOUBLE);
+ PDCprop_set_obj_dims(obj_prop, 1, dims);
+
+ // Create object
+ pdcid_t obj_id = PDCobj_create(cont_id, "my_object", obj_prop);
+
+ // Prepare data
+ double data[64] = {0};
+
+ // Define regions
+ uint64_t offset[1] = {0};
+ pdcid_t local_region = PDCregion_create(1, offset, dims);
+ pdcid_t global_region = PDCregion_create(1, offset, dims);
+
+ // Transfer data
+ pdcid_t transfer_request = PDCregion_transfer_create(data, PDC_WRITE, obj_id, local_region, global_region);
+ PDCregion_transfer_start(transfer_request);
+ PDCregion_transfer_wait(transfer_request);
+
+ // Clean up
+ PDCregion_transfer_close(transfer_request);
+ PDCregion_close(local_region);
+ PDCregion_close(global_region);
+ PDCobj_close(obj_id);
+ PDCcont_close(cont_id);
+ PDCclose(pdc_id);
+
+ return 0;
+ }
+
+It first initializes the PDC environment and creates a
+container and object with specified properties (lines 7-21). It then
+prepares a data buffer and defines local and global regions representing
+the data range to transfer (lines 23-29). The program performs a region-based
+write transfer of the data to the PDC object, starting and waiting for the
+transfer to complete (lines 31-33). Finally, it cleans up all PDC resources
+by closing the transfer request, regions, object, container, and the
+PDC context itself (lines 35-40).
+
+.. _pdcpy-first-program:
+
+PDCpy First Program
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+ :linenos:
+
+ import pdc
+ import numpy as np
+
+ def main():
+ cont = pdc.Container()
+ prop = pdc.Object.Properties(
+ 64,
+ pdc.Type.DOUBLE,
+ )
+ obj = cont.create_object("my_object", prop)
+ data = np.zeros(64, dtype=np.double)
+ obj.set_data(data)
+
+It begins by creating a PDC container and defining object properties such as size
+and data type (lines 6-9). An object is then created within the container using
+these properties (line 10). A NumPy array of 64 double-precision values is prepared
+to serve as the data buffer (line 11). The data is written to the PDC object using
+the set_data() method (line 12), which handles the region creation and data transfer
+internally.
+
+.. _vol-pdc-first-program:
+
+VOL-PDC First Program
+~~~~~~~~~~~~~~~~~~~~~
+
+To use the PDC VOL connector with the HDF5 C API, ensure that your environment is configured as follows:
+
+.. code-block:: bash
+
+ export HDF5_PLUGIN_PATH=$VOL_DIR/lib
+ export HDF5_VOL_CONNECTOR="pdc under_vol=0;under_info={}"
+ export LD_LIBRARY_PATH="$LIBFABRIC_DIR/lib:$MERCURY_DIR/lib:$PDC_DIR/lib:$VOL_DIR/lib:$LD_LIBRARY_PATH"
+ # Optional: preload the connector
+ export LD_PRELOAD=$VOL_DIR/install/lib/libhdf5_vol_pdc.so
+
+With this configuration, HDF5 operations in your C application will transparently use PDC for data management.
+
+Here is a simple HDF5 program in C that creates a dataset and writes a buffer to it using PDC as the underlying VOL connector:
+
+.. code-block:: c
+ :linenos:
+
+ #include "hdf5.h"
+ #include <stdio.h>
+
+ #define FILE_NAME "example.h5"
+ #define DATASET_NAME "my_dataset"
+ #define DIM0 64
+
+ int main() {
+ hid_t file_id, dataspace_id, dataset_id;
+ herr_t status;
+
+ // Initialize data
+ double data[DIM0];
+ for (int i = 0; i < DIM0; i++)
+ data[i] = (double)i;
+
+ // Create a new file using the default properties (VOL connector is set via env)
+ file_id = H5Fcreate(FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+
+ // Define dataspace for dataset
+ hsize_t dims[1] = {DIM0};
+ dataspace_id = H5Screate_simple(1, dims, NULL);
+
+ // Create the dataset
+ dataset_id = H5Dcreate2(file_id, DATASET_NAME, H5T_NATIVE_DOUBLE,
+ dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+ // Write data to the dataset
+ status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
+
+ // Close resources
+ H5Dclose(dataset_id);
+ H5Sclose(dataspace_id);
+ H5Fclose(file_id);
+
+ if (status < 0) {
+ fprintf(stderr, "Error writing data\n");
+ return 1;
+ }
+
+ printf("Data written successfully using PDC VOL.\n");
+ return 0;
+ }
+
+This example performs a standard HDF5 dataset creation and write operation.
+With the VOL environment variables set appropriately HDF5 will use
+PDC as the back-end. This makes it easy to adopt PDC in existing HDF5 workflows.
+
+
+**1.8.** Common Installation Errors
+-----------------------------------
+
+No Provider Found
+~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+ pdc_server
+ [INFO] PDC_SERVER[0]: Using [./pdc_tmp/] as tmp dir, 1 OSTs, 1 OSTs per data file, 0% to BB
+ [INFO] PDC_SERVER[0]: Environment variable HG_TRANSPORT was NOT set
+ [INFO] PDC_SERVER[0]: Environment variable HG_HOST was NOT set
+ [INFO] PDC_SERVER[0]: Connection string: ofi+tcp://ta1-pc:7000
+ # [85521.394212] mercury->fatal: [error] /home/ta1/src/workspace/source/mercury/src/na/na_ofi.c:2832
+ # na_ofi_verify_info(): No provider found for "tcp;ofi_rxm" provider on domain "ta1-pc"
+ [15:47:33.391315] [ERROR] [pdc_server.c:837] PDC_SERVER[0]: Error with HG_Init()
+ [15:47:33.391329] [ERROR] [pdc_server.c:2164] PDC_SERVER[0]: Error with PDC_Server_init
+ [15:47:33.391343] [ERROR] [pdc_server.c:990] PDC_SERVER[0]: pdc_remote_server_info_g was NULL
+ [15:47:33.391347] [ERROR] [pdc_server.c:1047] PDC_SERVER[0]: Error with PDC_Server_destroy_client_info
+
+Mercury was unable to find a valid provider based on your hostname (``ta1-pc`` in this case).
+Please review the connection string (``ofi+tcp://ta1-pc:7000``) and ensure that the appropriate
+transport and host are set. If you're unsure which transport or host to use, you can run ``fi_info``
+(found in the ``bin`` directory of your libfabric installation) to list available network providers.
\ No newline at end of file
diff --git a/docs/source/tools.rst b/docs/source/tools.rst
deleted file mode 100644
index 90e5bfc0c..000000000
--- a/docs/source/tools.rst
+++ /dev/null
@@ -1,235 +0,0 @@
-================================
-PDC Tools
-================================
-
-++++++++++++++++++++++++++++++++++
-Build Instructions
-++++++++++++++++++++++++++++++++++
-
-.. code-block:: Bash
-
- $ cd tools
- $ cmake .
- $ make
-
-++++++++++++++++++++++++++++++++++
-Commands
-++++++++++++++++++++++++++++++++++
-
-pdc_ls
----------------------------
-Takes in a directory containing PDC metadata checkpoints or an individual metadata checkpoint file and outputs information on objects saved in the checkpoint(s).
-
-Usage: :code:`./pdc_ls `:
-
-Arguments:
-
-- :code:`-json `: save the output to a specified file in json format.
-- :code:`-n `: only display objects with a specific object name . Regex matching of object names is supported.
-- :code:`-i `: only display objects with a specific object ID . Regex matching of object IDs is supported
-- :code:`-ln`: list out all object names as an additional field in the output.
-- :code:`-li`: list out all object IDs as an additional field in the output.
-- :code:`-s`: display summary statistics (number of objects found, number of containers found, number of regions found) as an additional field in the output.
-
-Examples:
-
-.. code-block:: Bash
-
- $ ./pdc_ls pdc_tmp -n id.*
- [INFO] File [pdc_tmp/metadata_checkpoint.0] last modified at: Fri Mar 11 14:19:13 2022
-
- {
- "cont_id: 1000000": [{
- "obj_id": 1000007,
- "app_name": "VPICIO",
- "obj_name": "id11",
- "user_id": 0,
- "tags": "ag0=1",
- "data_type": "PDC_INT",
- "num_dims": 1,
- "dims": [8388608],
- "time_step": 0,
- "region_list_info": [{
- "storage_loc": "/user/pdc_data/1000007/server0/s0000.bin",
- "offset": 33554432,
- "num_dims": 1,
- "start": [0],
- "count": [8388608],
- "unit_size": 4,
- "data_loc_type": "PDC_NONE"
- }]
- }, {
- "obj_id": 1000008,
- "app_name": "VPICIO",
- "obj_name": "id22",
- "user_id": 0,
- "tags": "ag0=1",
- "data_type": "PDC_INT",
- "num_dims": 1,
- "dims": [8388608],
- "time_step": 0,
- "region_list_info": [{
- "storage_loc": "/user/pdc_data/1000008/server0/s0000.bin",
- "offset": 33554432,
- "num_dims": 1,
- "start": [0],
- "count": [8388608],
- "unit_size": 4,
- "data_loc_type": "PDC_NONE"
- }]
- }]
- }
-
-.. code-block:: Bash
-
- $ ./pdc_ls pdc_tmp -n obj-var-p.* -ln -li
- [INFO] File [pdc_tmp/metadata_checkpoint.0] last modified at: Fri Mar 11 14:19:13 2022
-
- {
- "cont_id: 1000000": [{
- "obj_id": 1000004,
- "app_name": "VPICIO",
- "obj_name": "obj-var-pxx",
- "user_id": 0,
- "tags": "ag0=1",
- "data_type": "PDC_FLOAT",
- "num_dims": 1,
- "dims": [8388608],
- "time_step": 0,
- "region_list_info": [{
- "storage_loc": "/user/pdc_data/1000004/server0/s0000.bin",
- "offset": 33554432,
- "num_dims": 1,
- "start": [0],
- "count": [8388608],
- "unit_size": 4,
- "data_loc_type": "PDC_NONE"
- }]
- }, {
- "obj_id": 1000005,
- "app_name": "VPICIO",
- "obj_name": "obj-var-pyy",
- "user_id": 0,
- "tags": "ag0=1",
- "data_type": "PDC_FLOAT",
- "num_dims": 1,
- "dims": [8388608],
- "time_step": 0,
- "region_list_info": [{
- "storage_loc": "/user/pdc_data/1000005/server0/s0000.bin",
- "offset": 33554432,
- "num_dims": 1,
- "start": [0],
- "count": [8388608],
- "unit_size": 4,
- "data_loc_type": "PDC_NONE"
- }]
- }, {
- "obj_id": 1000006,
- "app_name": "VPICIO",
- "obj_name": "obj-var-pzz",
- "user_id": 0,
- "tags": "ag0=1",
- "data_type": "PDC_FLOAT",
- "num_dims": 1,
- "dims": [8388608],
- "time_step": 0,
- "region_list_info": [{
- "storage_loc": "/user/pdc_data/1000006/server0/s0000.bin",
- "offset": 33554432,
- "num_dims": 1,
- "start": [0],
- "count": [8388608],
- "unit_size": 4,
- "data_loc_type": "PDC_NONE"
- }]
- }],
- "all_obj_names": ["obj-var-pxx", "obj-var-pyy", "obj-var-pzz"],
- "all_obj_ids": [1000004, 1000005, 1000006]
- }
-
-
-pdc_import
----------------------------
-Takes in file containing line separated paths to HDF5 files and converts those HDF5 files to a PDC checkpoint.
-
-Usage: :code:`./pdc_ls `:
-
-Arguments:
-
-- :code:`-a `: Uses the specified as application name when creating PDC objects.
-- :code:`-o`: Specifies whether or not to overwrite pre-existing PDC objects when writing a PDC object that already exists.
-Examples:
-
-.. code-block:: Bash
-
- $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap /path/to/pdc_server &
- ==PDC_SERVER[0]: using [./pdc_tmp/] as tmp dir, 1 OSTs, 1 OSTs per data file, 0% to BB
- ==PDC_SERVER[0]: using ofi+tcp
- ==PDC_SERVER[0]: without multi-thread!
- ==PDC_SERVER[0]: Read cache enabled!
- ==PDC_SERVER[0]: Successfully established connection to 0 other PDC servers
- ==PDC_SERVER[0]: Server ready!
-
- $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap ./pdc_import file_names_list
- ==PDC_CLIENT: PDC_DEBUG set to 0!
- ==PDC_CLIENT[0]: Found 1 PDC Metadata servers, running with 1 PDC clients
- ==PDC_CLIENT: using ofi+tcp
- ==PDC_CLIENT[0]: Client lookup all servers at start time!
- ==PDC_CLIENT[0]: using [./pdc_tmp] as tmp dir, 1 clients per server
- Running with 1 clients, 1 files
- Importer 0: I will import 1 files
- Importer 0: [../../test.h5]
- Importer 0: processing [../../test.h5]
- Importer 0: Created container [/]
-
- ==PDC_SERVER[0]: Checkpoint file [./pdc_tmp/metadata_checkpoint.0]
- Import 8 datasets with 1 ranks took 0.93 seconds.
-
-
-pdc_export
----------------------------
-Converts PDC metadata checkpoint to a file of specified format. Currently only HDF5 is supported.
-
-Usage: :code:`./pdc_ls `:
-
-Arguments:
-
-- :code:`-f `: Uses the specified export . Currently only supports HDF5 exports.
-
-Examples:
-
-.. code-block:: Bash
-
- $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap /path/to/pdc_server &
- ==PDC_SERVER[0]: using [./pdc_tmp/] as tmp dir, 1 OSTs, 1 OSTs per data file, 0% to BB
- ==PDC_SERVER[0]: using ofi+tcp
- ==PDC_SERVER[0]: without multi-thread!
- ==PDC_SERVER[0]: Read cache enabled!
- ==PDC_SERVER[0]: Successfully established connection to 0 other PDC servers
- ==PDC_SERVER[0]: Server ready!
-
- $ srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores --gres=craynetwork:1 --overlap ./pdc_export pdc_tmp
- ==PDC_CLIENT: PDC_DEBUG set to 0!
- ==PDC_CLIENT[0]: Found 1 PDC Metadata servers, running with 1 PDC clients
- ==PDC_CLIENT: using ofi+tcp
- ==PDC_CLIENT[0]: Client lookup all servers at start time!
- ==PDC_CLIENT[0]: using [./pdc_tmp] as tmp dir, 1 clients per server
- [INFO] File [pdc_tmp/metadata_checkpoint.0] last modified at: Mon May 9 06:17:18 2022
-
- POSIX read from file offset 117478480, region start = 0, region size = 8388608
- POSIX read from file offset 130024208, region start = 0, region size = 8388608
- POSIX read from file offset 130057104, region start = 0, region size = 8388608
- POSIX read from file offset 130056720, region start = 0, region size = 8388608
- POSIX read from file offset 130023696, region start = 0, region size = 8388608
- POSIX read from file offset 130056592, region start = 0, region size = 8388608
- POSIX read from file offset 130056720, region start = 0, region size = 8388608
- POSIX read from file offset 130023696, region start = 0, region size = 8388608
-
-
-.. warning::
- PDC tools currently does not support compound data types and will have unexpected behavior when attempting to work with compound data types.
-
-
-
-
diff --git a/docs/source/using_pdc.rst b/docs/source/using_pdc.rst
new file mode 100644
index 000000000..b06585b14
--- /dev/null
+++ b/docs/source/using_pdc.rst
@@ -0,0 +1,446 @@
+.. _using_pdc:
+
+**3.** Using PDC
+==================
+
+This section provides a practical overview of how to use the PDC
+library to manage and transfer data in high-performance computing environments.
+It walks through the essential steps of initializing PDC, creating containers
+and objects, defining regions, and performing data transfers.
+
+Basic Usage
+~~~~~~~~~~~
+
+- :ref:`3.1. Initializing PDC <initializing-pdc>`
+- :ref:`3.2. Container Lifecycle <container-lifecycle>`
+- :ref:`3.3. Object Lifecycle <object-lifecycle>`
+- :ref:`3.4. Region Transfer Lifecycle <region-lifecycle>`
+
+Complete Examples
+~~~~~~~~~~~~~~~~~
+
+- :ref:`Example: 2D Region Transfers <2D-region-transfer>`
+- :ref:`Example: 2D Batch Region Transfer <2D-batch-region-transfer>`
+- :ref:`Example: Get & Put Object <get-put-object>`
+
+3.1. Initializing PDC
+---------------------
+
+.. _initializing-pdc:
+
+Prior to any interaction with PDC, the user needs to initialize it as shown below:
+
+.. code-block:: C
+
+ pdcid_t pdc_id = PDCinit("pdc");
+
+At the end of the application a corresponding deinitialization function should be called:
+
+.. code-block:: C
+
+ PDCclose(pdc_id);
+
+.. note::
+
+ Users should check that every PDC API call succeeds.
+ In general, if a function returns a ``pdcid_t``, `0` indicates an error.
+ If a function returns a ``perr_t``, a negative value indicates an error.
+
+
+3.2. Container Lifecycle
+-------------------------
+
+.. _container-lifecycle:
+
+Containers store objects and provide users a way to organize their data.
+Before creating a container, a container property must be constructed.
+The container property provides users a method for customizing a container's behavior.
+For an exhaustive list of container properties, please see FIXME.
+
+This is shown in the example below:
+
+.. code-block:: C
+
+ pdcid_t cont_prop_id = PDCprop_create(PDC_CONT_CREATE, pdc_id);
+
+ // Independent container creation
+    pdcid_t cont_id = PDCcont_create("cont", cont_prop_id);
+
+ // Collective container creation
+ pdcid_t cont_col_id = PDCcont_create_col("cont", cont_prop_id);
+
+To open an existing container:
+
+.. code-block:: C
+
+ pdcid_t cont_id = PDCcont_open("cont");
+
+The following functions should be used to free both the container and its associated property resources:
+
+.. code-block:: C
+
+ PDCprop_close(cont_prop_id);
+ PDCcont_close(cont_id);
+ PDCcont_close(cont_col_id);
+
+3.3. Object Lifecycle
+---------------------
+
+.. _object-lifecycle:
+
+Objects represent user data and are the entities stored within containers in PDC.
+Before creating an object, an object property must be defined, which
+specifies metadata such as dimensionality, size, data type, and region partitioning.
+For an exhaustive list of object properties, please see FIXME.
+This allows for fine-grained control over how data is laid out and accessed.
+
+Below is an example of setting up an object property and creating objects:
+
+.. code-block:: C
+
+ // Create object property
+ pdcid_t obj_prop_id = PDCprop_create(PDC_OBJ_CREATE, pdc_id);
+
+ // Set properties: type, dims, etc.
+ uint64_t dims[1] = {1024};
+    PDCprop_set_obj_dims(obj_prop_id, 1, dims);
+ PDCprop_set_obj_type(obj_prop_id, PDC_FLOAT);
+
+ // Independent object creation
+ pdcid_t obj_id = PDCobj_create(cont_id, "obj", obj_prop_id);
+
+ // Collective object creation
+ pdcid_t obj_col_id = PDCobj_create_col(cont_id, "obj", obj_prop_id, my_rank, comm);
+
+To open an existing object by name within a container:
+
+.. code-block:: C
+
+ pdcid_t obj_id = PDCobj_open(cont_id, "obj");
+
+When the object and its property are no longer needed, they should be closed to free resources:
+
+.. code-block:: C
+
+ PDCprop_close(obj_prop_id);
+ PDCobj_close(obj_id);
+ PDCobj_close(obj_col_id);
+
+3.4. Region Transfer Lifecycle
+------------------------------
+
+.. _region-lifecycle:
+
+Regions define logical subranges within a PDC object and are used to specify what part of the object's data will be transferred between memory and storage.
+
+Transfers can be performed in three main modes:
+
+- Individually, with ``PDCregion_transfer_start()``
+- Collectively, with ``PDCregion_transfer_start_mpi()`` across MPI processes
+- In batches, with ``PDCregion_transfer_start_all()`` and ``PDCregion_transfer_wait_all()``
+
+Basic Region Transfer
+~~~~~~~~~~~~~~~~~~~~~
+
+Create memory and object regions and initiate a transfer:
+
+.. code-block:: C
+
+ uint64_t offset[1] = {0};
+ uint64_t size[1] = {1024};
+
+ float *data_buf = malloc(sizeof(float) * size[0]);
+
+ pdcid_t mem_reg_id = PDCregion_create(1, offset, size);
+ pdcid_t obj_reg_id = PDCregion_create(1, offset, size);
+
+    pdcid_t xfer = PDCregion_transfer_create(data_buf, PDC_WRITE,
+                                             obj_id, mem_reg_id, obj_reg_id);
+
+ PDCregion_transfer_start(xfer);
+ PDCregion_transfer_wait(xfer);
+
+ PDCregion_transfer_close(xfer);
+ PDCregion_close(mem_reg_id);
+ PDCregion_close(obj_reg_id);
+ free(data_buf);
+
+Collective Transfer
+~~~~~~~~~~~~~~~~~~~
+
+If the transfer is intended to be performed collectively across MPI ranks, use:
+
+.. code-block:: C
+
+ PDCregion_transfer_start_mpi(xfer);
+
+This function should be called by all processes participating in
+the transfer and is useful for coordinated I/O in distributed
+applications. The rest of the transfer workflow (e.g., ``PDCregion_transfer_wait()``)
+remains unchanged.
+
+Batch Region Transfer
+~~~~~~~~~~~~~~~~~~~~~
+
+For scenarios involving many objects or regions, PDC supports batch transfers to reduce overhead:
+
+.. code-block:: C
+
+ #define OBJ_NUM 10
+ #define BUF_LEN 256
+
+ int *data[OBJ_NUM];
+ pdcid_t transfer_requests[OBJ_NUM];
+ pdcid_t reg = PDCregion_create(1, offset, size);
+ pdcid_t reg_global = PDCregion_create(1, offset, size);
+
+ for (int i = 0; i < OBJ_NUM; ++i) {
+ data[i] = malloc(sizeof(int) * BUF_LEN);
+ for (int j = 0; j < BUF_LEN; ++j)
+ data[i][j] = j;
+
+ transfer_requests[i] = PDCregion_transfer_create(
+ data[i], PDC_WRITE, obj[i], reg, reg_global);
+ }
+
+ // Start all transfers in one batch
+ PDCregion_transfer_start_all(transfer_requests, OBJ_NUM);
+
+ // Wait for all to complete
+ PDCregion_transfer_wait_all(transfer_requests, OBJ_NUM);
+
+ for (int i = 0; i < OBJ_NUM; ++i) {
+ PDCregion_transfer_close(transfer_requests[i]);
+ free(data[i]);
+ }
+
+ PDCregion_close(reg);
+ PDCregion_close(reg_global);
+
+Complete Examples
+-----------------
+
+.. _2D-region-transfer:
+
+2D Region Transfer Example
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: C
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+    #include <unistd.h>
+ #include "pdc.h"
+
+ #define BUF_LEN 128
+
+ int main(int argc, char **argv)
+ {
+ pdcid_t pdc, cont_prop, cont, obj_prop, reg, reg_global;
+ pdcid_t obj1, obj2;
+ char cont_name[128], obj_name1[128], obj_name2[128];
+ pdcid_t transfer_request;
+ int rank = 0, size = 1, i;
+ int ret_value = 0;
+ uint64_t offset[3], offset_length[3];
+ uint64_t dims[2];
+ int *data = (int *)malloc(sizeof(int) * BUF_LEN);
+ int *data_read = (int *)malloc(sizeof(int) * BUF_LEN);
+ dims[0] = BUF_LEN / 4;
+ dims[1] = 4;
+
+ #ifdef ENABLE_MPI
+ MPI_Init(&argc, &argv);
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &size);
+ #endif
+
+ pdc = PDCinit("pdc");
+ cont_prop = PDCprop_create(PDC_CONT_CREATE, pdc);
+ sprintf(cont_name, "c%d", rank);
+ cont = PDCcont_create(cont_name, cont_prop);
+ obj_prop = PDCprop_create(PDC_OBJ_CREATE, pdc);
+ PDCprop_set_obj_type(obj_prop, PDC_INT);
+ PDCprop_set_obj_dims(obj_prop, 2, dims);
+ PDCprop_set_obj_user_id(obj_prop, getuid());
+ PDCprop_set_obj_time_step(obj_prop, 0);
+ PDCprop_set_obj_app_name(obj_prop, "DataServerTest");
+ PDCprop_set_obj_tags(obj_prop, "tag0=1");
+
+ sprintf(obj_name1, "o1_%d", rank);
+ obj1 = PDCobj_create(cont, obj_name1, obj_prop);
+ sprintf(obj_name2, "o2_%d", rank);
+ obj2 = PDCobj_create(cont, obj_name2, obj_prop);
+
+ offset[0] = 0;
+ offset_length[0] = BUF_LEN;
+ reg = PDCregion_create(1, offset, offset_length);
+ offset[0] = 0;
+ offset[1] = 0;
+ offset_length[0] = BUF_LEN / 4;
+ offset_length[1] = 4;
+ reg_global = PDCregion_create(2, offset, offset_length);
+
+ for (i = 0; i < BUF_LEN; ++i)
+ data[i] = i;
+
+ transfer_request = PDCregion_transfer_create(data, PDC_WRITE, obj1, reg, reg_global);
+ PDCregion_transfer_start(transfer_request);
+ PDCregion_transfer_wait(transfer_request);
+ PDCregion_transfer_close(transfer_request);
+ PDCregion_close(reg);
+ PDCregion_close(reg_global);
+
+ offset[0] = 0;
+ offset_length[0] = BUF_LEN;
+ reg = PDCregion_create(1, offset, offset_length);
+ offset[0] = 0;
+ offset[1] = 0;
+ offset_length[0] = BUF_LEN / 4;
+ offset_length[1] = 4;
+ reg_global = PDCregion_create(2, offset, offset_length);
+
+ transfer_request = PDCregion_transfer_create(data_read, PDC_READ, obj1, reg, reg_global);
+ PDCregion_transfer_start(transfer_request);
+ PDCregion_transfer_wait(transfer_request);
+ PDCregion_transfer_close(transfer_request);
+
+ for (i = 0; i < BUF_LEN; ++i)
+ if (data_read[i] != i)
+ ret_value = 1;
+
+ PDCregion_close(reg);
+ PDCregion_close(reg_global);
+ PDCobj_close(obj1);
+ PDCobj_close(obj2);
+ PDCcont_close(cont);
+ PDCprop_close(obj_prop);
+ PDCprop_close(cont_prop);
+ free(data);
+ free(data_read);
+ PDCclose(pdc);
+
+ #ifdef ENABLE_MPI
+ MPI_Finalize();
+ #endif
+ return ret_value;
+ }
+
+.. _2D-batch-region-transfer:
+
+2D Batch Region Transfer Example
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: C
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <stdint.h>
+ #include "pdc.h"
+
+ int main() {
+ // Initialize PDC
+ pdcid_t pdc_id = PDCinit("pdc");
+ pdcid_t prop = PDCprop_create(PDC_OBJ_CREATE, pdc_id);
+
+ // Set object dimensions
+ uint64_t dims[2] = {40, 10};
+        PDCprop_set_obj_dims(prop, 2, dims);
+ PDCprop_set_obj_type(prop, PDC_INT);
+
+        // Create a container, then create the object inside it
+        // (PDCobj_create expects a container ID, not the PDC instance ID)
+        pdcid_t cont_prop = PDCprop_create(PDC_CONT_CREATE, pdc_id);
+        pdcid_t cont_id = PDCcont_create("2d_cont", cont_prop);
+        pdcid_t obj_id = PDCobj_create(cont_id, "2d_obj", prop);
+
+ // Create memory and object regions for 4 different 10x10 regions
+ uint64_t offsets[4][2] = {
+ {0, 0}, {10, 0}, {20, 0}, {30, 0}
+ };
+
+ int *buffers[4];
+ pdcid_t mem_regions[4], obj_regions[4], transfers[4];
+
+ for (int i = 0; i < 4; i++) {
+ buffers[i] = malloc(sizeof(int) * 10 * 10);
+ for (int j = 0; j < 100; j++) {
+ buffers[i][j] = i * 1000 + j;
+ }
+
+ mem_regions[i] = PDCregion_create(2, (uint64_t[]){0, 0}, (uint64_t[]){10, 10});
+ obj_regions[i] = PDCregion_create(2, offsets[i], (uint64_t[]){10, 10});
+            transfers[i] = PDCregion_transfer_create(buffers[i], PDC_WRITE, obj_id, mem_regions[i], obj_regions[i]);
+ }
+
+ // Start and wait all transfers
+        PDCregion_transfer_start_all(transfers, 4);
+        PDCregion_transfer_wait_all(transfers, 4);
+
+ // Cleanup
+ for (int i = 0; i < 4; i++) {
+ PDCregion_close(mem_regions[i]);
+ PDCregion_close(obj_regions[i]);
+ PDCregion_transfer_close(transfers[i]);
+ free(buffers[i]);
+ }
+
+ PDCobj_close(obj_id);
+ PDCprop_close(prop);
+ PDCclose(pdc_id);
+
+ return 0;
+ }
+
+.. _get-put-object:
+
+Get Put Object Example
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: C
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+ #include "pdc.h"
+
+ int main(int argc, char **argv)
+ {
+ pdcid_t pdc, cont_prop, cont;
+ pdcid_t obj1, obj2;
+ char cont_name[128], obj_name1[128], obj_name2[128];
+ char *data = (char *)malloc(sizeof(double) * 128);
+
+ #ifdef ENABLE_MPI
+ MPI_Init(&argc, &argv);
+ #endif
+
+ pdc = PDCinit("pdc");
+ cont_prop = PDCprop_create(PDC_CONT_CREATE, pdc);
+ sprintf(cont_name, "c%d", 0);
+ cont = PDCcont_create(cont_name, cont_prop);
+
+ memset(data, 1, 128 * sizeof(double));
+ sprintf(obj_name1, "o1_%d", 0);
+ obj1 = PDCobj_put_data(obj_name1, data, 16 * sizeof(double), cont);
+
+ memset(data, 2, 128 * sizeof(double));
+ sprintf(obj_name2, "o2_%d", 0);
+ obj2 = PDCobj_put_data(obj_name2, data, 128 * sizeof(double), cont);
+
+ memset(data, 0, 128 * sizeof(double));
+ PDCobj_get_data(obj1, data, 16 * sizeof(double));
+
+ memset(data, 0, 128 * sizeof(double));
+ PDCobj_get_data(obj2, data, 128 * sizeof(double));
+
+ PDCobj_close(obj1);
+ PDCobj_close(obj2);
+ PDCcont_close(cont);
+ PDCprop_close(cont_prop);
+ free(data);
+ PDCclose(pdc);
+
+ #ifdef ENABLE_MPI
+ MPI_Finalize();
+ #endif
+ return 0;
+ }
diff --git a/src/api/pdc_obj/include/pdc_obj.h b/src/api/pdc_obj/include/pdc_obj.h
index af334a46d..e6c9af499 100644
--- a/src/api/pdc_obj/include/pdc_obj.h
+++ b/src/api/pdc_obj/include/pdc_obj.h
@@ -31,19 +31,47 @@
/* Public Typedefs */
/*******************/
typedef enum { PDC_NA = 0, PDC_READ = 1, PDC_WRITE = 2 } pdc_access_t;
+
+/**
+ * @brief Partitioning strategy for PDC regions of an object.
+ *
+ * An abstract region of an object can be partitioned in four ways.
+ *
+ * The default is PDC_REGION_STATIC.
+ */
typedef enum {
- PDC_OBJ_STATIC = 0,
- PDC_REGION_STATIC = 1,
+ /// @brief Object static partitioning.
+ /// Input transfer requests are directly packed with a one-to-one mapping.
+ /// The target data server is determined at object create/open time.
+ PDC_OBJ_STATIC = 0,
+
+ /// @brief Region static partitioning.
+ /// Each region is equally partitioned across all data servers.
+ PDC_REGION_STATIC = 1,
+
+ /// @brief Region dynamic partitioning.
+ /// The metadata server selects the data server dynamically based on
+ /// current system load to balance region assignments.
PDC_REGION_DYNAMIC = 2,
- PDC_REGION_LOCAL = 3
+
+ /// @brief Node-local region placement.
+ /// The metadata server selects a data server on the same node (or closest)
+ /// to the client transferring the request.
+ PDC_REGION_LOCAL = 3
} pdc_region_partition_t;
+
typedef enum { PDC_BLOCK = 0, PDC_NOBLOCK = 1 } pdc_lock_mode_t;
+
+/**
+ * @brief Enum defining the consistency model for PDC objects:
+ */
typedef enum {
- PDC_CONSISTENCY_DEFAULT = 0,
- PDC_CONSISTENCY_POSIX = 1,
- PDC_CONSISTENCY_COMMIT = 2,
- PDC_CONSISTENCY_SESSION = 3,
- PDC_CONSISTENCY_EVENTUAL = 4
+ /// @brief Synchronous region writes and reads. Operations
+ /// are immediately visible to all processes (POSIX semantics)
+ PDC_CONSISTENCY_POSIX = 1,
+ /// @brief Asynchronous updates. Reads may return stale data
+ /// until updates propagate (eventual consistency).
+ PDC_CONSISTENCY_EVENTUAL = 2
} pdc_consistency_t;
typedef struct _pdc_id_info obj_handle;
@@ -268,7 +296,7 @@ perr_t PDCprop_set_obj_transfer_region_type(pdcid_t obj_prop, pdc_region_partiti
* returned by PDCprop_create(PDC_OBJ_CREATE)
* \param consistency [IN] Consistency semantics required
- * e.g., PDC_CONSISTENCY_DEFAULT, PDC_CONSISTENCY_POSIX, etc
+ * e.g., PDC_CONSISTENCY_EVENTUAL or PDC_CONSISTENCY_POSIX.
*
* \return Non-negative on success/Negative on failure
*/
diff --git a/src/api/pdc_obj/include/pdc_prop.h b/src/api/pdc_obj/include/pdc_prop.h
index fb13ff407..464507f1f 100644
--- a/src/api/pdc_obj/include/pdc_prop.h
+++ b/src/api/pdc_obj/include/pdc_prop.h
@@ -43,7 +43,18 @@ struct pdc_obj_prop {
/*******************/
/* Public Typedefs */
/*******************/
-typedef enum { PDC_CONT_CREATE = 0, PDC_OBJ_CREATE } pdc_prop_type_t;
+
+/**
+ * @brief Type of a PDC property
+ *
+ * Indicates what the property is used for when creating objects or containers.
+ */
+typedef enum {
+ /// @brief Property applies to container creation.
+ PDC_CONT_CREATE = 0,
+ /// @brief Property applies to object creation.
+ PDC_OBJ_CREATE
+} pdc_prop_type_t;
/*********************/
/* Public Prototypes */
@@ -98,25 +109,4 @@ struct _pdc_cont_prop *PDCcont_prop_get_info(pdcid_t prop_id);
*/
struct pdc_obj_prop *PDCobj_prop_get_info(pdcid_t prop_id);
-/**
- * Send updated metadata (stored as property) to metadata server
- *
- * \param obj_id[IN] Object ID
- * \param prop_id[IN] Object property
- *
- * \return Non-negative on success/Negative on failure
- */
-perr_t PDCprop_update(pdcid_t obj_id, pdcid_t prop_id);
-
-/**
- * **********
- *
- * \param obj_id[IN] Object ID
- * \param tag_name [IN] Metadta field name
- * \param tag_value [IN] Metadta field value
- *
- * \return Non-negative on success/Negative on failure
- */
-perr_t PDCtag_get(pdcid_t obj_id, char *tag_name, void *tag_value);
-
#endif /* PDC_PROP_H */
diff --git a/src/commons/include/pdc_public.h b/src/commons/include/pdc_public.h
index aa628f7fb..e3ca2e993 100644
--- a/src/commons/include/pdc_public.h
+++ b/src/commons/include/pdc_public.h
@@ -33,7 +33,18 @@
/*******************/
/* Public Typedefs */
/*******************/
-typedef int perr_t;
+
+/**
+ * @brief PDC error
+ *
+ * A negative value indicates an error; a non-negative value indicates success.
+ */
+typedef int perr_t;
+/**
+ * @brief PDC ID
+ *
+ * A value of 0 indicates an error; any non-zero value is a valid ID.
+ */
typedef uint64_t pdcid_t;
typedef unsigned long long psize_t;
typedef bool pbool_t;
@@ -42,6 +53,35 @@ typedef int PDC_int_t;
typedef float PDC_float_t;
typedef double PDC_double_t;
+/**
+ * @brief PDC variable types
+ *
+ * List of all variable types:
+ *
+ * - PDC_UNKNOWN = 0 : error
+ * - PDC_SHORT = 1 : short types
+ * - PDC_INT = 2 : integer types (identical to int32_t)
+ * - PDC_UINT = 3 : unsigned integer types (identical to uint32_t)
+ * - PDC_LONG = 4 : long types
+ * - PDC_INT8 = 5 : 8-bit integer types
+ * - PDC_UINT8 = 6 : 8-bit unsigned integer types
+ * - PDC_INT16 = 7 : 16-bit integer types
+ * - PDC_UINT16 = 8 : 16-bit unsigned integer types
+ * - PDC_INT32 = 9 : 32-bit integer types, already listed as PDC_INT
+ * - PDC_UINT32 = 10 : 32-bit unsigned integer types
+ * - PDC_INT64 = 11 : 64-bit integer types
+ * - PDC_UINT64 = 12 : 64-bit unsigned integer types
+ * - PDC_FLOAT = 13 : floating-point types
+ * - PDC_DOUBLE = 14 : double types
+ * - PDC_CHAR = 15 : character types
+ * - PDC_STRING = 16 : string types
+ * - PDC_BOOLEAN = 17 : boolean types
+ * - PDC_VOID_PTR = 18 : void pointer type
+ * - PDC_SIZE_T = 19 : size_t type
+ * - PDC_BULKI = 20 : BULKI type
+ * - PDC_BULKI_ENT = 21 : BULKI_ENTITY type
+ * - PDC_TYPE_COUNT = 22 : number of variable types (must be last)
+ */
typedef pdc_c_var_type_t pdc_var_type_t;
// FIXME: common data structure should be defined in a group of common header files.
@@ -52,7 +92,18 @@ typedef struct pdc_kvtag_t {
void * value;
} pdc_kvtag_t;
-typedef enum { PDC_PERSIST, PDC_TRANSIENT } pdc_lifetime_t;
+/**
+ * @brief Lifetime of a PDC container
+ *
+ * The default is PDC_PERSIST
+ */
+typedef enum {
+ /// @brief The container persists beyond the lifetime of the creating process.
+ PDC_PERSIST,
+ /// @brief The container exists only for the duration
+ /// of the creating process and is deleted when the process exits.
+ PDC_TRANSIENT
+} pdc_lifetime_t;
typedef enum { PDC_SERVER_DEFAULT = 0, PDC_SERVER_PER_CLIENT = 1 } pdc_server_selection_t;