diff --git a/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md b/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md
index a864635082a1..5f9cfd3c03d6 100644
--- a/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md
+++ b/sdk/cosmos/azure-cosmos/docs/ErrorCodesAndRetries.md
@@ -2,17 +2,17 @@
The Cosmos DB Python SDK has several default policies that will deal with retrying certain errors and exceptions. More information on these can be found below.
-| Status code | Cause of exception and retry behavior |
-| :--- |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 400 | For all operations:
- This exception is encountered when the request is invalid, which could be for any of the following reasons:
- Syntax error in query text
- Malformed JSON document for a write request
- Incorrectly formatted REST API request body etc.
- The client does NOT retry the request when a Bad Request (400) exception is thrown by the server.
|
-| 401 | For all operations: - This is an unauthorized exception due to invalid auth tokens being used for the request. The client does NOT retry requests when this exception is encountered.
|
-| 403 | - For Substatus 3 (Write Forbidden) and Substatus 1008 (Database Account Not Found):
- This exception occurs when a geo-replicated database account runs into writable/readable location changes (say, after a failover).
- This exception can occur regardless of the Consistency level set for the account.
- The client refreshes it's location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).
- For all other cases:
- The client does NOT retry requests when this exception is encountered.
|
-| 404/1002 | - For write operations:
- If multiple write locations are enabled for the account, the SDK will fetch the write endpoints and retry once per each of these.
- The client refreshes it's location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).
- If the account does not have multiple write locations enabled, the SDK will retry only once in the account primary region.
- For read operations:
- If multiple write locations are enabled for the account, the SDK will fetch the read endpoints and retry once per each of these.
- The client refreshes it's location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).
- If the account does not have multiple write locations enabled, the SDK will retry only once in the account primary region.
|
-| 408 | - For Write Operations:
- Timeout exceptions can be encountered by both the client as well as the server. Server-side timeout exceptions are not retried for write operations as it is not possible to determine if the write was in fact successfully committed on the server. For a client-generated timeout exception, either the request was sent over the wire to the server by the client and the network request timeout exceeded while waiting for a response, or the request was not sent over the wire to the server which resulted in a client-generated timeout. The client does NOT retry for either.
- For Query and Point Read Operations:
- The request is retried locally for up to 120 attempts with exponential backoff.
- The SDK will retry on the next preferred region, if any is available.
|
-| 409 | - For Write Operations:
- This exception occurs when an attempt is made by the application to Create/Insert an Item that already exists.
- This exception can occur regardless of the Consistency level set for the account.
- This exception can occur for write operations when an attempt is made to create an existing item or when a unique key constraint violation occurs.
- The client does NOT retry on Conflict exceptions
- For Query and Point Read Operations:
- N/A as this exception is only encountered for Create/Insert operations.
|
-| 410/1002 | - For all operations:
- This exception occurs when a partition is split (or merged in the future) and no longer exists, and can occur regardless of the Consistency level set for the account.
- The SDK will refresh its partition key range cache and trigger a single retry, fetching the new ranges from the gateway once it finds an empty cache.
|
-| 412 | - For Write Operations:
- This exception is encountered when the etag that is sent to the server for validation prior to updating an Item, does not match the etag of the Item on the server.
- The client does NOT retry this operation locally or against any of the remote regions for the account as retries would not help alleviate the etag mismatch.
- The application would need to trigger a retry by first reading the Item, fetching the latest etag and issuing the Upsert/Replace operation.
- This operation can continue to fail with the same exception when multiple updates are executed concurrently for the same Item.
- An upper bound on the number of retries before handing off the Item to a dead letter queue should be implemented by the application.
- For Query and point read Operations:
- N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations.
|
-| 429 | For all Operations: - By default, the client retries the request for a maximum of 9 times (or for a maximum of 30 seconds, whichever limit is reached first).
- The client can also be initialized with a custom retry policy, which overrides the two limits mentioned above.
- After all the retries are exhausted, the client bubbles up the exception to the application.
- **For a multi-region account**, the client does NOT retry the request against a remote region for the account.
- When the application receives a Request Rate too large exception (429), the application would need to instrument its own retry logic and dead letter queues.
|
-| 449 | - For Write Operations:
- This exception is encountered when a resource is concurrently updated on the server, which can happen due to concurrent writes, user triggered while conflicts are concurrently being resolved etc.
- Only one update can be executed at a time per item. The other concurrent requests will fail with a Concurrent Execution Exception (449).
- The client does NOT retry requests that failed with a 449.
- For Query and point read Operations:
- N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations.
|
-| 500 | For all Operations: - The occurrence of an Invalid Exception (500) is extremely rare, and the client does NOT retry a request that encounters this exception.
|
-| 503 | When a Service Unavailable exception is encountered: - For data plane Write Operations:
- The request will NOT be retried by the SDK, and will get bubbled up to the user as a ServiceRequestError.
- For Read/Query/metadata operations:
- The SDK will retry **once** on the next preferred region, if any is available, and after this retry is exhausted the exception is bubbled up to the application as a ServiceRequestError. |
+| Status code | Cause of exception and retry behavior |
+| :--- |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 400 | For all operations:
- This exception is encountered when the request is invalid, which could be for any of the following reasons:
- Syntax error in query text
- Malformed JSON document for a write request
- Incorrectly formatted REST API request body, etc.
- The client does NOT retry the request when a Bad Request (400) exception is thrown by the server.
|
+| 401 | For all operations: - This is an Unauthorized exception caused by an invalid auth token being used for the request. The client does NOT retry requests when this exception is encountered.
|
+| 403 | - For Substatus 3 (Write Forbidden) and Substatus 1008 (Database Account Not Found):
- This exception occurs when a geo-replicated database account runs into writable/readable location changes (say, after a failover).
- This exception can occur regardless of the Consistency level set for the account.
- The client refreshes its location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).
- For all other cases:
- The client does NOT retry requests when this exception is encountered.
|
+| 404/1002 | - For write operations:
- If multiple write locations are enabled for the account, the SDK will fetch the write endpoints and retry the request once against each of them.
- The client refreshes its location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).
- If the account does not have multiple write locations enabled, the SDK will retry only once in the account primary region.
- For read operations:
- If multiple write locations are enabled for the account, the SDK will fetch the read endpoints and retry the request once against each of them.
- The client refreshes its location endpoints and retries requests when the user has enabled endpoint discovery in their client (default behavior).
- If the account does not have multiple write locations enabled, the SDK will retry only once in the account primary region.
|
+| 408 | - For Write Operations:
- Timeout exceptions can be encountered by both the client and the server. Server-side timeout exceptions are not retried for write operations, as it is not possible to determine whether the write was actually committed on the server. A client-generated timeout means that either the request was sent over the wire and the network request timeout elapsed while waiting for a response, or the request was never sent at all. The client does NOT retry in either case.
- For Query and Point Read Operations:
- The SDK will retry on the next preferred region, if any is available.
|
+| 409 | - For Write Operations:
- This exception occurs when an attempt is made by the application to Create/Insert an Item that already exists.
- This exception can occur regardless of the Consistency level set for the account.
- A unique key constraint violation on a write operation also surfaces as this exception.
- The client does NOT retry on Conflict exceptions.
- For Query and Point Read Operations:
- N/A as this exception is only encountered for Create/Insert operations.
|
+| 410/1002 | - For all operations:
- This exception occurs when a partition is split (or merged in the future) and no longer exists, and can occur regardless of the Consistency level set for the account.
- The SDK will refresh its partition key range cache and trigger a single retry, fetching the new ranges from the gateway once it finds an empty cache.
|
+| 412 | - For Write Operations:
- This exception is encountered when the etag that is sent to the server for validation prior to updating an Item does not match the etag of the Item on the server.
- The client does NOT retry this operation locally or against any of the remote regions for the account as retries would not help alleviate the etag mismatch.
- The application would need to trigger a retry by first reading the Item, fetching the latest etag and issuing the Upsert/Replace operation.
- This operation can continue to fail with the same exception when multiple updates are executed concurrently for the same Item.
- An upper bound on the number of retries before handing off the Item to a dead letter queue should be implemented by the application.
- For Query and point read Operations:
- N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations.
|
+| 429 | For all Operations: - By default, the client retries the request a maximum of 9 times (or for a maximum of 30 seconds, whichever limit is reached first).
- The client can also be initialized with a custom retry policy, which overrides the two limits mentioned above.
- After all the retries are exhausted, the client bubbles up the exception to the application.
- **For a multi-region account**, the client does NOT retry the request against a remote region for the account.
- When the application receives a Request Rate too large exception (429), the application would need to instrument its own retry logic and dead letter queues.
|
+| 449 | - For Write Operations:
- This exception is encountered when a resource is concurrently updated on the server, for example due to concurrent writes or user-triggered operations issued while conflicts are still being resolved.
- Only one update can be executed at a time per item. The other concurrent requests will fail with a Concurrent Execution Exception (449).
- The client does NOT retry requests that failed with a 449.
- For Query and point read Operations:
- N/A as this exception is only encountered for Create/Insert/Replace/Upsert operations.
|
+| 500 | For all Operations: - The occurrence of an Internal Server Error (500) is extremely rare, and the client will retry a request that encounters this exception against the next preferred regions.
|
+| 503 | When a Service Unavailable exception is encountered: - The request will be retried by the SDK against the next preferred regions, if any are available. |
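The 412 and 429 rows above leave bounded retries to the application. For illustration, a minimal sketch of application-side handling of a 412 precondition failure along the lines the table describes; the `MAX_ATTEMPTS` bound, the item shape, and the availability of the `etag`/`match_condition` keyword arguments on `replace_item` are assumptions of this example, not something the table prescribes:

```python
import azure.cosmos.exceptions as exceptions
from azure.core import MatchConditions

MAX_ATTEMPTS = 5  # assumed application-level bound before dead-lettering the item


def replace_with_etag_retry(container, item_id, partition_key, update_fn):
    # Read-modify-replace loop: re-read the item to get its latest etag, apply the
    # update, and send it back with an If-Match style precondition.
    for _ in range(MAX_ATTEMPTS):
        current = container.read_item(item=item_id, partition_key=partition_key)
        updated = update_fn(current)
        try:
            return container.replace_item(
                item=item_id,
                body=updated,
                etag=current['_etag'],
                match_condition=MatchConditions.IfNotModified,
            )
        except exceptions.CosmosHttpResponseError as e:
            if e.status_code != 412:
                raise  # not a precondition failure; let other status codes surface
            # 412: another writer won the race; loop to re-read and retry
    raise RuntimeError(f"{item_id} still conflicting after {MAX_ATTEMPTS} attempts; hand off to a dead letter queue")
```

The same bounded-retry idea applies to 429 once the client's built-in limits (9 attempts / 30 seconds, or a custom retry policy supplied at client construction) are exhausted.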
diff --git a/sdk/cosmos/azure-cosmos/samples/session_token_management.py b/sdk/cosmos/azure-cosmos/samples/session_token_management.py
index 2dc212b58215..46e781e86435 100644
--- a/sdk/cosmos/azure-cosmos/samples/session_token_management.py
+++ b/sdk/cosmos/azure-cosmos/samples/session_token_management.py
@@ -6,7 +6,7 @@
import json
import random
import uuid
-from typing import Dict, Any
+from typing import Dict, Any, List, Tuple
from azure.cosmos import PartitionKey
from azure.cosmos import CosmosClient
@@ -54,8 +54,7 @@ def storing_session_tokens_pk(container):
# Everything below is just a simulation of what could be run on different machines and clients
# to store session tokens in a cache by feed range from the partition key.
# The cache is a Dict here for simplicity but in a real-world scenario, it would be some service.
- feed_ranges_and_session_tokens = []
- previous_session_token = ""
+ feed_ranges_and_session_tokens: List[Tuple[Dict[str, Any], str]] = []
# populating cache with session tokens
for i in range(5):
@@ -65,19 +64,24 @@ def storing_session_tokens_pk(container):
'pk': 'A' + str(random.randint(1, 10))
}
target_feed_range = container.feed_range_from_partition_key(item['pk'])
- response = container.create_item(item, session_token=previous_session_token)
- session_token = response.get_response_headers()[HttpHeaders.SessionToken]
- # adding everything in the cache in case consolidation is possible
- for feed_range_json, session_token_cache in cache.items():
- feed_range = json.loads(feed_range_json)
- feed_ranges_and_session_tokens.append((feed_range, session_token_cache))
- feed_ranges_and_session_tokens.append((target_feed_range, session_token))
- latest_session_token = container.get_latest_session_token(feed_ranges_and_session_tokens, target_feed_range)
- # only doing this for the key to be immutable
- feed_range_json = json.dumps(target_feed_range)
- cache[feed_range_json] = latest_session_token
- previous_session_token = session_token
-
+ perform_create_item_with_cached_session_token(cache, container, feed_ranges_and_session_tokens, item,
+ target_feed_range)
+
+def perform_create_item_with_cached_session_token(cache, container, feed_ranges_and_session_tokens, item,
+ target_feed_range):
+ # only doing this for the key to be immutable
+ feed_range_json = json.dumps(target_feed_range)
+ session_token = cache.get(feed_range_json)
+ response = container.create_item(item, session_token=session_token)
+ response_session_token = response.get_response_headers()[HttpHeaders.SessionToken]
+ # adding everything from the cache in case consolidation is possible
+ for cached_feed_range_json, session_token_cache in cache.items():  # distinct name so feed_range_json (the target cache key) is not overwritten
+ feed_range = json.loads(cached_feed_range_json)
+ feed_ranges_and_session_tokens.append((feed_range, session_token_cache))
+ feed_ranges_and_session_tokens.append((target_feed_range, response_session_token))
+ latest_session_token = container.get_latest_session_token(feed_ranges_and_session_tokens, target_feed_range)
+ # update the cache entry for the target feed range with the consolidated session token
+ cache[feed_range_json] = latest_session_token
def storing_session_tokens_container_feed_ranges(container):
print('2. Storing session tokens in a cache by feed range from the container.')
@@ -87,8 +91,7 @@ def storing_session_tokens_container_feed_ranges(container):
# Everything below is just a simulation of what could be run on different machines and clients
# to store session tokens in a cache by feed range from the partition key.
- feed_ranges_and_session_tokens = []
- previous_session_token = ""
+ feed_ranges_and_session_tokens: List[Tuple[Dict[str, Any], str]] = []
feed_ranges = list(container.read_feed_ranges())
# populating cache with session tokens
@@ -99,24 +102,11 @@ def storing_session_tokens_container_feed_ranges(container):
'pk': 'A' + str(random.randint(1, 10))
}
feed_range_from_pk = container.feed_range_from_partition_key(item['pk'])
- response = container.create_item(item, session_token=previous_session_token)
- session_token = response.get_response_headers()[HttpHeaders.SessionToken]
- # adding everything in the cache in case consolidation is possible
-
- for feed_range_json, session_token_cache in cache.items():
- feed_range = json.loads(feed_range_json)
- feed_ranges_and_session_tokens.append((feed_range, session_token_cache))
target_feed_range: dict = next(
(feed_range for feed_range in feed_ranges if container.is_feed_range_subset(feed_range, feed_range_from_pk)),
{}
)
- feed_ranges_and_session_tokens.append((target_feed_range, session_token))
- latest_session_token = container.get_latest_session_token(feed_ranges_and_session_tokens, target_feed_range)
- # only doing this for the key to be immutable
- feed_range_json = json.dumps(target_feed_range)
- cache[feed_range_json] = latest_session_token
- previous_session_token = session_token
-
+ perform_create_item_with_cached_session_token(cache, container, feed_ranges_and_session_tokens, item, target_feed_range)
def run_sample():
with CosmosClient(HOST, CREDENTIAL) as client:
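The helper above writes with a cached session token; on the reading side, another client or process could use the same cache to get session consistency for those writes. A rough sketch, assuming the same dict keyed by `json.dumps(feed_range)` that this sample populates (the helper name is illustrative and not part of the sample):

```python
import json


def read_with_cached_session_token(container, cache, item_id, pk):
    # Map the partition key to its feed range and reuse the JSON string as the
    # cache key, mirroring how the sample populates the cache.
    feed_range = container.feed_range_from_partition_key(pk)
    session_token = cache.get(json.dumps(feed_range))
    # Passing the cached token asks the service to serve a read at least as
    # recent as the writes that produced the token; None falls back to the default.
    return container.read_item(item=item_id, partition_key=pk, session_token=session_token)
```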
diff --git a/sdk/cosmos/azure-cosmos/samples/session_token_management_async.py b/sdk/cosmos/azure-cosmos/samples/session_token_management_async.py
index e940e218c5cf..9ba58d4c0dc4 100644
--- a/sdk/cosmos/azure-cosmos/samples/session_token_management_async.py
+++ b/sdk/cosmos/azure-cosmos/samples/session_token_management_async.py
@@ -6,7 +6,7 @@
import json
import random
import uuid
-from typing import Dict, Any
+from typing import Dict, Any, List, Tuple
from azure.cosmos import PartitionKey
from azure.cosmos.aio import CosmosClient
@@ -55,8 +55,7 @@ async def storing_session_tokens_pk(container):
# Everything below is just a simulation of what could be run on different machines and clients
# to store session tokens in a cache by feed range from the partition key.
# The cache is a Dict here for simplicity but in a real-world scenario, it would be some service.
- feed_ranges_and_session_tokens = []
- previous_session_token = ""
+ feed_ranges_and_session_tokens: List[Tuple[Dict[str, Any], str]] = []
# populating cache with session tokens
for i in range(5):
@@ -66,18 +65,23 @@ async def storing_session_tokens_pk(container):
'pk': 'A' + str(random.randint(1, 10))
}
target_feed_range = await container.feed_range_from_partition_key(item['pk'])
- response = await container.create_item(item, session_token=previous_session_token)
- session_token = response.get_response_headers()[HttpHeaders.SessionToken]
- # adding everything in the cache in case consolidation is possible
- for feed_range_json, session_token_cache in cache.items():
- feed_range = json.loads(feed_range_json)
- feed_ranges_and_session_tokens.append((feed_range, session_token_cache))
- feed_ranges_and_session_tokens.append((target_feed_range, session_token))
- latest_session_token = await container.get_latest_session_token(feed_ranges_and_session_tokens, target_feed_range)
- # only doing this for the key to be immutable
- feed_range_json = json.dumps(target_feed_range)
- cache[feed_range_json] = latest_session_token
- previous_session_token = session_token
+ await perform_create_item_with_cached_session_token(cache, container, feed_ranges_and_session_tokens, item,
+ target_feed_range)
+
+async def perform_create_item_with_cached_session_token(cache, container, feed_ranges_and_session_tokens, item,
+ target_feed_range):
+ # only doing this for the key to be immutable
+ feed_range_json = json.dumps(target_feed_range)
+ session_token = cache.get(feed_range_json)
+ response = await container.create_item(item, session_token=session_token)
+ response_session_token = response.get_response_headers()[HttpHeaders.SessionToken]
+ # adding everything from the cache in case consolidation is possible
+ for cached_feed_range_json, session_token_cache in cache.items():  # distinct name so feed_range_json (the target cache key) is not overwritten
+ feed_range = json.loads(cached_feed_range_json)
+ feed_ranges_and_session_tokens.append((feed_range, session_token_cache))
+ feed_ranges_and_session_tokens.append((target_feed_range, response_session_token))
+ latest_session_token = await container.get_latest_session_token(feed_ranges_and_session_tokens, target_feed_range)
+ cache[feed_range_json] = latest_session_token
async def storing_session_tokens_container_feed_ranges(container):
@@ -88,8 +92,7 @@ async def storing_session_tokens_container_feed_ranges(container):
# Everything below is just a simulation of what could be run on different machines and clients
# to store session tokens in a cache by feed range from the partition key.
- feed_ranges_and_session_tokens = []
- previous_session_token = ""
+ feed_ranges_and_session_tokens: List[Tuple[Dict[str, Any], str]] = []
feed_ranges = [feed_range async for feed_range in container.read_feed_ranges()]
# populating cache with session tokens
@@ -100,24 +103,12 @@ async def storing_session_tokens_container_feed_ranges(container):
'pk': 'A' + str(random.randint(1, 10))
}
feed_range_from_pk = await container.feed_range_from_partition_key(item['pk'])
- response = await container.create_item(item, session_token=previous_session_token)
- session_token = response.get_response_headers()[HttpHeaders.SessionToken]
- # adding everything in the cache in case consolidation is possible
-
- for feed_range_json, session_token_cache in cache.items():
- feed_range = json.loads(feed_range_json)
- feed_ranges_and_session_tokens.append((feed_range, session_token_cache))
target_feed_range = {}
for feed_range in feed_ranges:
if await container.is_feed_range_subset(feed_range, feed_range_from_pk):
target_feed_range = feed_range
break
- feed_ranges_and_session_tokens.append((target_feed_range, session_token))
- latest_session_token = await container.get_latest_session_token(feed_ranges_and_session_tokens, target_feed_range)
- # only doing this for the key to be immutable
- feed_range_json = json.dumps(target_feed_range)
- cache[feed_range_json] = latest_session_token
- previous_session_token = session_token
+ await perform_create_item_with_cached_session_token(cache, container, feed_ranges_and_session_tokens, item, target_feed_range)
async def run_sample():
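A consumer on the async client could mirror the synchronous sketch shown earlier; again an illustrative helper rather than part of the sample, relying on `feed_range_from_partition_key` and `read_item` being awaitable on the aio container as they are in this file:

```python
import json


async def read_with_cached_session_token_async(container, cache, item_id, pk):
    # Async counterpart of the earlier sketch: look up the cached session token
    # for the item's feed range and pass it to the point read.
    feed_range = await container.feed_range_from_partition_key(pk)
    session_token = cache.get(json.dumps(feed_range))
    return await container.read_item(item=item_id, partition_key=pk, session_token=session_token)
```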