update

Jackiexiao · Jackiexiao · commit a7f3b0eff26b · 2024-02-04T13:29:04.000+08:00
diff --git a/README.md b/README.md
@@ -168,6 +168,7 @@ Details regarding when to use these options and what to expect from them can be
 A value of 0 means ORT will pick a default which is number of cores.
 * `execution_mode`: Controls whether operators in the graph are executed sequentially or in parallel. Usually when the model has many branches, setting this option to 1 .i.e. "parallel" will give you better performance. Default is 0 which is "sequential execution."
 * `level`: Refers to the graph optimization level. By default all optimizations are enabled. Allowed values are -1 and 1. -1 refers to BASIC optimizations and 1 refers to basic plus extended optimizations like fusions. Please find the details [here](https://onnxruntime.ai/docs/performance/graph-optimizations.html)
+* `share_session_between_instances`: Boolean flag that enables sharing a session between instances. If not specified, session sharing is disabled. This is a global parameter and cannot be defined per instance group. The user should determine whether the parameter makes sense for their setup.
 
 ```
 optimization {
@@ -178,6 +179,7 @@ optimization {
 parameters { key: "intra_op_thread_count" value: { string_value: "0" } }
 parameters { key: "execution_mode" value: { string_value: "0" } }
 parameters { key: "inter_op_thread_count" value: { string_value: "0" } }
+parameters { key: "share_session_between_instances" value: { string_value: "true" } }
 
 ```
 * `enable_mem_arena`: Use 1 to enable the arena and 0 to disable. See [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#a0bbd62df2b3c119636fba89192240593) for more information.
diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc
@@ -117,10 +117,10 @@ class ModelState : public BackendModel {
 
   // Indicate if an onnxrt session should be shared or not. This is a model
   // global and applies to all instances. So, storing it in the model state
-  bool share_session_;
+  bool share_session_between_instances_;
 
   // maintain a map of group id to onnx_rt session. This is only useful if
-  // share_session is set to true in parameters. share_session is a global model
+  // share_session_between_instances is set to true in parameters. share_session_between_instances is a global model
   // config and the user should be careful when setting this. There is no way to
   // set this per instance group.
   std::unordered_map<std::string, std::shared_ptr<OrtSession>>
@@ -203,7 +203,7 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
 }
 
 ModelState::ModelState(TRITONBACKEND_Model* triton_model)
-    : BackendModel(triton_model), share_session_(false)
+    : BackendModel(triton_model), share_session_between_instances_(false)
 {
   // Create session options that will be cloned and used for each
   // instance when creating that instance's session.
@@ -358,13 +358,13 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
   // If this value is set all instances within an instance group will share
   // the ort session
   {
-    bool share_session;
+    bool share_session_between_instances;
     triton::common::TritonJson::Value params;
     if (ModelConfig().Find("parameters", &params)) {
       THROW_IF_BACKEND_MODEL_ERROR(TryParseModelStringParameter(
-          params, "share_session", &share_session, false));
+          params, "share_session_between_instances", &share_session_between_instances, false));
     }
-    share_session_ = share_session;
+    share_session_between_instances_ = share_session_between_instances;
   }
 }
 
@@ -405,7 +405,7 @@ ModelState::LoadModel(
 
   // Check is we are sharing the session. If so get the session pointer and
   // return
-  if (share_session_) {
+  if (share_session_between_instances_) {
     if (GetSessionForGroup(instance_group_name, session) == nullptr) {
       LOG_MESSAGE(
           TRITONSERVER_LOG_INFO,
@@ -689,7 +689,7 @@ ModelState::LoadModel(
 
     session = std::shared_ptr<OrtSession>(session_ptr, SessionDeleter());
 
-    if (share_session_) {
+    if (share_session_between_instances_) {
       // The session was created fine this is not a critical error
       LOG_IF_ERROR(
           SetSessionForGroup(instance_group_name, session),
@@ -938,14 +938,14 @@ ModelState::GetSessionForGroup(
 {
   RETURN_ERROR_IF_TRUE(
       group_name.empty(), TRITONSERVER_ERROR_INVALID_ARG,
-      std::string("Invalid group name"));
+      std::string("Invalid group name: ") + group_name);
   {
     std::unordered_map<std::string, std::shared_ptr<OrtSession>>::iterator
         sessionEntry;
     sessionEntry = groupInstanceSessionMap_.find(group_name);
     RETURN_ERROR_IF_TRUE(
         (sessionEntry == groupInstanceSessionMap_.end()),
-        TRITONSERVER_ERROR_NOT_FOUND, std::string("No such group"));
+        TRITONSERVER_ERROR_NOT_FOUND, std::string("No such group") + group_name);
 
     session = sessionEntry->second;
   }
@@ -958,7 +958,7 @@ ModelState::SetSessionForGroup(
 {
   RETURN_ERROR_IF_TRUE(
       group_name.empty(), TRITONSERVER_ERROR_INVALID_ARG,
-      std::string("Invalid group name"));
+      std::string("Invalid group name") + group_name);
 
   groupInstanceSessionMap_[group_name] = session;
   return nullptr;
@@ -1050,7 +1050,7 @@ class ModelInstanceState : public BackendModelInstance {
 
   // Onnx Runtime variables that are used across runs on this
   // instance.
-  std::shared_ptr<OrtSession> session_;
+  std::unique_ptr<OrtSession> session_;
   OrtAllocator* default_allocator_;
   OrtMemoryInfo* cuda_allocator_info_;
   const OrtMemoryInfo* cpu_allocator_info_;
diff --git a/src/onnxruntime_utils.cc b/src/onnxruntime_utils.cc
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "onnxruntime_utils.h"
+#include <regex>
 
 namespace triton { namespace backend { namespace onnxruntime {
 
@@ -497,18 +498,18 @@ std::string
 GetInstanceGroupName(
     const std::string& model_name, const std::string& instance_name)
 {
-  std::regex groupNameRegex('(' + model_name + '_' + "[0-9]" + ')');
-  std::smatch groupName;
+  std::regex group_name_regex('(' + model_name + '_' + "[0-9]" + ')');
+  std::smatch group_name;
 
   if (model_name.empty() || instance_name.empty()) {
     return "";
   }
 
-  if (std::regex_search(instance_name, groupName, groupNameRegex)) {
-    return groupName.str(1);
+  if (std::regex_search(instance_name, group_name, group_name_regex)) {
+    return group_name.str(1);
   }
 
   return "";
 }
 
-}}}  // namespace triton::backend::onnxruntime
+}}}  // namespace triton::backend::onnxruntime
diff --git a/src/onnxruntime_utils.h b/src/onnxruntime_utils.h
@@ -27,7 +27,6 @@
 #pragma once
 
 #include <onnxruntime_c_api.h>
-#include <regex>
 #include <set>
 #include <string>
 #include <unordered_map>

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,7 @@`
`25`	`25`	`// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
`26`	`26`
`27`	`27`	`#include "onnxruntime_utils.h"`
	`28`	`+#include <regex>`
`28`	`29`
`29`	`30`	`namespace triton { namespace backend { namespace onnxruntime {`
`30`	`31`
`@@ -497,18 +498,18 @@ std::string`
`497`	`498`	`GetInstanceGroupName(`
`498`	`499`	`const std::string& model_name, const std::string& instance_name)`
`499`	`500`	`{`
`500`		`- std::regex groupNameRegex('(' + model_name + '_' + "[0-9]" + ')');`
`501`		`- std::smatch groupName;`
	`501`	`+ std::regex group_name_regex('(' + model_name + '_' + "[0-9]" + ')');`
	`502`	`+ std::smatch group_name;`
`502`	`503`
`503`	`504`	`if (model_name.empty() \|\| instance_name.empty()) {`
`504`	`505`	`return "";`
`505`	`506`	`}`
`506`	`507`
`507`		`- if (std::regex_search(instance_name, groupName, groupNameRegex)) {`
`508`		`- return groupName.str(1);`
	`508`	`+ if (std::regex_search(instance_name, group_name, group_name_regex)) {`
	`509`	`+ return group_name.str(1);`
`509`	`510`	`}`
`510`	`511`
`511`	`512`	`return "";`
`512`	`513`	`}`
`513`	`514`
`514`		`-}}} // namespace triton::backend::onnxruntime`
	`515`	`+}}} // namespace triton::backend::onnxruntime`