Skip to content

Commit a7f3b0e

Browse files
committed
update
1 parent e000a48 commit a7f3b0e

File tree

4 files changed

+20
-18
lines changed

4 files changed

+20
-18
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ Details regarding when to use these options and what to expect from them can be
168168
A value of 0 means ORT will pick a default which is number of cores.
169169
* `execution_mode`: Controls whether operators in the graph are executed sequentially or in parallel. Usually, when the model has many branches, setting this option to 1, i.e. "parallel", will give you better performance. The default is 0, which is "sequential execution".
170170
* `level`: Refers to the graph optimization level. By default all optimizations are enabled. Allowed values are -1 and 1. -1 refers to BASIC optimizations and 1 refers to basic plus extended optimizations like fusions. Please find the details [here](https://onnxruntime.ai/docs/performance/graph-optimizations.html)
171+
* `share_session_between_instances`: Boolean flag that enables sharing a single ONNX Runtime session between the instances of an instance group. If not specified, session sharing is disabled. This is a global parameter and cannot be defined per instance group. The user should determine whether the parameter makes sense for their setup.
171172

172173
```
173174
optimization {
@@ -178,6 +179,7 @@ optimization {
178179
parameters { key: "intra_op_thread_count" value: { string_value: "0" } }
179180
parameters { key: "execution_mode" value: { string_value: "0" } }
180181
parameters { key: "inter_op_thread_count" value: { string_value: "0" } }
182+
parameters { key: "share_session_between_instances" value: {string_value: "true"} }
181183
182184
```
183185
* `enable_mem_arena`: Use 1 to enable the arena and 0 to disable. See [this](https://onnxruntime.ai/docs/api/c/struct_ort_api.html#a0bbd62df2b3c119636fba89192240593) for more information.

src/onnxruntime.cc

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,10 @@ class ModelState : public BackendModel {
117117

118118
// Indicate if an onnxrt session should be shared or not. This is a model
119119
// global and applies to all instances. So, storing it in the model state
120-
bool share_session_;
120+
bool share_session_between_instances_;
121121

122122
// maintain a map of group id to onnx_rt session. This is only useful if
123-
// share_session is set to true in parameters. share_session is a global model
123+
// share_session_between_instances is set to true in parameters. share_session_between_instances is a global model
124124
// config and the user should be careful when setting this. There is no way to
125125
// set this per instance group.
126126
std::unordered_map<std::string, std::shared_ptr<OrtSession>>
@@ -203,7 +203,7 @@ ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
203203
}
204204

205205
ModelState::ModelState(TRITONBACKEND_Model* triton_model)
206-
: BackendModel(triton_model), share_session_(false)
206+
: BackendModel(triton_model), share_session_between_instances_(false)
207207
{
208208
// Create session options that will be cloned and used for each
209209
// instance when creating that instance's session.
@@ -358,13 +358,13 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
358358
// If this value is set all instances within an instance group will share
359359
// the ort session
360360
{
361-
bool share_session;
361+
bool share_session_between_instances;
362362
triton::common::TritonJson::Value params;
363363
if (ModelConfig().Find("parameters", &params)) {
364364
THROW_IF_BACKEND_MODEL_ERROR(TryParseModelStringParameter(
365-
params, "share_session", &share_session, false));
365+
params, "share_session_between_instances", &share_session_between_instances, false));
366366
}
367-
share_session_ = share_session;
367+
share_session_between_instances_ = share_session_between_instances;
368368
}
369369
}
370370

@@ -405,7 +405,7 @@ ModelState::LoadModel(
405405

406406
// Check if we are sharing the session. If so, get the session pointer and
407407
// return
408-
if (share_session_) {
408+
if (share_session_between_instances_) {
409409
if (GetSessionForGroup(instance_group_name, session) == nullptr) {
410410
LOG_MESSAGE(
411411
TRITONSERVER_LOG_INFO,
@@ -689,7 +689,7 @@ ModelState::LoadModel(
689689

690690
session = std::shared_ptr<OrtSession>(session_ptr, SessionDeleter());
691691

692-
if (share_session_) {
692+
if (share_session_between_instances_) {
693693
// The session was created fine, so this is not a critical error
694694
LOG_IF_ERROR(
695695
SetSessionForGroup(instance_group_name, session),
@@ -938,14 +938,14 @@ ModelState::GetSessionForGroup(
938938
{
939939
RETURN_ERROR_IF_TRUE(
940940
group_name.empty(), TRITONSERVER_ERROR_INVALID_ARG,
941-
std::string("Invalid group name"));
941+
std::string("Invalid group name: ") + group_name);
942942
{
943943
std::unordered_map<std::string, std::shared_ptr<OrtSession>>::iterator
944944
sessionEntry;
945945
sessionEntry = groupInstanceSessionMap_.find(group_name);
946946
RETURN_ERROR_IF_TRUE(
947947
(sessionEntry == groupInstanceSessionMap_.end()),
948-
TRITONSERVER_ERROR_NOT_FOUND, std::string("No such group"));
948+
TRITONSERVER_ERROR_NOT_FOUND, std::string("No such group") + group_name);
949949

950950
session = sessionEntry->second;
951951
}
@@ -958,7 +958,7 @@ ModelState::SetSessionForGroup(
958958
{
959959
RETURN_ERROR_IF_TRUE(
960960
group_name.empty(), TRITONSERVER_ERROR_INVALID_ARG,
961-
std::string("Invalid group name"));
961+
std::string("Invalid group name") + group_name);
962962

963963
groupInstanceSessionMap_[group_name] = session;
964964
return nullptr;
@@ -1050,7 +1050,7 @@ class ModelInstanceState : public BackendModelInstance {
10501050

10511051
// Onnx Runtime variables that are used across runs on this
10521052
// instance.
1053-
std::shared_ptr<OrtSession> session_;
1053+
std::unique_ptr<OrtSession> session_;
10541054
OrtAllocator* default_allocator_;
10551055
OrtMemoryInfo* cuda_allocator_info_;
10561056
const OrtMemoryInfo* cpu_allocator_info_;

src/onnxruntime_utils.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626

2727
#include "onnxruntime_utils.h"
28+
#include <regex>
2829

2930
namespace triton { namespace backend { namespace onnxruntime {
3031

@@ -497,18 +498,18 @@ std::string
497498
GetInstanceGroupName(
498499
const std::string& model_name, const std::string& instance_name)
499500
{
500-
std::regex groupNameRegex('(' + model_name + '_' + "[0-9]" + ')');
501-
std::smatch groupName;
501+
std::regex group_name_regex('(' + model_name + '_' + "[0-9]" + ')');
502+
std::smatch group_name;
502503

503504
if (model_name.empty() || instance_name.empty()) {
504505
return "";
505506
}
506507

507-
if (std::regex_search(instance_name, groupName, groupNameRegex)) {
508-
return groupName.str(1);
508+
if (std::regex_search(instance_name, group_name, group_name_regex)) {
509+
return group_name.str(1);
509510
}
510511

511512
return "";
512513
}
513514

514-
}}} // namespace triton::backend::onnxruntime
515+
}}} // namespace triton::backend::onnxruntime

src/onnxruntime_utils.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
#pragma once
2828

2929
#include <onnxruntime_c_api.h>
30-
#include <regex>
3130
#include <set>
3231
#include <string>
3332
#include <unordered_map>

0 commit comments

Comments
 (0)