File tree (expand / collapse): 2 files changed, +13 -1 lines changed.
lines changed Original file line number Diff line number Diff line change @@ -75,7 +75,11 @@ def __new__(cls):
7575 documentation = "The percentage of decode slots currently being used" ,
7676 labelnames = ["id" , "idx" ],
7777 )
78-
78+ _model_load_time = Gauge (
79+ name = "jetstream_model_load_time" ,
80+ documentation = "Total time taken to load the model" ,
81+ labelnames = ["id" ],
82+ )
7983 _server_startup_latency = Gauge (
8084 name = "jetstream_server_startup_latency" ,
8185 documentation = "Total time taken to start the Jetstream server" ,
@@ -232,6 +236,9 @@ def get_slots_used_percentage_metric(self, idx: int):
232236 def get_server_startup_latency_metric (self ):
233237 return self ._server_startup_latency .labels (id = self ._id )
234238
239+ def get_model_load_time_metric (self ):
240+ return self ._model_load_time .labels (id = self ._id )
241+
235242 def get_time_to_first_token (self ):
236243 return self ._time_to_first_token .labels (id = self ._id )
237244
Original file line number | Diff line number | Diff line change
@@ -114,10 +114,15 @@ def create_driver(
114114 An orchestrator driver.
115115 """
116116 engines = config_lib .get_engines (config , devices = devices )
117+ model_load_start_time = time .time ()
117118 prefill_params = [pe .load_params () for pe in engines .prefill_engines ]
118119 generate_params = [ge .load_params () for ge in engines .generate_engines ]
119120 shared_params = [ie .load_params () for ie in engines .interleaved_engines ]
120121 logging .info ("Loaded all weights." )
122+ if metrics_collector :
123+ metrics_collector .get_model_load_time_metric ().set (
124+ time .time () - model_load_start_time
125+ )
121126 interleaved_mode = (
122127 len (config .prefill_slices ) + len (config .generate_slices ) == 0
123128 )
You can’t perform that action at this time.
0 commit comments