Skip to content

Commit dfd1a39

Browse files
committed
better ram/meta estimation
1 parent a2035b5 commit dfd1a39

File tree

2 files changed

+32
-15
lines changed

2 files changed

+32
-15
lines changed

utensor_cgen/backend/utensor/code_generator/rearch/_code_generator.py

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
import re
2+
from collections import defaultdict
23
from itertools import chain
34
from pathlib import Path
4-
from collections import defaultdict
55

66
from utensor_cgen.backend.base import BackendPart
7+
from utensor_cgen.backend.graph_lower.generic_graph_lower import \
8+
TopoOrderTensorTimeslotPlanner
79
from utensor_cgen.backend.utensor.snippets.composer import Composer
810
from utensor_cgen.backend.utensor.snippets.legacy import (
911
ContextGlobalArrayContainer, WeightSnippet)
1012
from utensor_cgen.backend.utensor.snippets.rearch import (
11-
DeclareRamTensorSnippet, DeclareRomTensorSnippet,
12-
FreeTensorSnippet, SimpleContainer, TimeSlotContainer
13-
)
13+
DeclareRamTensorSnippet, DeclareRomTensorSnippet, FreeTensorSnippet,
14+
SimpleContainer, TimeSlotContainer)
1415
from utensor_cgen.backend.utensor.snippets.template_env import env
15-
from utensor_cgen.backend.graph_lower.generic_graph_lower import TopoOrderTensorTimeslotPlanner
1616
from utensor_cgen.logger import logger
1717
from utensor_cgen.utils import Configuration, class_property
1818

@@ -183,8 +183,10 @@ def _time_slot_generate_files(
183183
):
184184
template_vars = {}
185185
template_vars['model_name'] = ugraph.name
186-
template_vars['meta_data_pool_size'] = self._compute_meta_data_size(ugraph)
187-
template_vars['ram_data_pool_size'] = self._compute_ram_data_size(ugraph)
186+
(template_vars['meta_data_pool_size'],
187+
template_vars['meta_dtype']) = self._compute_meta_data_size(ugraph)
188+
(template_vars['ram_data_pool_size'],
189+
template_vars['ram_dtype']) = self._compute_ram_data_size(ugraph)
188190
template_vars['placeholders'] = placeholders
189191
template_vars['out_tensor_var_names'] = [
190192
tensor_var_map[tensor.name] for tensor in chain(*[
@@ -349,16 +351,23 @@ def default_config(cls):
349351
return config
350352

351353
def _compute_meta_data_size(self, ugraph):
    """Return the meta data pool size together with its C type name.

    When ``self.meta_data_pool_size`` is the string ``'auto'`` the size is
    estimated from ``ugraph``; otherwise the configured value is used
    unchanged.

    :param ugraph: graph whose ``ops_info`` values expose
      ``input_tensors`` and ``output_tensors``
    :return: a ``(size, dtype_str)`` tuple, where ``dtype_str`` is whatever
      ``self._get_mem_pool_dtype_str(size)`` returns
    """
    if self.meta_data_pool_size == 'auto':
        # NOTE: simple heuristic, num of tensors * 64, maybe more or less
        # depending on target platform
        # NOTE: assuming user is using localCircularArenaAllocator
        # TODO: target aware estimation
        tensors = set()
        for op_info in ugraph.ops_info.values():
            # a set is used so a tensor shared by several ops is counted once
            tensors.update(op_info.input_tensors)
            tensors.update(op_info.output_tensors)
        size = len(tensors) * 64
    else:
        size = self.meta_data_pool_size
    dtype_str = self._get_mem_pool_dtype_str(size)
    return size, dtype_str
359367

360368
def _compute_ram_data_size(self, ugraph):
361-
# TODO: if mem_optimizer is None, use a default mem optimizer
369+
# TODO: if tensor alloc plan is None, use a default mem estimator
370+
# NOTE: assuming user is using localCircularArenaAllocator
362371
if self.ram_data_pool_size == 'auto':
363372
# TODO: compute actual ram data size with ugraph
364373
if '_tensor_alloc' in ugraph.attributes:
@@ -367,4 +376,12 @@ def _compute_ram_data_size(self, ugraph):
367376
size = 256
368377
else:
369378
size = self.ram_data_pool_size
370-
return size
379+
dtype_str = self._get_mem_pool_dtype_str(size)
380+
return size, dtype_str
381+
382+
@staticmethod
def _get_mem_pool_dtype_str(size):
    """Return the C integer type name to use for a memory pool of ``size``.

    :param size: pool size; it is compared against ``2**15``
    :return: ``'uint32_t'`` when ``size`` is greater than ``2**15``,
      otherwise ``'uint16_t'``
    """
    # NOTE: assuming user is using localCircularArenaAllocator
    if size > 2**15:
        return 'uint32_t'
    return 'uint16_t'

utensor_cgen/backend/utensor/snippets/templates/snippets/rearch/simple.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
#include "uTensor/core/tensor.hpp"
55

6-
// estimated ram usage: {{ram_data_pool_size}} bytes
7-
// estimated meta data uage: {{meta_data_pool_size}} bytes
6+
constexpr {{ram_dtype}} estimated_ram_usage = {{ram_data_pool_size}};
7+
constexpr {{meta_dtype}} estimated_meta_usage = {{meta_data_pool_size}};
88

99
void compute_{{model_name}}({%for pl in placeholders%}uTensor::Tensor& {{pl}}, {%endfor%}{%for out_tensor in out_tensor_var_names%}uTensor::Tensor& {{out_tensor}}{%if not loop.last%}, {%endif%}{%endfor%});
1010

0 commit comments

Comments
 (0)