Skip to content

Commit 2e7769d

Browse files
authored
[None][feat] Add customized topk and related unit tests for DSA (#8882)
Signed-off-by: Christina Zhang <83400082+ChristinaZ@users.noreply.github.com>
1 parent f848d84 commit 2e7769d

File tree

8 files changed

+1800
-105
lines changed

8 files changed

+1800
-105
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
3+
* Copyright (c) 2021, NAVER Corp. Authored by CLOVA.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#pragma once
19+
20+
#include <cuda_bf16.h>
21+
#include <cuda_fp16.h>
22+
23+
#include "tensorrt_llm/common/cudaUtils.h"
24+
25+
namespace tensorrt_llm::kernels
26+
{
// Host-side entry point for the indexer top-k, decode variant (judging by the name; the
// commit title describes this work as a customized top-k for DSA). Only the declaration is
// visible here, so the notes below are limited to what the signature itself shows.
//
//   logits, seqLens                    - pointer-to-const, i.e. not modified through these pointers.
//   outIndices, auxLogits, auxIndices  - non-const pointers, i.e. writable by the callee.
//   splitWorkThreshold, numRows, numColumns, stride0, stride1, next_n - scalars passed by value.
//   index_topk                         - defaults to 2048 when omitted.
//   stream                             - defaults to 0 (the default CUDA stream) when omitted.
//
// NOTE(review): presumably every pointer refers to device memory and stride0/stride1 are the
// strides of `logits` — confirm against the definition.
// NOTE(review): auxLogits/auxIndices together with splitWorkThreshold look like intermediate
// buffers for a split-work path; their required sizes are not visible here — TODO confirm.
27+
void invokeIndexerTopKDecode(float const* logits, int const* seqLens, int* outIndices, float* auxLogits,
28+
int* auxIndices, int const splitWorkThreshold, int const numRows, int const numColumns, int const stride0,
29+
int const stride1, int const next_n, int const index_topk = 2048, cudaStream_t const stream = 0);
30+
// Host-side entry point for the indexer top-k, prefill variant (judging by the name). Unlike
// the decode declaration above it takes per-row `rowStarts`/`rowEnds` instead of `seqLens`,
// and has no auxiliary buffers, split-work threshold or `next_n` parameter.
//
//   logits, rowStarts, rowEnds - pointer-to-const, i.e. not modified through these pointers.
//   outIndices                 - non-const pointer, i.e. writable by the callee.
//   numRows, numColumns, stride0, stride1 - scalars passed by value.
//   index_topk                 - defaults to 2048 when omitted.
//   stream                     - defaults to 0 (the default CUDA stream) when omitted.
//
// NOTE(review): presumably rowStarts[i]/rowEnds[i] bound the column range considered for row i,
// and all pointers refer to device memory — confirm against the definition.
31+
void invokeIndexerTopKPrefill(float const* logits, int const* rowStarts, int const* rowEnds, int* outIndices,
32+
int const numRows, int const numColumns, int const stride0, int const stride1, int const index_topk = 2048,
33+
cudaStream_t const stream = 0);
34+
35+
} // namespace tensorrt_llm::kernels

0 commit comments

Comments
 (0)