Skip to content

Commit b951161

Browse files
committed
server : handle context overflow during decode
1 parent d396b43 commit b951161

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

tools/server/server.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4107,6 +4107,9 @@ struct server_context {
4107 | 4107 |   if (slot.is_processing()) {
4108 | 4108 |   send_error(slot, err);
4109 | 4109 |   slot.release();
     | 4110 | +
     | 4111 | + llama_memory_seq_rm(llama_get_memory(ctx), slot.id, -1, -1);
     | 4112 | + slot.prompt.tokens.clear();
4110 | 4113 |   }
4111 | 4114 |   }
4112 | 4115 |

0 commit comments

Comments
 (0)