Skip to content

chore(deletions): Remove old group deletion task #96457

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 6 additions & 17 deletions src/sentry/api/helpers/group_index/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import audit_log, eventstream, options
from sentry import audit_log
from sentry.api.base import audit_logger
from sentry.deletions.defaults.group import GROUP_CHUNK_SIZE
from sentry.deletions.tasks.groups import delete_groups as delete_groups_task
from sentry.deletions.tasks.groups import delete_groups_for_project
from sentry.issues.grouptype import GroupCategory
from sentry.models.group import Group, GroupStatus
Expand Down Expand Up @@ -100,23 +99,13 @@ def delete_group_list(
# `Group` instances that are pending deletion
GroupInbox.objects.filter(project_id=project.id, group__id__in=group_ids).delete()

if options.get("deletions.groups.use-new-task"):
# Schedule a task per GROUP_CHUNK_SIZE batch of groups
for i in range(0, len(group_ids), GROUP_CHUNK_SIZE):
delete_groups_for_project.apply_async(
kwargs={
"project_id": project.id,
"object_ids": group_ids[i : i + GROUP_CHUNK_SIZE],
"transaction_id": str(transaction_id),
}
)
else:
eventstream_state = eventstream.backend.start_delete_groups(project.id, group_ids)
delete_groups_task.apply_async(
# Schedule a task per GROUP_CHUNK_SIZE batch of groups
for i in range(0, len(group_ids), GROUP_CHUNK_SIZE):
delete_groups_for_project.apply_async(
kwargs={
"object_ids": group_ids,
"project_id": project.id,
"object_ids": group_ids[i : i + GROUP_CHUNK_SIZE],
"transaction_id": str(transaction_id),
"eventstream_state": eventstream_state,
}
)

Expand Down
79 changes: 2 additions & 77 deletions src/sentry/deletions/tasks/groups.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from collections.abc import Mapping, Sequence
from collections.abc import Sequence
from typing import Any

import sentry_sdk

from sentry import deletions, eventstream
from sentry import deletions
from sentry.deletions.defaults.group import GROUP_CHUNK_SIZE
from sentry.deletions.tasks.scheduled import MAX_RETRIES, logger
from sentry.exceptions import DeleteAborted
Expand All @@ -16,75 +16,6 @@
from sentry.utils import metrics


@instrumented_task(
    name="sentry.deletions.tasks.groups.delete_groups",
    queue="cleanup",
    # Retry delay and count mirror the TaskworkerConfig Retry settings below.
    default_retry_delay=60 * 5,
    max_retries=MAX_RETRIES,
    acks_late=True,
    silo_mode=SiloMode.REGION,
    taskworker_config=TaskworkerConfig(
        namespace=deletion_tasks,
        retry=Retry(
            times=MAX_RETRIES,
            delay=60 * 5,
        ),
    ),
)
@retry(exclude=(DeleteAborted,))
@track_group_async_operation
def delete_groups(
    object_ids: Sequence[int],
    transaction_id: str,
    eventstream_state: Mapping[str, Any] | None = None,
    **kwargs: Any,
) -> None:
    """Delete the given groups, GROUP_CHUNK_SIZE ids at a time.

    Works through the first GROUP_CHUNK_SIZE ids, then re-schedules itself
    with whatever remains until everything is deleted; only then is the
    eventstream deletion (if any) finalized.

    :param object_ids: ids of the ``Group`` rows to delete.
    :param transaction_id: id shared by all tasks spawned from the same
        request; used to correlate logs and Sentry tags.
    :param eventstream_state: opaque state produced by
        ``eventstream.backend.start_delete_groups``; carried through every
        re-scheduled task and passed to ``end_delete_groups`` at the end.
    :raises DeleteAborted: when no ``Group`` in the current batch exists.
    """
    # Split off the batch handled by this invocation; `rest` is re-scheduled below.
    current_batch, rest = object_ids[:GROUP_CHUNK_SIZE], object_ids[GROUP_CHUNK_SIZE:]

    # Select first_group from current_batch to ensure project_id tag reflects the current batch
    first_group = Group.objects.filter(id__in=current_batch).order_by("id").first()
    if not first_group:
        raise DeleteAborted("delete_groups.no_group_found")

    # The tags can be used if we want to find errors for when a task fails
    sentry_sdk.set_tags(
        {
            "project_id": first_group.project_id,
            "transaction_id": transaction_id,
        },
    )

    logger.info(
        "delete_groups.started",
        extra={
            "object_ids_count": len(object_ids),
            "object_ids_current_batch": current_batch,
            "first_id": first_group.id,
            # These can be used when looking for logs in GCP
            "project_id": first_group.project_id,
            # All tasks initiated by the same request will have the same transaction_id
            "transaction_id": transaction_id,
        },
    )

    task = deletions.get(
        model=Group, query={"id__in": current_batch}, transaction_id=transaction_id
    )
    # NOTE(review): chunk() appears to report whether this batch still has work
    # left — confirm against the deletions framework.
    has_more = task.chunk()
    if has_more or rest:
        # Re-run with the same ids while the current batch is unfinished,
        # otherwise continue with the remaining ids.
        delete_groups.apply_async(
            kwargs={
                "object_ids": object_ids if has_more else rest,
                "transaction_id": transaction_id,
                "eventstream_state": eventstream_state,
            },
        )
    else:
        # all groups have been deleted
        if eventstream_state:
            eventstream.backend.end_delete_groups(eventstream_state)


@instrumented_task(
name="sentry.deletions.tasks.groups.delete_groups_for_project",
queue="cleanup",
Expand Down Expand Up @@ -142,9 +73,6 @@ def delete_groups_for_project(
f"is greater than GROUP_CHUNK_SIZE"
)

# This is a no-op on the Snuba side, however, one day it may not be.
eventstream_state = eventstream.backend.start_delete_groups(project_id, object_ids)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have Snuba events to delete we will call it here:

eventstream_state = eventstream.backend.start_delete_groups(project_id, group_ids)
eventstream.backend.end_delete_groups(eventstream_state)

We have always been calling it more than once; this can be seen in the tests:
Image

# These can be used for debugging
extra = {"project_id": project_id, "transaction_id": transaction_id}
sentry_sdk.set_tags(extra)
Expand All @@ -162,6 +90,3 @@ def delete_groups_for_project(
# Use this to query the logs
tags={"transaction_id": transaction_id},
)

# This will delete all Snuba events for all deleted groups
eventstream.backend.end_delete_groups(eventstream_state)
7 changes: 1 addition & 6 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,12 +856,7 @@
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

register(
"deletions.groups.use-new-task",
type=Bool,
default=False,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

register(
"issues.severity.first-event-severity-calculation-projects-allowlist",
type=Sequence,
Expand Down
188 changes: 43 additions & 145 deletions tests/sentry/issues/endpoints/test_organization_group_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4311,78 +4311,55 @@ def assert_deleted_groups(self, groups: Sequence[Group]) -> None:
assert not Group.objects.filter(id=group.id).exists()
assert not GroupHash.objects.filter(group_id=group.id).exists()

@patch("sentry.eventstream.backend")
def test_delete_by_id(self, mock_eventstream: MagicMock) -> None:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test did more than just verify that querying by id= works, so I have simplified it.

eventstream_state = {"event_stream_state": str(uuid4())}
mock_eventstream.start_delete_groups = Mock(return_value=eventstream_state)
@patch("sentry.eventstream.snuba.SnubaEventStream._send")
@patch("sentry.eventstream.snuba.datetime")
def test_delete_by_id(self, mock_datetime: MagicMock, mock_send: MagicMock) -> None:
fixed_datetime = datetime.now()
mock_datetime.now.return_value = fixed_datetime

group1 = self.create_group(status=GroupStatus.RESOLVED)
group2 = self.create_group(status=GroupStatus.UNRESOLVED)
group3 = self.create_group(status=GroupStatus.IGNORED)
group4 = self.create_group(
project=self.create_project(slug="foo"),
status=GroupStatus.UNRESOLVED,
)

hashes = []
for g in group1, group2, group3, group4:
hash = uuid4().hex
hashes.append(hash)
GroupHash.objects.create(project=g.project, hash=hash, group=g)
groups = self.create_n_groups_with_hashes(2, project=self.project)
group_ids = [group.id for group in groups]

self.login_as(user=self.user)
with self.feature("organizations:global-views"):
response = self.get_response(
qs_params={"id": [group1.id, group2.id], "group4": group4.id}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the original test introduced this non-useful parameter by mistake: "group4": group4.id

)

mock_eventstream.start_delete_groups.assert_called_once_with(
group1.project_id, [group1.id, group2.id]
)

assert response.status_code == 204

assert Group.objects.get(id=group1.id).status == GroupStatus.PENDING_DELETION
assert not GroupHash.objects.filter(group_id=group1.id).exists()

assert Group.objects.get(id=group2.id).status == GroupStatus.PENDING_DELETION
assert not GroupHash.objects.filter(group_id=group2.id).exists()

assert Group.objects.get(id=group3.id).status != GroupStatus.PENDING_DELETION
assert GroupHash.objects.filter(group_id=group3.id).exists()

assert Group.objects.get(id=group4.id).status != GroupStatus.PENDING_DELETION
assert GroupHash.objects.filter(group_id=group4.id).exists()

Group.objects.filter(id__in=(group1.id, group2.id)).update(status=GroupStatus.UNRESOLVED)

with self.tasks():
with self.feature("organizations:global-views"):
response = self.get_response(
qs_params={"id": [group1.id, group2.id], "group4": group4.id}
)
with self.tasks(), self.feature("organizations:global-views"):
response = self.get_response(qs_params={"id": group_ids})
assert response.status_code == 204

# XXX(markus): Something is sending duplicated replacements to snuba --
# once from within tasks.deletions.groups and another time from
# sentry.deletions.defaults.groups
assert mock_eventstream.end_delete_groups.call_args_list == [
call(eventstream_state),
call(eventstream_state),
# Extract transaction_id from the first call
transaction_id = mock_send.call_args_list[0][1]["extra_data"][0]["transaction_id"]

assert mock_send.call_args_list == [
call(
self.project.id,
"start_delete_groups",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The start call.

extra_data=(
{
"transaction_id": transaction_id,
"project_id": self.project.id,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As I mentioned in one of my other PRs, project_id is a required parameter for Snuba (besides the group ids).

"group_ids": group_ids,
"datetime": json.datetime_to_str(fixed_datetime),
},
),
asynchronous=False,
),
call(
self.project.id,
"end_delete_groups",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The end call.

extra_data=(
{
"transaction_id": transaction_id,
"project_id": self.project.id,
"group_ids": group_ids,
"datetime": json.datetime_to_str(fixed_datetime),
},
),
asynchronous=False,
),
]

assert response.status_code == 204

assert not Group.objects.filter(id=group1.id).exists()
assert not GroupHash.objects.filter(group_id=group1.id).exists()

assert not Group.objects.filter(id=group2.id).exists()
assert not GroupHash.objects.filter(group_id=group2.id).exists()

assert Group.objects.filter(id=group3.id).exists()
assert GroupHash.objects.filter(group_id=group3.id).exists()

assert Group.objects.filter(id=group4.id).exists()
assert GroupHash.objects.filter(group_id=group4.id).exists()
for group in groups:
assert not Group.objects.filter(id=group.id).exists()
assert not GroupHash.objects.filter(group_id=group.id).exists()

@patch("sentry.eventstream.backend")
def test_delete_performance_issue_by_id(self, mock_eventstream: MagicMock) -> None:
Expand Down Expand Up @@ -4417,85 +4394,6 @@ def test_bulk_delete_for_many_projects_without_option(self) -> None:
groups_1 = self.create_n_groups_with_hashes(2, project=self.project)
groups_2 = self.create_n_groups_with_hashes(5, project=project_2)

with (
self.tasks(),
patch("sentry.deletions.tasks.groups.GROUP_CHUNK_SIZE", NEW_CHUNK_SIZE),
patch("sentry.deletions.tasks.groups.logger") as mock_logger,
patch(
"sentry.api.helpers.group_index.delete.uuid4",
side_effect=[self.get_mock_uuid("foo"), self.get_mock_uuid("bar")],
),
):
self.login_as(user=self.user)
response = self.get_success_response(qs_params={"query": ""})
assert response.status_code == 204
batch_1 = [g.id for g in groups_2[0:2]]
batch_2 = [g.id for g in groups_2[2:4]]
batch_3 = [g.id for g in groups_2[4:]]
assert batch_1 + batch_2 + batch_3 == [g.id for g in groups_2]

calls_by_project: dict[int, list[tuple[str, dict[str, Any]]]] = defaultdict(list)
for log_call in mock_logger.info.call_args_list:
calls_by_project[log_call[1]["extra"]["project_id"]].append(log_call)

assert len(calls_by_project) == 2
assert calls_by_project[self.project.id] == [
call(
"delete_groups.started",
extra={
"object_ids_count": len(groups_1),
"object_ids_current_batch": [g.id for g in groups_1],
"first_id": groups_1[0].id,
"project_id": self.project.id,
"transaction_id": "bar",
},
),
]
assert calls_by_project[project_2.id] == [
call(
"delete_groups.started",
extra={
"object_ids_count": 5,
"object_ids_current_batch": batch_1,
"first_id": batch_1[0],
"project_id": project_2.id,
"transaction_id": "foo",
},
),
call(
"delete_groups.started",
extra={
"object_ids_count": 3,
"object_ids_current_batch": batch_2,
"first_id": batch_2[0],
"project_id": project_2.id,
"transaction_id": "foo",
},
),
call(
"delete_groups.started",
extra={
"object_ids_count": 1,
"object_ids_current_batch": batch_3,
"first_id": batch_3[0],
"project_id": project_2.id,
"transaction_id": "foo",
},
),
]

self.assert_deleted_groups(groups_1 + groups_2)

def test_bulk_delete_for_many_projects_with_option(self) -> None:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is now the default one.

NEW_CHUNK_SIZE = 2
with (
self.options({"deletions.groups.use-new-task": True}),
self.feature("organizations:global-views"),
):
project_2 = self.create_project(slug="baz", organization=self.organization)
groups_1 = self.create_n_groups_with_hashes(2, project=self.project)
groups_2 = self.create_n_groups_with_hashes(5, project=project_2)

with (
self.tasks(),
patch("sentry.api.helpers.group_index.delete.GROUP_CHUNK_SIZE", NEW_CHUNK_SIZE),
Expand Down
4 changes: 2 additions & 2 deletions tests/snuba/api/endpoints/test_group_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def test_delete_error_issue(self) -> Any:
url = f"/api/0/issues/{group.id}/"

with patch(
"sentry.api.helpers.group_index.delete.delete_groups_task.apply_async"
"sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async"
) as mock_apply_async:
response = self.client.delete(url, format="json")
mock_apply_async.assert_called_once()
Expand All @@ -353,7 +353,7 @@ def test_delete_issue_platform_issue(self) -> Any:
url = f"/api/0/issues/{group.id}/"

with patch(
"sentry.api.helpers.group_index.delete.delete_groups_task.apply_async"
"sentry.api.helpers.group_index.delete.delete_groups_for_project.apply_async"
) as mock_apply_async:
response = self.client.delete(url, format="json")
assert response.status_code == 202
Expand Down
Loading
Loading