From 63482334dd26cf70b936e7c0a6f2d3c56ce33130 Mon Sep 17 00:00:00 2001 From: Allan Lasser Date: Thu, 9 Oct 2025 11:52:54 -0400 Subject: [PATCH 1/3] Prevent add on run export from crashing with queryset iterator --- documentcloud/addons/admin.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/documentcloud/addons/admin.py b/documentcloud/addons/admin.py index f8662bd7..e08ada1a 100644 --- a/documentcloud/addons/admin.py +++ b/documentcloud/addons/admin.py @@ -128,7 +128,10 @@ def export_runs_as_csv(self, request, queryset): """Export selected Add-On Runs to CSV.""" field_names = [ "addon_id", + "addon_name", "user_id", + "user_name", + "user_email", "run_id", "status", "rating", @@ -143,11 +146,28 @@ def export_runs_as_csv(self, request, queryset): writer = csv.writer(response) writer.writerow(field_names) - for run in queryset: + limited_queryset = queryset.select_related("addon", "user").only( + "addon_id", + "user_id", + "run_id", + "status", + "rating", + "credits_spent", + "created_at", + "updated_at", + "addon__name", + "user__name", + "user__email", + ) + + for run in limited_queryset.iterator(chunk_size=2000): writer.writerow( [ run.addon_id, + run.addon.name, run.user_id, + run.user.name, + run.user.email, run.run_id, run.status, run.rating, From 66081e4de7db178c7b7e5aeb66d0d22be7091a0b Mon Sep 17 00:00:00 2001 From: Allan Lasser Date: Thu, 9 Oct 2025 11:58:04 -0400 Subject: [PATCH 2/3] Use configurable chunk size setting instead of hardcoding value --- config/settings/base.py | 3 +++ documentcloud/addons/admin.py | 3 ++- documentcloud/users/admin.py | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/config/settings/base.py b/config/settings/base.py index 7bb600ef..832f4873 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -290,6 +290,9 @@ ADMINS = [("Mitchell Kotler", "mitch@muckrock.com")] # https://docs.djangoproject.com/en/dev/ref/settings/#managers MANAGERS = ADMINS +# 
Chunk size for CSV exports using .iterator() to process large querysets +# without loading all records into memory at once +CSV_EXPORT_CHUNK_SIZE = env.int("CSV_EXPORT_CHUNK_SIZE", default=2000) # LOGGING # ------------------------------------------------------------------------------ diff --git a/documentcloud/addons/admin.py b/documentcloud/addons/admin.py index e08ada1a..7b8db70a 100644 --- a/documentcloud/addons/admin.py +++ b/documentcloud/addons/admin.py @@ -1,4 +1,5 @@ # Django +from django.conf import settings from django.contrib import admin, messages from django.db.models import JSONField from django.forms import widgets @@ -160,7 +161,7 @@ def export_runs_as_csv(self, request, queryset): "user__email", ) - for run in limited_queryset.iterator(chunk_size=2000): + for run in limited_queryset.iterator(chunk_size=settings.CSV_EXPORT_CHUNK_SIZE): writer.writerow( [ run.addon_id, diff --git a/documentcloud/users/admin.py b/documentcloud/users/admin.py index 61cc6e3a..a92fb0cf 100644 --- a/documentcloud/users/admin.py +++ b/documentcloud/users/admin.py @@ -1,4 +1,5 @@ # Django +from django.conf import settings from django.contrib import admin from django.http.response import HttpResponse from django.urls.conf import re_path @@ -47,7 +48,7 @@ def format_date(date): writer.writerow(["username", "name", "email", "last_login", "date_joined"]) for user in User.objects.only( "username", "name", "email", "last_login", "created_at" - ).iterator(chunk_size=2000): + ).iterator(chunk_size=settings.CSV_EXPORT_CHUNK_SIZE): writer.writerow( [ user.username, From 3379eaa8f015bce98ca7ce59f028b6f997a2da24 Mon Sep 17 00:00:00 2001 From: Allan Lasser Date: Thu, 9 Oct 2025 11:59:41 -0400 Subject: [PATCH 3/3] Use queryset iterator in other export functions --- documentcloud/organizations/admin.py | 3 ++- documentcloud/statistics/admin.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/documentcloud/organizations/admin.py b/documentcloud/organizations/admin.py 
index 2e7d0c79..f347c5d1 100644 --- a/documentcloud/organizations/admin.py +++ b/documentcloud/organizations/admin.py @@ -1,4 +1,5 @@ # Django +from django.conf import settings from django.contrib import admin from django.http import HttpResponse @@ -41,7 +42,7 @@ def export_ai_credit_logs(self, request, queryset): writer = csv.writer(response) writer.writerow(field_names) - for log in queryset: + for log in queryset.iterator(chunk_size=settings.CSV_EXPORT_CHUNK_SIZE): writer.writerow( [ str(log.organization), diff --git a/documentcloud/statistics/admin.py b/documentcloud/statistics/admin.py index 66a3249e..81d9babe 100644 --- a/documentcloud/statistics/admin.py +++ b/documentcloud/statistics/admin.py @@ -1,4 +1,5 @@ # Django +from django.conf import settings from django.contrib import admin from django.http import HttpResponse @@ -29,7 +30,7 @@ def export_statistics_as_csv(self, request, queryset): writer = csv.writer(response) writer.writerow(field_names) - for obj in queryset: + for obj in queryset.iterator(chunk_size=settings.CSV_EXPORT_CHUNK_SIZE): row = [] for field_name in field_names: value = getattr(obj, field_name)