Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# SPDX-FileCopyrightText: 2025 Weibo, Inc.
#
# SPDX-License-Identifier: Apache-2.0

"""Add subtask_attachments table for file upload support

Revision ID: add_subtask_attachments
Revises: a1b2c3d4e5f6
Create Date: 2025-12-03

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = 'add_subtask_attachments'
down_revision: Union[str, None] = 'a1b2c3d4e5f6'  # previous migration in the chain
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the subtask_attachments table plus its lookup indexes.

    Stores uploaded files inline (binary_data) together with the text
    extracted from them, and links each attachment to a subtask and user.
    """
    table_name = 'subtask_attachments'
    op.create_table(
        table_name,
        # Surrogate primary key.
        sa.Column('id', sa.Integer(), nullable=False, autoincrement=True),
        # Nullable so an attachment can exist before being tied to a subtask.
        sa.Column('subtask_id', sa.Integer(), nullable=True),
        sa.Column('user_id', sa.Integer(), nullable=False),
        sa.Column('original_filename', sa.String(255), nullable=False),
        sa.Column('file_extension', sa.String(20), nullable=False),
        sa.Column('file_size', sa.Integer(), nullable=False),
        sa.Column('mime_type', sa.String(100), nullable=False),
        # Raw file contents stored in the database row.
        sa.Column('binary_data', sa.LargeBinary(), nullable=False),
        # Text extracted from the file for downstream use; absent until parsed.
        sa.Column('extracted_text', sa.Text(), nullable=True),
        sa.Column('text_length', sa.Integer(), nullable=True),
        # Upload/parsing lifecycle state.
        sa.Column('status', sa.Enum('uploading', 'parsing', 'ready', 'failed', name='attachmentstatus'), nullable=False),
        sa.Column('error_message', sa.String(500), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
        # Deleting a subtask removes its attachments.
        sa.ForeignKeyConstraint(['subtask_id'], ['subtasks.id'], ondelete='CASCADE'),
        mysql_charset='utf8mb4',
        mysql_collate='utf8mb4_unicode_ci',
        mysql_engine='InnoDB'
    )

    # One single-column index per common lookup path.
    for column in ('id', 'subtask_id', 'user_id'):
        op.create_index(f'ix_{table_name}_{column}', table_name, [column])


def downgrade() -> None:
    """Drop the subtask_attachments table, its indexes, and (on PostgreSQL
    only) the standalone attachmentstatus enum type."""
    op.drop_index('ix_subtask_attachments_user_id', table_name='subtask_attachments')
    op.drop_index('ix_subtask_attachments_subtask_id', table_name='subtask_attachments')
    op.drop_index('ix_subtask_attachments_id', table_name='subtask_attachments')
    op.drop_table('subtask_attachments')

    # sa.Enum creates a standalone TYPE object only on PostgreSQL; MySQL
    # stores the ENUM inline in the column and has no DROP TYPE statement,
    # so running this unconditionally would raise a syntax error there.
    if op.get_bind().dialect.name == 'postgresql':
        op.execute("DROP TYPE IF EXISTS attachmentstatus")
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# SPDX-FileCopyrightText: 2025 Weibo, Inc.
#
# SPDX-License-Identifier: Apache-2.0

"""Fix binary_data column size to LONGBLOB for large file support

Revision ID: fix_binary_data_size
Revises: add_subtask_attachments
Create Date: 2025-12-03

This migration changes the binary_data column from BLOB (64KB max) to LONGBLOB (4GB max)
to support larger file uploads like PDFs and documents.
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql


# revision identifiers, used by Alembic.
revision: str = 'fix_binary_data_size'
down_revision: Union[str, None] = 'add_subtask_attachments'  # table-creation migration
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Widen binary_data and extracted_text columns on MySQL.

    mysql.LONGBLOB / mysql.LONGTEXT are MySQL-specific types, so this
    migration is applied only when running against a MySQL database and
    is a no-op on every other dialect (where sa.LargeBinary / sa.Text
    already map to sufficiently large types).
    """
    bind = op.get_bind()
    if bind.dialect.name != 'mysql':
        return

    # MySQL BLOB caps at 64KB; LONGBLOB allows up to 4GB for file uploads.
    op.alter_column(
        'subtask_attachments',
        'binary_data',
        existing_type=sa.LargeBinary(),
        type_=mysql.LONGBLOB(),
        existing_nullable=False
    )

    # MySQL TEXT caps at 64KB; LONGTEXT allows up to 4GB for large documents.
    op.alter_column(
        'subtask_attachments',
        'extracted_text',
        existing_type=sa.Text(),
        type_=mysql.LONGTEXT(),
        existing_nullable=True
    )
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# Check for dialect checks and MySQL-specific patterns in migrations
cd /tmp && find . -name "backend" -type d 2>/dev/null | head -1 | xargs -I {} bash -c 'cd {} && pwd'

Repository: wecode-ai/Wegent

Length of output: 42


🏁 Script executed:

#!/bin/bash
# Explore the migration structure first
echo "=== Finding Alembic versions directory ==="
find . -path "*/backend/alembic/versions" -type d 2>/dev/null

echo -e "\n=== Listing migrations ==="
ls -la backend/alembic/versions/ 2>/dev/null | head -20

Repository: wecode-ai/Wegent

Length of output: 739


🏁 Script executed:

#!/bin/bash
# Check dialect checks pattern across migrations
echo "=== Checking for dialect/bind checks in migrations ==="
rg -n "dialect\.name|get_bind\(\)|if.*dialect" backend/alembic/versions/ || echo "No dialect checks found"

echo -e "\n=== Checking MySQL-specific type usage ==="
rg -n "mysql\.(LONGBLOB|LONGTEXT|MEDIUMBLOB|MEDIUMTEXT)" backend/alembic/versions/ || echo "No other MySQL-specific types found"

echo -e "\n=== Checking for the subtask_attachments table creation ==="
rg -n "subtask_attachments" backend/alembic/versions/ -B 2 -A 2

Repository: wecode-ai/Wegent

Length of output: 8285


🏁 Script executed:

#!/bin/bash
# Check database configuration and strategy
echo "=== Checking database configuration files ==="
find backend -name "*.py" -type f | xargs rg -l "DATABASE_URL|SQLALCHEMY_DATABASE_URI|sqlalchemy\.create_engine" | head -10

echo -e "\n=== Checking alembic.ini or configuration ==="
if [ -f "backend/alembic.ini" ]; then
  grep -i "sqlalchemy" backend/alembic.ini || echo "No sqlalchemy config found in alembic.ini"
fi

echo -e "\n=== Checking env.py for database configuration ==="
if [ -f "backend/alembic/env.py" ]; then
  head -50 backend/alembic/env.py
fi

Repository: wecode-ai/Wegent

Length of output: 2045


🏁 Script executed:

#!/bin/bash
# Search the migration file itself to understand the full context
echo "=== Reading the migration file ==="
cat -n backend/alembic/versions/fix_binary_data_size_fix_binary_data_column_size.py 2>/dev/null | head -60

echo -e "\n=== Looking for table creation migration ==="
rg -n "CreateTable.*subtask_attachments|subtask_attachments.*CreateTable" backend/alembic/versions/ -A 5 -B 1 | head -40

Repository: wecode-ai/Wegent

Length of output: 2213


🏁 Script executed:

#!/bin/bash
# Read the table creation migration to verify original column types
cat -n backend/alembic/versions/add_subtask_attachments_add_subtask_attachments_table.py | head -50

Repository: wecode-ai/Wegent

Length of output: 2323


🏁 Script executed:

#!/bin/bash
# Check if there's any database requirement documentation
echo "=== Checking for database strategy docs ==="
find . -name "README*" -o -name "ARCHITECTURE*" -o -name "DATABASE*" | xargs grep -l "database\|MySQL\|PostgreSQL" 2>/dev/null | head -5

echo -e "\n=== Checking backend config for database requirements ==="
head -50 backend/app/core/config.py | grep -A 10 -B 2 "DATABASE\|database"

Repository: wecode-ai/Wegent

Length of output: 712


Add dialect checks or document MySQL-only database requirement.

The migration switches from database-agnostic types (sa.LargeBinary, sa.Text) to MySQL-specific types (mysql.LONGBLOB, mysql.LONGTEXT). While the table creation uses MySQL options, the original columns were intentionally defined with generic SQLAlchemy types. This migration creates vendor lock-in without explicit documentation.

Either add dialect checks to handle multiple databases:

def upgrade() -> None:
    """Change binary_data column to LONGBLOB for MySQL."""
    bind = op.get_bind()
    
    if bind.dialect.name == 'mysql':
        op.alter_column(
            'subtask_attachments',
            'binary_data',
            existing_type=sa.LargeBinary(),
            type_=mysql.LONGBLOB(),
            existing_nullable=False
        )
        op.alter_column(
            'subtask_attachments',
            'extracted_text',
            existing_type=sa.Text(),
            type_=mysql.LONGTEXT(),
            existing_nullable=True
        )

Or add a module-level docstring clarifying that MySQL is the only supported database:

"""Fix binary_data column size to LONGBLOB for large file support

This migration uses MySQL-specific types and is only compatible with MySQL databases.
"""
🤖 Prompt for AI Agents
In backend/alembic/versions/fix_binary_data_size_fix_binary_data_column_size.py
around lines 28 to 46, the migration replaces generic SQLAlchemy types with
MySQL-specific types which creates unintended vendor lock-in; either add a
runtime dialect check using op.get_bind() and only apply mysql.LONGBLOB and
mysql.LONGTEXT when bind.dialect.name == 'mysql' (skip or no-op for other DBs),
or add a module-level docstring stating this migration is MySQL-only and will
fail on other backends so reviewers/operators are aware.



def downgrade() -> None:
    """Revert binary_data and extracted_text to the generic column types.

    WARNING: shrinking LONGBLOB back to BLOB (64KB) and LONGTEXT back to
    TEXT (64KB) will fail or silently truncate rows whose values exceed
    those limits. Back up the database and verify (or delete) oversized
    attachments before running this downgrade.
    """
    bind = op.get_bind()
    if bind.dialect.name != 'mysql':
        # upgrade() only altered MySQL databases; nothing to revert elsewhere.
        return

    op.alter_column(
        'subtask_attachments',
        'binary_data',
        existing_type=mysql.LONGBLOB(),
        type_=sa.LargeBinary(),
        existing_nullable=False
    )

    op.alter_column(
        'subtask_attachments',
        'extracted_text',
        existing_type=mysql.LONGTEXT(),
        type_=sa.Text(),
        existing_nullable=True
    )
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Document potential data loss risk during downgrade.

The downgrade reverts LONGBLOB → LargeBinary (BLOB) and LONGTEXT → Text, which could truncate data if files larger than BLOB/Text limits exist. While downgrades are typically performed in controlled environments, the risk should be documented.

Consider adding a warning comment:

 def downgrade() -> None:
-    """Revert to original column types."""
+    """Revert to original column types.
+    
+    WARNING: This downgrade will fail or truncate data if any attachments
+    exceed BLOB (64KB) or TEXT (64KB) limits. Ensure no large files exist
+    before downgrading, or manually delete large attachments first.
+    """
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def downgrade() -> None:
"""Revert to original column types."""
op.alter_column(
'subtask_attachments',
'binary_data',
existing_type=mysql.LONGBLOB(),
type_=sa.LargeBinary(),
existing_nullable=False
)
op.alter_column(
'subtask_attachments',
'extracted_text',
existing_type=mysql.LONGTEXT(),
type_=sa.Text(),
existing_nullable=True
)
def downgrade() -> None:
"""Revert to original column types.
WARNING: This downgrade will fail or truncate data if any attachments
exceed BLOB (64KB) or TEXT (64KB) limits. Ensure no large files exist
before downgrading, or manually delete large attachments first.
"""
op.alter_column(
'subtask_attachments',
'binary_data',
existing_type=mysql.LONGBLOB(),
type_=sa.LargeBinary(),
existing_nullable=False
)
op.alter_column(
'subtask_attachments',
'extracted_text',
existing_type=mysql.LONGTEXT(),
type_=sa.Text(),
existing_nullable=True
)
🤖 Prompt for AI Agents
In backend/alembic/versions/fix_binary_data_size_fix_binary_data_column_size.py
around lines 49 to 65, the downgrade changes LONGBLOB→LargeBinary and
LONGTEXT→Text which can truncate data for values exceeding BLOB/Text limits; add
a clear warning comment immediately above the downgrade function documenting
this risk, instructing operators to backup the database, verify no column values
exceed target sizes (or refuse to run downgrade until verified), and recommend
an alternative migration strategy or manual cleanup to prevent data loss during
downgrade.

4 changes: 4 additions & 0 deletions backend/app/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from app.api.endpoints import admin, auth, oidc, quota, repository, users
from app.api.endpoints.adapter import (
agents,
attachments,
bots,
chat,
dify,
executors,
models,
Expand All @@ -26,6 +28,8 @@
api_router.include_router(agents.router, prefix="/agents", tags=["public-shell"])
api_router.include_router(teams.router, prefix="/teams", tags=["teams"])
api_router.include_router(tasks.router, prefix="/tasks", tags=["tasks"])
api_router.include_router(chat.router, prefix="/chat", tags=["chat"])
api_router.include_router(attachments.router, prefix="/attachments", tags=["attachments"])
api_router.include_router(repository.router, prefix="/git", tags=["repository"])
api_router.include_router(executors.router, prefix="/executors", tags=["executors"])
api_router.include_router(quota.router, prefix="/quota", tags=["quota"])
Expand Down
Loading
Loading