diff --git a/dashboard/README.md b/dashboard/README.md
new file mode 100644
index 0000000..a56935d
--- /dev/null
+++ b/dashboard/README.md
@@ -0,0 +1,321 @@
+# šŸš€ Kafka Dashboard
+
+Interactive web dashboard for monitoring and analyzing Kafka cluster health and performance metrics.
+
+## šŸ“Š Features
+
+- **Real-time Health Monitoring** - Live cluster health score and status indicators
+- **Interactive Charts** - Plotly-powered visualizations with hover details and zoom
+- **Health Checks Analysis** - Detailed breakdown of all health check results
+- **Topics Overview** - Partition distribution, replication factors, and topic analytics
+- **Consumer Groups Monitoring** - Active/inactive groups and member status
+- **Broker Information** - Cluster metadata and broker details
+- **Auto-refresh** - Automatically updates every 30 seconds
+- **Responsive Design** - Works on desktop, tablet, and mobile devices
+
+## šŸŽÆ Screenshots
+
+### Main Dashboard
+
+- Health score gauge with color-coded status
+- Metrics cards showing key cluster statistics
+- Distribution charts for topics and consumer groups
+
+### Health Checks
+
+- Comprehensive table of all health check results
+- Color-coded status indicators (āœ… Passed, āŒ Failed, āš ļø Warning)
+- Recommendations for each check
+
+## šŸ› ļø Installation
+
+### Prerequisites
+
+- Python 3.8+
+- Kafka analysis reports (generated by the main analyzer)
+
+### Quick Start
+
+1. **Install dependencies:**
+
+   ```bash
+   cd dashboard
+   pip install -r requirements.txt
+   ```
+
+2. **Generate analysis data (from the parent directory):**
+
+   ```bash
+   cd ..
+   npx superstream-kafka-analyzer --config kraft-config.json
+   ```
+
+3. **Run the dashboard:**
+
+   ```bash
+   cd dashboard
+   python run_dashboard.py
+   ```
+
+4. **Open your browser:**
+   ```
+   http://localhost:8050
+   ```
+
+### Advanced Installation
+
+```bash
+# Install with automatic dependency installation
+python run_dashboard.py --install
+
+# Run on a different port
+python run_dashboard.py --port 8080
+
+# Use a custom data directory
+python run_dashboard.py --data-dir /path/to/kafka-reports
+
+# Run in debug mode
+python run_dashboard.py --debug
+
+# Bind to all interfaces (accessible from other machines)
+python run_dashboard.py --host 0.0.0.0
+```
+
+## šŸ“ Project Structure
+
+```
+dashboard/
+ā”œā”€ā”€ app.py                 # Main dashboard application
+ā”œā”€ā”€ run_dashboard.py       # Launcher script with dependency checking
+ā”œā”€ā”€ requirements.txt       # Python dependencies
+ā”œā”€ā”€ README.md              # This file
+ā”œā”€ā”€ components/
+│   ā”œā”€ā”€ charts.py          # Chart generation components
+│   └── layout.py          # UI layout components
+ā”œā”€ā”€ utils/
+│   └── data_loader.py     # Data loading and processing utilities
+└── assets/                # Static assets (CSS, images)
+```
+
+## šŸ”§ Configuration
+
+### Data Directory
+
+The dashboard looks for analysis files in these locations (in order):
+
+1. `./kafka-analysis` (current directory)
+2. `../kafka-analysis` (parent directory)
+3. `../../kafka-analysis` (grandparent directory)
+4. `~/kafka-analysis` (home directory)
+
+### File Format
+
+The dashboard expects JSON files with this naming pattern:
+
+```
+kafka-analysis-{timestamp}.json
+```
+
+## šŸ“Š Dashboard Sections
+
+### 1. **Metrics Cards**
+
+- Total Brokers
+- Total Topics
+- Total Partitions
+- Consumer Groups
+- Active Groups
+- Average Partitions per Topic
+
+### 2. **Health Score Gauge**
+
+- Overall cluster health percentage
+- Color-coded indicator (Red < 70%, Yellow 70-90%, Green > 90%)
+- Delta from target (90%)
+
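+The gauge value is simply the share of health checks that passed. As a quick reference, here is a minimal sketch of the scoring and color-banding logic (mirroring `utils/data_loader.py` and `components/charts.py`; the function names are illustrative, not part of the codebase):
+
+```python
+def health_score(passed_checks: int, total_checks: int) -> float:
+    """Health score = percentage of passed checks."""
+    return (passed_checks / total_checks * 100) if total_checks > 0 else 0.0
+
+def gauge_color(score: float) -> str:
+    """Color bands used by the gauge."""
+    if score >= 90:
+        return "green"   # healthy
+    if score >= 70:
+        return "yellow"  # needs attention
+    return "red"         # unhealthy
+
+# Example from the sample report: 7 of 14 checks passed -> 50.0 -> "red"
+print(health_score(7, 14), gauge_color(health_score(7, 14)))
+```
+
+### 3.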
**Health Checks Summary** + +- Bar chart showing passed/failed/warning counts +- Interactive tooltips with details + +### 4. **Topics Distribution** + +- Pie chart of user vs internal topics +- Total count in center + +### 5. **Partitions per Topic** + +- Bar chart showing partition distribution +- Excludes internal topics for clarity + +### 6. **Consumer Groups Status** + +- Pie chart of active vs inactive groups +- Helps identify unused consumer groups + +### 7. **Replication Factor Distribution** + +- Bar chart showing RF distribution across topics +- Helps identify replication inconsistencies + +### 8. **Cluster Information Card** + +- Vendor information +- Cluster ID and controller +- Last analysis timestamp +- Broker details with badges + +### 9. **Detailed Health Checks Table** + +- Comprehensive table of all health checks +- Sortable and filterable columns +- Color-coded rows by status +- Recommendations for each check + +## šŸ”„ Data Refresh + +### Automatic Refresh + +- Dashboard auto-refreshes every 30 seconds +- Loads the latest analysis file automatically +- Updates timestamp in top-right corner + +### Manual Refresh + +- Click "šŸ”„ Refresh Now" button +- Immediately loads latest data +- Useful for testing or immediate updates + +## šŸŽØ Customization + +### Themes + +The dashboard uses Bootstrap themes and can be customized by modifying: + +- `external_stylesheets` in `app.py` +- Custom CSS in the `app.index_string` + +### Charts + +Chart appearance can be modified in `components/charts.py`: + +- Colors and styling +- Chart types and layouts +- Hover templates and annotations + +### Layout + +UI components can be customized in `components/layout.py`: + +- Card designs and layouts +- Metrics and badges +- Table styling + +## šŸ› Troubleshooting + +### Common Issues + +**No data available:** + +```bash +# Generate analysis reports first +cd /path/to/kafka-analyzer +npx superstream-kafka-analyzer --config config.json +``` + +**Dependencies missing:** + +```bash +# Install required packages +pip install -r requirements.txt +# Or use auto-install +python run_dashboard.py --install +``` + +**Port already in use:** + +```bash +# Use different port +python run_dashboard.py --port 8080 +``` + +**Permission denied:** + +```bash +# Make launcher executable (Linux/Mac) +chmod +x run_dashboard.py +``` + +### Debug Mode + +Run with debug flag for detailed error information: + +```bash +python run_dashboard.py --debug +``` + +## šŸ“ˆ Performance + +### Optimization Tips + +- Dashboard is optimized for files up to 100MB +- For large clusters (1000+ topics), consider: + - Filtering internal topics in visualizations + - Pagination for large tables + - Caching for historical data + +### Memory Usage + +- Typical memory usage: 50-100MB +- Scales with number of topics and historical reports +- Auto-cleanup of old data stores + +## šŸ”’ Security + +### Network Access + +- Default binding: `127.0.0.1` (localhost only) +- For network access: use `--host 0.0.0.0` +- Consider reverse proxy for production use + +### Data Privacy + +- All processing happens locally +- No external API calls (except CDN for fonts) +- Analysis data stays on your machine + +## šŸ¤ Contributing + +### Development Setup + +```bash +# Clone and setup +git clone +cd dashboard + +# Install dev dependencies +pip install -r requirements.txt + +# Run in debug mode +python run_dashboard.py --debug +``` + +### Adding Features + +1. Create new components in `components/` +2. Add data processing in `utils/` +3. 
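Register callbacks in `app.py` (see the sketch below)
+4. Update layout as needed
+
+For step 3, a hedged sketch of what the wiring for a new chart could look like; the chart method `create_lag_overview_chart` and the component id `lag-overview-chart` are placeholders, not existing code:
+
+```python
+from dash import Input, Output
+
+# Assumes `app` (the Dash instance) and `chart_builder` (a ChartBuilder)
+# are in scope, as they are inside KafkaDashboard in app.py.
+@app.callback(
+    Output('lag-overview-chart', 'figure'),
+    Input('kafka-data', 'data')  # the same dcc.Store the other charts read
+)
+def update_lag_overview(kafka_data):
+    # Guard against the store being empty on first load
+    consumer_groups = (kafka_data or {}).get('consumerGroups', [])
+    return chart_builder.create_lag_overview_chart(consumer_groups)
+```
+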
+## šŸ“„ License
+
+MIT License - see parent project for details.
+
+## šŸ†˜ Support
+
+For issues and questions:
+
+- Check the troubleshooting section above
+- Review console output with the `--debug` flag
+- Ensure analysis files are generated correctly
+- Verify your Python version (3.8+ required)
diff --git a/dashboard/__pycache__/app.cpython-312.pyc b/dashboard/__pycache__/app.cpython-312.pyc
new file mode 100644
index 0000000..a576f1c
Binary files /dev/null and b/dashboard/__pycache__/app.cpython-312.pyc differ
diff --git a/dashboard/app.py b/dashboard/app.py
new file mode 100644
index 0000000..2dde7fb
--- /dev/null
+++ b/dashboard/app.py
@@ -0,0 +1,339 @@
+"""
+Main Kafka Dashboard Application
+Interactive dashboard for monitoring Kafka cluster health and analytics
+"""
+
+import dash
+from dash import dcc, html, Input, Output, callback, State
+import dash_bootstrap_components as dbc
+import plotly.graph_objects as go
+from datetime import datetime
+import os
+import sys
+
+# Make the dashboard's own packages (utils/, components/) importable
+# regardless of the caller's working directory
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from utils.data_loader import KafkaDataLoader, HistoricalDataProcessor
+from components.charts import ChartBuilder, MetricsCards
+from components.layout import LayoutComponents, TabsLayout
+
+
+class KafkaDashboard:
+    """Main dashboard application class"""
+
+    def __init__(self, data_dir: str = "../kafka-analysis"):
+        self.data_dir = data_dir
+        self.data_loader = KafkaDataLoader(data_dir)
+        self.chart_builder = ChartBuilder()
+        self.layout_components = LayoutComponents()
+
+        # Initialize Dash app
+        self.app = dash.Dash(
+            __name__,
+            external_stylesheets=[
+                dbc.themes.BOOTSTRAP,
+                "https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap"
+            ],
+            suppress_callback_exceptions=True
+        )
+
+        # Set custom HTML shell (the standard Dash index template with a
+        # custom page title; Dash requires the config/scripts/renderer
+        # placeholders to be present)
+        self.app.index_string = '''
+        <!DOCTYPE html>
+        <html>
+            <head>
+                {%metas%}
+                <title>Kafka Dashboard</title>
+                {%favicon%}
+                {%css%}
+            </head>
+            <body>
+                {%app_entry%}
+                <footer>
+                    {%config%}
+                    {%scripts%}
+                    {%renderer%}
+                </footer>
+            </body>
+        </html>
+        '''
+
+        self.setup_layout()
+        self.setup_callbacks()
+
+    def setup_layout(self):
+        """Setup the main dashboard layout"""
+        self.app.layout = dbc.Container([
+            # Header
+            self.layout_components.create_header(),
+
+            # Refresh controls
+            self.layout_components.create_refresh_controls(),
+
+            # Main content area
+            html.Div(id="main-content"),
+
+            # Auto-refresh interval
+            dcc.Interval(
+                id='interval-component',
+                interval=30*1000,  # Update every 30 seconds
+                n_intervals=0
+            ),
+
+            # Data stores
+            dcc.Store(id='kafka-data'),
+            dcc.Store(id='historical-data')
+
+        ], fluid=True)
+
+    def setup_callbacks(self):
+        """Setup dashboard callbacks"""
+
+        @self.app.callback(
+            [Output('kafka-data', 'data'),
+             Output('historical-data', 'data'),
+             Output('last-updated', 'children')],
+            [Input('interval-component', 'n_intervals'),
+             Input('refresh-button', 'n_clicks')]
+        )
+        def update_data(n_intervals, refresh_clicks):
+            """Update data from latest reports"""
+            # Load latest report
+            latest_data = self.data_loader.get_latest_report()
+
+            # Load all reports for historical analysis
+            all_reports = self.data_loader.get_all_reports()
+
+            # Current timestamp
+            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+            return latest_data, all_reports, current_time
+
+        @self.app.callback(
+            Output('main-content', 'children'),
+            Input('kafka-data', 'data')
+        )
+        def update_main_content(kafka_data):
+            """Update main content based on available data"""
+            if not kafka_data:
+                return self.layout_components.create_no_data_message()
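+
+            # The figures in this layout start out empty; they are populated
+            # by the chart callbacks registered in create_chart_callbacks(),
+            # which read the same 'kafka-data' store.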
+ return self.create_dashboard_content(kafka_data) + + def create_dashboard_content(self, kafka_data): + """Create the main dashboard content""" + # Extract data summaries + health_score = self.data_loader.get_health_score(kafka_data) + topics_summary = self.data_loader.get_topics_summary(kafka_data) + broker_info = self.data_loader.get_broker_info(kafka_data) + consumer_summary = self.data_loader.get_consumer_groups_summary(kafka_data) + + # Create metrics cards + metrics = MetricsCards.create_cluster_metrics( + broker_info, topics_summary, consumer_summary + ) + + return html.Div([ + # Metrics cards + self.layout_components.create_metrics_cards(metrics), + + # Charts row 1 + dbc.Row([ + dbc.Col([ + self.layout_components.create_chart_card( + "health-score-gauge", + "Cluster Health Score", + "Overall health percentage based on passed checks" + ) + ], width=12, lg=4), + dbc.Col([ + self.layout_components.create_chart_card( + "health-checks-summary", + "Health Checks Summary", + "Breakdown of health check results" + ) + ], width=12, lg=4), + dbc.Col([ + self.layout_components.create_chart_card( + "topics-distribution", + "Topics Distribution", + "User vs Internal topics" + ) + ], width=12, lg=4) + ], className="mb-4"), + + # Charts row 2 + dbc.Row([ + dbc.Col([ + self.layout_components.create_chart_card( + "partitions-chart", + "Partitions per Topic", + "Distribution of partitions across user topics" + ) + ], width=12, lg=6), + dbc.Col([ + self.layout_components.create_chart_card( + "consumer-groups-chart", + "Consumer Groups Status", + "Active vs Inactive consumer groups" + ) + ], width=12, lg=6) + ], className="mb-4"), + + # Charts row 3 + dbc.Row([ + dbc.Col([ + self.layout_components.create_chart_card( + "replication-factor-chart", + "Replication Factor Distribution", + "Distribution of replication factors across topics" + ) + ], width=12, lg=6), + dbc.Col([ + self.layout_components.create_cluster_info_card("cluster-info") + ], width=12, lg=6) + ], className="mb-4"), + + # Health details table + dbc.Row([ + dbc.Col([ + self.layout_components.create_health_details_table("health-details-table") + ], width=12) + ]), + + # Hidden divs for charts data + html.Div(id="chart-data", style={"display": "none"}) + ]) + + def create_chart_callbacks(self): + """Create callbacks for chart updates""" + + @self.app.callback( + [Output('health-score-gauge', 'figure'), + Output('health-checks-summary', 'figure'), + Output('topics-distribution', 'figure'), + Output('partitions-chart', 'figure'), + Output('consumer-groups-chart', 'figure'), + Output('replication-factor-chart', 'figure'), + Output('cluster-info', 'children'), + Output('health-details-table', 'children')], + Input('kafka-data', 'data') + ) + def update_charts(kafka_data): + """Update all charts with new data""" + if not kafka_data: + # Return empty charts + empty_fig = go.Figure() + empty_content = html.Div("No data available") + return (empty_fig, empty_fig, empty_fig, empty_fig, + empty_fig, empty_fig, empty_content, empty_content) + + # Extract data + health_score = self.data_loader.get_health_score(kafka_data) + health_data = kafka_data.get('healthChecks', {}) + topics_summary = self.data_loader.get_topics_summary(kafka_data) + topics = kafka_data.get('topics', []) + consumer_groups = kafka_data.get('consumerGroups', []) + broker_info = self.data_loader.get_broker_info(kafka_data) + + # Create charts + health_gauge = self.chart_builder.create_health_score_gauge(health_score) + health_summary = 
self.chart_builder.create_health_checks_summary(health_data) + topics_dist = self.chart_builder.create_topics_distribution(topics_summary) + partitions_chart = self.chart_builder.create_partitions_per_topic(topics) + consumer_chart = self.chart_builder.create_consumer_groups_chart(consumer_groups) + replication_chart = self.chart_builder.create_replication_factor_chart(topics) + + # Create cluster info + cluster_info = self.create_cluster_info_content(kafka_data, broker_info) + + # Create health details table + health_details = self.create_health_details_table(kafka_data) + + return (health_gauge, health_summary, topics_dist, partitions_chart, + consumer_chart, replication_chart, cluster_info, health_details) + + def create_cluster_info_content(self, kafka_data, broker_info): + """Create cluster information content""" + timestamp = kafka_data.get('timestamp', 'Unknown') + vendor = kafka_data.get('vendor', 'Unknown') + + return html.Div([ + html.P([html.Strong("šŸ¢ Vendor: "), vendor]), + html.P([html.Strong("šŸ†” Cluster ID: "), broker_info.get('cluster_id', 'Unknown')]), + html.P([html.Strong("šŸ‘‘ Controller: "), str(broker_info.get('controller', 'Unknown'))]), + html.P([html.Strong("šŸ“… Last Analysis: "), timestamp]), + html.P([html.Strong("šŸ–„ļø Total Brokers: "), str(broker_info.get('total_brokers', 0))]), + html.Hr(), + html.H6("šŸ“” Broker Details:"), + html.Div([ + dbc.Badge(f"Broker {broker.get('nodeId', 'Unknown')}", + color="secondary", className="me-2 mb-2") + for broker in broker_info.get('brokers', []) + ]) + ]) + + def create_health_details_table(self, kafka_data): + """Create health details table""" + # Extract health check details + health_details = self.data_loader.extract_health_checks_details(kafka_data) + + if not health_details: + return html.Div([ + html.P("No health check details available", + className="text-muted text-center p-4") + ]) + + # Process for table display + table_data = [] + for check in health_details: + table_data.append({ + 'status': self.get_status_emoji(check.get('status', 'UNKNOWN')), + 'name': check.get('name', 'Unknown Check'), + 'message': check.get('message', 'No message'), + 'recommendation': check.get('recommendation', 'No recommendation') + }) + + return self.layout_components.create_data_table(table_data, "health-table") + + def get_status_emoji(self, status): + """Get emoji for status""" + status_map = { + 'PASSED': 'āœ… PASSED', + 'FAILED': 'āŒ FAILED', + 'WARNING': 'āš ļø WARNING', + 'INFO': 'ā„¹ļø INFO' + } + return status_map.get(status, '? 
UNKNOWN') + + def run(self, debug=True, port=8050, host='127.0.0.1'): + """Run the dashboard""" + # Register chart callbacks + self.create_chart_callbacks() + + print(f"šŸš€ Starting Kafka Dashboard on http://{host}:{port}") + print(f"šŸ“ Data directory: {os.path.abspath(self.data_dir)}") + + # Check if data directory exists + if not os.path.exists(self.data_dir): + print(f"āš ļø Warning: Data directory '{self.data_dir}' not found") + print("šŸ’” Run the Kafka analyzer first to generate reports") + + self.app.run_server(debug=debug, port=port, host=host) + + +if __name__ == "__main__": + dashboard = KafkaDashboard() + dashboard.run() diff --git a/dashboard/assets/custom.css b/dashboard/assets/custom.css new file mode 100644 index 0000000..a361921 --- /dev/null +++ b/dashboard/assets/custom.css @@ -0,0 +1,116 @@ +/* Custom CSS for Kafka Dashboard */ + +/* Global Styles */ +body { + font-family: 'Inter', sans-serif !important; + background-color: #f8f9fa !important; +} + +/* Card Borders */ +.border-left-primary { + border-left: 4px solid #007bff !important; +} + +.border-left-success { + border-left: 4px solid #28a745 !important; +} + +.border-left-info { + border-left: 4px solid #17a2b8 !important; +} + +.border-left-warning { + border-left: 4px solid #ffc107 !important; +} + +.border-left-secondary { + border-left: 4px solid #6c757d !important; +} + +.border-left-danger { + border-left: 4px solid #dc3545 !important; +} + +/* Dashboard specific styles */ +.dashboard-header { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + border-radius: 10px; + padding: 2rem; + margin-bottom: 2rem; +} + +.metric-card { + transition: transform 0.2s ease-in-out; + border: none !important; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important; +} + +.metric-card:hover { + transform: translateY(-2px); + box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15) !important; +} + +.chart-card { + border: none !important; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important; + border-radius: 10px !important; +} + +/* Status indicators */ +.status-passed { + background-color: #d4edda !important; + color: #155724 !important; +} + +.status-failed { + background-color: #f8d7da !important; + color: #721c24 !important; +} + +.status-warning { + background-color: #fff3cd !important; + color: #856404 !important; +} + +.status-info { + background-color: #d1ecf1 !important; + color: #0c5460 !important; +} + +/* Loading states */ +.loading-overlay { + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(255, 255, 255, 0.9); + display: flex; + align-items: center; + justify-content: center; + z-index: 9999; +} + +/* Responsive adjustments */ +@media (max-width: 768px) { + .dashboard-header { + padding: 1rem; + text-align: center; + } + + .metric-card { + margin-bottom: 1rem; + } +} + +/* Animation for updates */ +@keyframes pulse { + 0% { opacity: 1; } + 50% { opacity: 0.7; } + 100% { opacity: 1; } +} + +.updating { + animation: pulse 1s infinite; +} diff --git a/dashboard/components/__pycache__/charts.cpython-312.pyc b/dashboard/components/__pycache__/charts.cpython-312.pyc new file mode 100644 index 0000000..f0ec6fa Binary files /dev/null and b/dashboard/components/__pycache__/charts.cpython-312.pyc differ diff --git a/dashboard/components/__pycache__/layout.cpython-312.pyc b/dashboard/components/__pycache__/layout.cpython-312.pyc new file mode 100644 index 0000000..ebe6727 Binary files /dev/null and b/dashboard/components/__pycache__/layout.cpython-312.pyc differ diff 
--git a/dashboard/components/charts.py b/dashboard/components/charts.py
new file mode 100644
index 0000000..49e505b
--- /dev/null
+++ b/dashboard/components/charts.py
@@ -0,0 +1,371 @@
+"""
+Chart components for Kafka Dashboard
+Contains all chart generation functions using Plotly
+"""
+
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
+import pandas as pd
+from typing import Dict, List, Any, Optional
+
+
+class ChartBuilder:
+    """Builder class for creating Plotly charts"""
+
+    @staticmethod
+    def create_health_score_gauge(health_score: float, title: str = "Cluster Health Score") -> go.Figure:
+        """Create a health score gauge chart"""
+
+        # Determine color based on score
+        if health_score >= 90:
+            color = "#28a745"  # Green
+        elif health_score >= 70:
+            color = "#ffc107"  # Yellow
+        else:
+            color = "#dc3545"  # Red
+
+        fig = go.Figure(go.Indicator(
+            mode="gauge+number+delta",
+            value=health_score,
+            domain={'x': [0, 1], 'y': [0, 1]},
+            title={'text': title, 'font': {'size': 20}},
+            delta={'reference': 90, 'relative': True, 'valueformat': '.1%'},
+            gauge={
+                'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"},
+                'bar': {'color': color},
+                'bgcolor': "white",
+                'borderwidth': 2,
+                'bordercolor': "gray",
+                'steps': [
+                    {'range': [0, 50], 'color': '#f8d7da'},
+                    {'range': [50, 70], 'color': '#fff3cd'},
+                    {'range': [70, 90], 'color': '#d1ecf1'},
+                    {'range': [90, 100], 'color': '#d4edda'}
+                ],
+                'threshold': {
+                    'line': {'color': "red", 'width': 4},
+                    'thickness': 0.75,
+                    'value': 90
+                }
+            }
+        ))
+
+        fig.update_layout(
+            height=350,
+            font={'color': "darkblue", 'family': "Inter, Arial"},
+            paper_bgcolor='rgba(0,0,0,0)',
+            plot_bgcolor='rgba(0,0,0,0)'
+        )
+
+        return fig
+
+    @staticmethod
+    def create_health_checks_summary(health_data: Dict[str, int]) -> go.Figure:
+        """Create health checks summary bar chart"""
+
+        categories = ['Passed', 'Failed', 'Warnings']
+        values = [
+            health_data.get('passedChecks', 0),
+            health_data.get('failedChecks', 0),
+            health_data.get('warnings', 0)
+        ]
+        colors = ['#28a745', '#dc3545', '#ffc107']
+
+        fig = go.Figure()
+
+        for category, value, color in zip(categories, values, colors):
+            fig.add_trace(go.Bar(
+                x=[category],
+                y=[value],
+                name=category,
+                marker_color=color,
+                text=[value],
+                textposition='auto',
+                hovertemplate=f'{category}<br>Count: %{{y}}'
+            ))
+
+        fig.update_layout(
+            title="Health Checks Summary",
+            xaxis_title="Status",
+            yaxis_title="Count",
+            height=350,
+            showlegend=False,
+            font={'family': "Inter, Arial"},
+            paper_bgcolor='rgba(0,0,0,0)',
+            plot_bgcolor='rgba(0,0,0,0)',
+            xaxis={'gridcolor': '#e0e0e0'},
+            yaxis={'gridcolor': '#e0e0e0'}
+        )
+
+        return fig
+
+    @staticmethod
+    def create_topics_distribution(topics_data: Dict[str, int]) -> go.Figure:
+        """Create topics distribution pie chart"""
+
+        labels = ['User Topics', 'Internal Topics']
+        values = [
+            topics_data.get('user_topics', 0),
+            topics_data.get('internal_topics', 0)
+        ]
+        colors = ['#007bff', '#6c757d']
+
+        fig = go.Figure(data=[
+            go.Pie(
+                labels=labels,
+                values=values,
+                hole=0.4,
+                marker_colors=colors,
+                textinfo='label+percent+value',
+                hovertemplate='%{label}<br>Count: %{value}<br>Percentage: %{percent}'
+            )
+        ])
+
+        fig.update_layout(
+            title="Topics Distribution",
+            height=350,
+            font={'family': "Inter, Arial"},
+            paper_bgcolor='rgba(0,0,0,0)',
+            annotations=[
+                dict(text=f'Total<br>{sum(values)}', x=0.5, y=0.5, font_size=20, showarrow=False)
+            ]
+        )
+
+        return fig
+
+    @staticmethod
+    def create_partitions_per_topic(topics: List[Dict[str, Any]]) -> go.Figure:
+        """Create partitions per topic bar chart"""
+
+        # Filter out internal topics and prepare data
+        user_topics = [t for t in topics if not t.get('isInternal', False)]
+
+        if not user_topics:
+            # Create empty chart
+            fig = go.Figure()
+            fig.add_annotation(
+                text="No user topics found",
+                xref="paper", yref="paper",
+                x=0.5, y=0.5, showarrow=False,
+                font={'size': 16, 'color': '#6c757d'}
+            )
+        else:
+            topic_names = [t['name'] for t in user_topics]
+            partition_counts = [t.get('partitions', 0) for t in user_topics]
+
+            fig = go.Figure(data=[
+                go.Bar(
+                    x=topic_names,
+                    y=partition_counts,
+                    marker_color='lightblue',
+                    text=partition_counts,
+                    textposition='auto',
+                    hovertemplate='%{x}<br>Partitions: %{y}'
+                )
+            ])
+
+        fig.update_layout(
+            title="Partitions per Topic",
+            xaxis_title="Topics",
+            yaxis_title="Partition Count",
+            height=350,
+            font={'family': "Inter, Arial"},
+            paper_bgcolor='rgba(0,0,0,0)',
+            plot_bgcolor='rgba(0,0,0,0)',
+            xaxis={'tickangle': -45, 'gridcolor': '#e0e0e0'},
+            yaxis={'gridcolor': '#e0e0e0'}
+        )
+
+        return fig
+
+    @staticmethod
+    def create_consumer_groups_chart(consumer_groups: List[Dict[str, Any]]) -> go.Figure:
+        """Create consumer groups status chart"""
+
+        if not consumer_groups:
+            fig = go.Figure()
+            fig.add_annotation(
+                text="No consumer groups found",
+                xref="paper", yref="paper",
+                x=0.5, y=0.5, showarrow=False,
+                font={'size': 16, 'color': '#6c757d'}
+            )
+            fig.update_layout(height=350, title="Consumer Groups Status")
+            return fig
+
+        # Count groups by status
+        active_count = sum(1 for cg in consumer_groups if cg.get('members', 0) > 0)
+        inactive_count = len(consumer_groups) - active_count
+
+        labels = ['Active', 'Inactive']
+        values = [active_count, inactive_count]
+        colors = ['#28a745', '#ffc107']
+
+        fig = go.Figure(data=[
+            go.Pie(
+                labels=labels,
+                values=values,
+                marker_colors=colors,
+                textinfo='label+percent+value',
+                hovertemplate='%{label}<br>Count: %{value}<br>Percentage: %{percent}'
+            )
+        ])
+
+        fig.update_layout(
+            title="Consumer Groups Status",
+            height=350,
+            font={'family': "Inter, Arial"},
+            paper_bgcolor='rgba(0,0,0,0)'
+        )
+
+        return fig
+
+    @staticmethod
+    def create_replication_factor_chart(topics: List[Dict[str, Any]]) -> go.Figure:
+        """Create replication factor distribution chart"""
+
+        if not topics:
+            fig = go.Figure()
+            fig.add_annotation(
+                text="No topics found",
+                xref="paper", yref="paper",
+                x=0.5, y=0.5, showarrow=False,
+                font={'size': 16, 'color': '#6c757d'}
+            )
+            fig.update_layout(height=350, title="Replication Factor Distribution")
+            return fig
+
+        # Count topics by replication factor
+        rf_counts = {}
+        for topic in topics:
+            rf = topic.get('replicationFactor', 1)
+            rf_counts[rf] = rf_counts.get(rf, 0) + 1
+
+        rfs = list(rf_counts.keys())
+        counts = list(rf_counts.values())
+
+        fig = go.Figure(data=[
+            go.Bar(
+                x=[f"RF={rf}" for rf in rfs],
+                y=counts,
+                marker_color='lightcoral',
+                text=counts,
+                textposition='auto',
+                hovertemplate='%{x}<br>Topics: %{y}'
+            )
+        ])
+
+        fig.update_layout(
+            title="Replication Factor Distribution",
+            xaxis_title="Replication Factor",
+            yaxis_title="Number of Topics",
+            height=350,
+            font={'family': "Inter, Arial"},
+            paper_bgcolor='rgba(0,0,0,0)',
+            plot_bgcolor='rgba(0,0,0,0)',
+            xaxis={'gridcolor': '#e0e0e0'},
+            yaxis={'gridcolor': '#e0e0e0'}
+        )
+
+        return fig
+
+    @staticmethod
+    def create_health_score_trend(trend_df: pd.DataFrame) -> go.Figure:
+        """Create health score trend line chart"""
+
+        if trend_df.empty:
+            fig = go.Figure()
+            fig.add_annotation(
+                text="No historical data available",
+                xref="paper", yref="paper",
+                x=0.5, y=0.5, showarrow=False,
+                font={'size': 16, 'color': '#6c757d'}
+            )
+            fig.update_layout(height=350, title="Health Score Trend")
+            return fig
+
+        fig = go.Figure()
+
+        fig.add_trace(go.Scatter(
+            x=trend_df['timestamp'],
+            y=trend_df['health_score'],
+            mode='lines+markers',
+            name='Health Score',
+            line=dict(color='#007bff', width=3),
+            marker=dict(size=8),
+            hovertemplate='Health Score<br>Date: %{x}<br>Score: %{y:.1f}%'
+        ))
+
+        # Add threshold line
+        fig.add_hline(
+            y=90,
+            line_dash="dash",
+            line_color="red",
+            annotation_text="Target: 90%",
+            annotation_position="bottom right"
+        )
+
+        fig.update_layout(
+            title="Health Score Trend",
+            xaxis_title="Time",
+            yaxis_title="Health Score (%)",
+            height=350,
+            font={'family': "Inter, Arial"},
+            paper_bgcolor='rgba(0,0,0,0)',
+            plot_bgcolor='rgba(0,0,0,0)',
+            xaxis={'gridcolor': '#e0e0e0'},
+            yaxis={'gridcolor': '#e0e0e0', 'range': [0, 100]}
+        )
+
+        return fig
+
+
+class MetricsCards:
+    """Helper class for creating metric cards data"""
+
+    @staticmethod
+    def create_cluster_metrics(cluster_info: Dict[str, Any], topics_summary: Dict[str, Any],
+                               consumer_summary: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Create metrics cards data"""
+
+        metrics = [
+            {
+                'title': 'Total Brokers',
+                'value': cluster_info.get('total_brokers', 0),
+                'icon': 'šŸ–„ļø',
+                'color': 'primary'
+            },
+            {
+                'title': 'Total Topics',
+                'value': topics_summary.get('total_topics', 0),
+                'icon': 'šŸ“‹',
+                'color': 'info'
+            },
+            {
+                'title': 'Total Partitions',
+                'value': topics_summary.get('total_partitions', 0),
+                'icon': 'šŸ“Š',
+                'color': 'success'
+            },
+            {
+                'title': 'Consumer Groups',
+                'value': consumer_summary.get('total_groups', 0),
+                'icon': 'šŸ‘„',
+                'color': 'warning'
+            },
+            {
+                'title': 'Active Groups',
+                'value': consumer_summary.get('active_groups', 0),
+                'icon': 'āœ…',
+                'color': 'success'
+            },
+            {
+                'title': 'Avg Partitions/Topic',
+                'value': f"{topics_summary.get('avg_partitions_per_topic', 0):.1f}",
+                'icon': 'āš–ļø',
+                'color': 'secondary'
+            }
+        ]
+
+        return metrics
diff --git a/dashboard/components/layout.py b/dashboard/components/layout.py
new file mode 100644
index 0000000..c37746f
--- /dev/null
+++ b/dashboard/components/layout.py
@@ -0,0 +1,257 @@
+"""
+Layout components for Kafka Dashboard
+Contains reusable UI components and layouts
+"""
+
+import dash_bootstrap_components as dbc
+from dash import html, dcc, dash_table
+from typing import List, Dict, Any
+
+
+class LayoutComponents:
+    """Collection of reusable layout components"""
+
+    @staticmethod
+    def create_header() -> dbc.Container:
+        """Create dashboard header"""
+        return dbc.Container([
+            dbc.Row([
+                dbc.Col([
+                    html.Div([
+                        html.H1([
+                            "šŸš€ ",
+                            html.Span("Kafka Cluster Dashboard", className="text-primary"),
+                        ], className="text-center mb-3"),
+                        html.P(
+                            "Real-time monitoring and analysis of your Kafka cluster health",
+                            className="text-center text-muted lead"
+                        ),
+                        html.Hr(className="my-4")
+                    ])
+                ], width=12)
+            ])
+        ], fluid=True, className="mb-4")
+
+    @staticmethod
+    def create_metrics_cards(metrics: List[Dict[str, Any]]) -> dbc.Row:
+        """Create metrics cards row"""
+        cards = []
+
+        for metric in metrics:
+            card = dbc.Col([
+                dbc.Card([
+                    dbc.CardBody([
+                        html.Div([
+                            html.Div([
+                                html.H2(metric['icon'], className="text-center mb-0"),
+                            ], className="col-auto"),
+                            html.Div([
+                                html.H4(str(metric['value']), className="mb-0 text-primary"),
+                                html.P(metric['title'], className="text-muted small mb-0")
+                            ], className="col")
+                        ], className="row align-items-center")
+                    ])
+                ], className=f"border-left-{metric['color']} shadow-sm h-100")
+            ], width=12, lg=2, className="mb-3")
+            cards.append(card)
+
+        return dbc.Row(cards)
+
+    @staticmethod
+    def create_chart_card(chart_id: str, title: str, description: str = "") -> dbc.Card:
+        """Create a chart card wrapper"""
+        return dbc.Card([
+            dbc.CardHeader([
+                html.H5(title, className="mb-0"),
+                html.Small(description, className="text-muted") if
description else None + ]), + dbc.CardBody([ + dcc.Graph(id=chart_id, config={'displayModeBar': False}) + ]) + ], className="shadow-sm h-100") + + @staticmethod + def create_health_details_table(table_id: str) -> dbc.Card: + """Create health details table card""" + return dbc.Card([ + dbc.CardHeader([ + html.H5("šŸ“‹ Detailed Health Checks", className="mb-0"), + html.Small("Comprehensive analysis of cluster health", className="text-muted") + ]), + dbc.CardBody([ + html.Div(id=table_id) + ]) + ], className="shadow-sm") + + @staticmethod + def create_cluster_info_card(info_id: str) -> dbc.Card: + """Create cluster information card""" + return dbc.Card([ + dbc.CardHeader([ + html.H5("šŸ¢ Cluster Information", className="mb-0"), + html.Small("General cluster metadata and configuration", className="text-muted") + ]), + dbc.CardBody([ + html.Div(id=info_id) + ]) + ], className="shadow-sm h-100") + + @staticmethod + def create_status_badge(status: str) -> html.Span: + """Create status badge based on health check status""" + badge_config = { + 'PASSED': {'color': 'success', 'icon': 'āœ…'}, + 'FAILED': {'color': 'danger', 'icon': 'āŒ'}, + 'WARNING': {'color': 'warning', 'icon': 'āš ļø'}, + 'INFO': {'color': 'info', 'icon': 'ā„¹ļø'} + } + + config = badge_config.get(status, {'color': 'secondary', 'icon': '?'}) + + return dbc.Badge([ + config['icon'], " ", status + ], color=config['color'], className="me-2") + + @staticmethod + def create_data_table(data: List[Dict[str, Any]], table_id: str) -> dash_table.DataTable: + """Create a styled data table""" + if not data: + return html.Div([ + html.P("No data available", className="text-muted text-center p-4") + ]) + + columns = [ + {"name": "Status", "id": "status", "type": "text"}, + {"name": "Health Check", "id": "name", "type": "text"}, + {"name": "Message", "id": "message", "type": "text"}, + {"name": "Recommendation", "id": "recommendation", "type": "text"} + ] + + return dash_table.DataTable( + id=table_id, + data=data, + columns=columns, + style_cell={ + 'textAlign': 'left', + 'whiteSpace': 'normal', + 'height': 'auto', + 'maxWidth': '300px', + 'fontFamily': 'Inter, Arial', + 'fontSize': '14px', + 'padding': '12px' + }, + style_header={ + 'backgroundColor': '#f8f9fa', + 'fontWeight': 'bold', + 'border': '1px solid #dee2e6' + }, + style_data={ + 'border': '1px solid #dee2e6', + 'backgroundColor': 'white' + }, + style_data_conditional=[ + { + 'if': {'filter_query': '{status} contains āœ…'}, + 'backgroundColor': '#d4edda', + 'color': 'black', + }, + { + 'if': {'filter_query': '{status} contains āŒ'}, + 'backgroundColor': '#f8d7da', + 'color': 'black', + }, + { + 'if': {'filter_query': '{status} contains āš ļø'}, + 'backgroundColor': '#fff3cd', + 'color': 'black', + }, + { + 'if': {'filter_query': '{status} contains ā„¹ļø'}, + 'backgroundColor': '#d1ecf1', + 'color': 'black', + } + ], + page_size=15, + sort_action="native", + filter_action="native", + style_table={'overflowX': 'auto'} + ) + + @staticmethod + def create_loading_spinner() -> dbc.Spinner: + """Create loading spinner""" + return dbc.Spinner([ + html.Div([ + html.H4("Loading Kafka data...", className="text-center text-muted"), + html.P("Please wait while we fetch the latest analysis", className="text-center text-muted") + ]) + ], size="lg", color="primary", type="border", fullscreen=True) + + @staticmethod + def create_no_data_message() -> html.Div: + """Create no data available message""" + return html.Div([ + dbc.Alert([ + html.H4("šŸ“Š No Data Available", 
className="alert-heading"), + html.P([ + "No Kafka analysis reports found. Please run the analyzer first:" + ]), + html.Hr(), + html.Pre([ + "cd /path/to/kafka-analyzer\n", + "npx superstream-kafka-analyzer --config config.json" + ], className="mb-0"), + html.P([ + html.Small("Then refresh this dashboard to view the results.") + ], className="mb-0 mt-2") + ], color="info", className="text-center") + ], className="my-5") + + @staticmethod + def create_refresh_controls() -> dbc.Row: + """Create refresh controls section""" + return dbc.Row([ + dbc.Col([ + dbc.ButtonGroup([ + dbc.Button( + "šŸ”„ Refresh Now", + id="refresh-button", + color="primary", + size="sm", + className="me-2" + ), + dbc.Button( + "āš™ļø Settings", + id="settings-button", + color="outline-secondary", + size="sm" + ) + ]) + ], width="auto"), + dbc.Col([ + html.Div([ + html.Small("Last updated: ", className="text-muted"), + html.Small(id="last-updated", className="text-muted fw-bold") + ]) + ], width="auto", className="ms-auto") + ], className="mb-3 align-items-center") + + +class TabsLayout: + """Layout for tabbed interface""" + + @staticmethod + def create_tabs() -> dbc.Tabs: + """Create main dashboard tabs""" + return dbc.Tabs([ + dbc.Tab(label="šŸ“Š Overview", tab_id="overview"), + dbc.Tab(label="šŸ„ Health Checks", tab_id="health"), + dbc.Tab(label="šŸ“‹ Topics", tab_id="topics"), + dbc.Tab(label="šŸ‘„ Consumer Groups", tab_id="consumers"), + dbc.Tab(label="šŸ“ˆ Trends", tab_id="trends") + ], id="main-tabs", active_tab="overview") + + @staticmethod + def create_tab_content() -> html.Div: + """Create tab content container""" + return html.Div(id="tab-content", className="mt-4") diff --git a/dashboard/requirements.txt b/dashboard/requirements.txt new file mode 100644 index 0000000..7dc51d1 --- /dev/null +++ b/dashboard/requirements.txt @@ -0,0 +1,7 @@ +dash==2.16.1 +plotly==5.17.0 +pandas==2.1.4 +numpy>=1.26.0 +dash-bootstrap-components==1.5.0 +requests==2.31.0 +python-dateutil==2.8.2 diff --git a/dashboard/run_dashboard.py b/dashboard/run_dashboard.py new file mode 100755 index 0000000..d9fe52d --- /dev/null +++ b/dashboard/run_dashboard.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +""" +Kafka Dashboard Launcher +Run this to start the interactive Kafka monitoring dashboard +""" + +import os +import sys +import argparse +import subprocess +from pathlib import Path + + +def check_dependencies(): + """Check if required Python packages are installed""" + required_packages = [ + 'dash', 'plotly', 'pandas', 'numpy', 'dash_bootstrap_components' + ] + + missing_packages = [] + + for package in required_packages: + try: + __import__(package) + except ImportError: + missing_packages.append(package) + + if missing_packages: + print("āŒ Missing required packages:") + for package in missing_packages: + print(f" - {package}") + print("\nšŸ’” Install missing packages with:") + print(" pip install -r requirements.txt") + return False + + return True + + +def find_data_directory(): + """Find the kafka-analysis directory""" + current_dir = Path.cwd() + + # Common locations to check + possible_paths = [ + current_dir / "kafka-analysis", + current_dir / ".." / "kafka-analysis", + current_dir / ".." / ".." 
/ "kafka-analysis", + Path.home() / "kafka-analysis" + ] + + for path in possible_paths: + if path.exists() and path.is_dir(): + return str(path) + + return None + + +def check_analysis_files(data_dir): + """Check if analysis files exist in the data directory""" + if not data_dir or not os.path.exists(data_dir): + return False, [] + + json_files = [] + for file in os.listdir(data_dir): + if file.startswith('kafka-analysis-') and file.endswith('.json'): + json_files.append(file) + + return len(json_files) > 0, json_files + + +def install_dependencies(): + """Install required dependencies""" + print("šŸ“¦ Installing required dependencies...") + try: + subprocess.check_call([ + sys.executable, "-m", "pip", "install", "-r", "requirements.txt" + ]) + print("āœ… Dependencies installed successfully!") + return True + except subprocess.CalledProcessError: + print("āŒ Failed to install dependencies") + return False + + +def main(): + parser = argparse.ArgumentParser( + description='Kafka Cluster Dashboard', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python run_dashboard.py # Run with auto-detected data directory + python run_dashboard.py --data-dir ./reports # Use custom data directory + python run_dashboard.py --port 8080 # Run on different port + python run_dashboard.py --install # Install dependencies first + """ + ) + + parser.add_argument('--data-dir', + help='Directory containing Kafka analysis reports') + parser.add_argument('--port', type=int, default=8050, + help='Port to run dashboard on (default: 8050)') + parser.add_argument('--host', default='127.0.0.1', + help='Host to bind to (default: 127.0.0.1)') + parser.add_argument('--debug', action='store_true', + help='Run in debug mode') + parser.add_argument('--install', action='store_true', + help='Install required dependencies first') + + args = parser.parse_args() + + # Change to dashboard directory + dashboard_dir = Path(__file__).parent + os.chdir(dashboard_dir) + + # Install dependencies if requested + if args.install: + if not install_dependencies(): + sys.exit(1) + + # Check dependencies + if not check_dependencies(): + print("\nšŸ’” Run with --install flag to install dependencies automatically:") + print(" python run_dashboard.py --install") + sys.exit(1) + + # Determine data directory + if args.data_dir: + data_dir = args.data_dir + else: + data_dir = find_data_directory() + if not data_dir: + data_dir = "../kafka-analysis" # Default fallback + + # Check for analysis files + has_files, json_files = check_analysis_files(data_dir) + + print("šŸš€ Kafka Dashboard Launcher") + print("=" * 40) + print(f"šŸ“ Data directory: {os.path.abspath(data_dir)}") + + if not has_files: + print("\nāš ļø No analysis files found!") + print("šŸ’” To generate analysis reports, run:") + print(" cd /path/to/kafka-analyzer") + print(" npx superstream-kafka-analyzer --config config.json") + print("\nšŸ“ The dashboard will still start but show 'No data available'") + else: + print(f"āœ… Found {len(json_files)} analysis file(s)") + print(f"šŸ“„ Latest: {max(json_files, key=lambda x: os.path.getmtime(os.path.join(data_dir, x)))}") + + print(f"\n🌐 Starting dashboard on http://{args.host}:{args.port}") + print("⚔ Dashboard will auto-refresh every 30 seconds") + print("šŸ”„ Click 'Refresh Now' button to manually refresh") + print("\nšŸ“Š Dashboard Features:") + print(" • Real-time cluster health monitoring") + print(" • Interactive charts and metrics") + print(" • Health checks analysis") + print(" • Topics and consumer 
groups overview") + print(" • Historical trend analysis") + + try: + # Import and run dashboard + from app import KafkaDashboard + + dashboard = KafkaDashboard(data_dir=data_dir) + dashboard.run(debug=args.debug, port=args.port, host=args.host) + + except KeyboardInterrupt: + print("\nšŸ‘‹ Dashboard stopped by user") + except Exception as e: + print(f"\nāŒ Error starting dashboard: {e}") + print("šŸ’” Try running with --debug flag for more details") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/dashboard/start_monitoring.py b/dashboard/start_monitoring.py new file mode 100755 index 0000000..2721f5c --- /dev/null +++ b/dashboard/start_monitoring.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +Kafka Analysis + Dashboard Integration +Runs the Kafka analyzer and then starts the dashboard +""" + +import os +import sys +import subprocess +import time +import argparse +from pathlib import Path + + +def run_kafka_analysis(config_file=None, bootstrap_servers=None): + """Run the Kafka analyzer to generate reports""" + print("šŸ” Running Kafka Analysis...") + + # Change to parent directory (where the analyzer is) + original_dir = Path.cwd() + analyzer_dir = Path(__file__).parent.parent + os.chdir(analyzer_dir) + + try: + # Build command + cmd = ["node", "bin/index.js"] + + if config_file: + cmd.extend(["--config", config_file]) + elif bootstrap_servers: + cmd.extend(["--bootstrap-servers", bootstrap_servers]) + + # Run analyzer + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode == 0: + print("āœ… Kafka analysis completed successfully!") + return True + else: + print("āŒ Kafka analysis failed:") + print(result.stderr) + return False + + except Exception as e: + print(f"āŒ Error running analyzer: {e}") + return False + finally: + os.chdir(original_dir) + + +def start_dashboard(port=8050, host='127.0.0.1'): + """Start the dashboard""" + print("šŸš€ Starting Dashboard...") + + try: + # Change to dashboard directory + dashboard_dir = Path(__file__).parent + os.chdir(dashboard_dir) + + # Start dashboard + subprocess.run([ + sys.executable, "run_dashboard.py", + "--port", str(port), + "--host", host + ]) + + except KeyboardInterrupt: + print("\nšŸ‘‹ Dashboard stopped by user") + except Exception as e: + print(f"āŒ Error starting dashboard: {e}") + + +def main(): + parser = argparse.ArgumentParser( + description='Run Kafka analysis and start dashboard', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python start_monitoring.py --config ../kraft-config.json # Use config file + python start_monitoring.py --servers localhost:29092 # Use servers directly + python start_monitoring.py --dashboard-only # Skip analysis, start dashboard only + """ + ) + + parser.add_argument('--config', + help='Path to Kafka analyzer config file') + parser.add_argument('--servers', + help='Kafka bootstrap servers (comma-separated)') + parser.add_argument('--port', type=int, default=8050, + help='Dashboard port (default: 8050)') + parser.add_argument('--host', default='127.0.0.1', + help='Dashboard host (default: 127.0.0.1)') + parser.add_argument('--dashboard-only', action='store_true', + help='Skip analysis, start dashboard only') + parser.add_argument('--analyze-only', action='store_true', + help='Run analysis only, skip dashboard') + + args = parser.parse_args() + + print("šŸŽÆ Kafka Monitoring Setup") + print("=" * 40) + + # Step 1: Run analysis (unless dashboard-only) + if not args.dashboard_only: + if not args.config and not 
args.servers: + print("āŒ Error: Must provide either --config or --servers") + print("šŸ’” Examples:") + print(" python start_monitoring.py --config ../kraft-config.json") + print(" python start_monitoring.py --servers localhost:29092") + sys.exit(1) + + analysis_success = run_kafka_analysis( + config_file=args.config, + bootstrap_servers=args.servers + ) + + if not analysis_success: + print("āŒ Analysis failed. Dashboard will show 'No data available'") + if not input("Continue with dashboard anyway? (y/N): ").lower().startswith('y'): + sys.exit(1) + + # Step 2: Start dashboard (unless analyze-only) + if not args.analyze_only: + print(f"\n🌐 Dashboard will be available at: http://{args.host}:{args.port}") + time.sleep(2) # Give user time to read + start_dashboard(port=args.port, host=args.host) + else: + print("āœ… Analysis complete. Dashboard not started (--analyze-only flag)") + + +if __name__ == "__main__": + main() diff --git a/dashboard/utils/__pycache__/data_loader.cpython-312.pyc b/dashboard/utils/__pycache__/data_loader.cpython-312.pyc new file mode 100644 index 0000000..d660636 Binary files /dev/null and b/dashboard/utils/__pycache__/data_loader.cpython-312.pyc differ diff --git a/dashboard/utils/data_loader.py b/dashboard/utils/data_loader.py new file mode 100644 index 0000000..3911302 --- /dev/null +++ b/dashboard/utils/data_loader.py @@ -0,0 +1,223 @@ +""" +Data loader utilities for Kafka Dashboard +Handles loading and processing Kafka analysis reports +""" + +import json +import os +import pandas as pd +from datetime import datetime +from typing import Dict, List, Optional, Any +import glob + + +class KafkaDataLoader: + """Handles loading and processing Kafka analysis data""" + + def __init__(self, data_dir: str = "../kafka-analysis"): + self.data_dir = data_dir + + def get_latest_report(self) -> Optional[Dict[str, Any]]: + """Load the most recent Kafka analysis report""" + try: + if not os.path.exists(self.data_dir): + return None + + # Find all JSON analysis files + pattern = os.path.join(self.data_dir, "kafka-analysis-*.json") + json_files = glob.glob(pattern) + + if not json_files: + return None + + # Get the most recent file by modification time + latest_file = max(json_files, key=os.path.getmtime) + + with open(latest_file, 'r') as f: + data = json.load(f) + + # Add file metadata + data['_metadata'] = { + 'filename': os.path.basename(latest_file), + 'filepath': latest_file, + 'last_modified': datetime.fromtimestamp(os.path.getmtime(latest_file)).isoformat() + } + + return data + + except Exception as e: + print(f"Error loading report: {e}") + return None + + def get_all_reports(self) -> List[Dict[str, Any]]: + """Load all available Kafka analysis reports""" + try: + if not os.path.exists(self.data_dir): + return [] + + pattern = os.path.join(self.data_dir, "kafka-analysis-*.json") + json_files = glob.glob(pattern) + + reports = [] + for file_path in sorted(json_files, key=os.path.getmtime): + try: + with open(file_path, 'r') as f: + data = json.load(f) + + # Add metadata + data['_metadata'] = { + 'filename': os.path.basename(file_path), + 'filepath': file_path, + 'last_modified': datetime.fromtimestamp(os.path.getmtime(file_path)).isoformat() + } + + reports.append(data) + except Exception as e: + print(f"Error loading {file_path}: {e}") + continue + + return reports + + except Exception as e: + print(f"Error loading reports: {e}") + return [] + + def get_health_score(self, data: Dict[str, Any]) -> float: + """Calculate health score from analysis data""" + if not data 
or 'healthChecks' not in data:
+            return 0.0
+
+        health_checks = data['healthChecks']
+        total_checks = health_checks.get('totalChecks', 0)
+        passed_checks = health_checks.get('passedChecks', 0)
+
+        return (passed_checks / total_checks * 100) if total_checks > 0 else 0.0
+
+    def get_topics_summary(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """Extract topics summary from analysis data"""
+        if not data:
+            return {}
+
+        summary = data.get('summary', {})
+        topics = data.get('topics', [])
+
+        # Calculate additional metrics
+        user_topics = [t for t in topics if not t.get('isInternal', False)]
+        internal_topics = [t for t in topics if t.get('isInternal', False)]
+
+        total_partitions = sum(t.get('partitions', 0) for t in topics)
+        avg_partitions = total_partitions / len(topics) if topics else 0
+
+        return {
+            'total_topics': len(topics),
+            'user_topics': len(user_topics),
+            'internal_topics': len(internal_topics),
+            'total_partitions': total_partitions,
+            'avg_partitions_per_topic': round(avg_partitions, 2),
+            'topics_with_errors': sum(1 for t in topics if t.get('errorCode', 0) != 0)
+        }
+
+    def get_broker_info(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """Extract broker information from analysis data"""
+        if not data or 'clusterInfo' not in data:
+            return {}
+
+        cluster_info = data['clusterInfo']
+        brokers = cluster_info.get('brokers', [])
+
+        return {
+            'total_brokers': len(brokers),
+            'cluster_id': cluster_info.get('clusterId', 'Unknown'),
+            'controller': cluster_info.get('controller', 'Unknown'),
+            'brokers': brokers
+        }
+
+    def get_consumer_groups_summary(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """Extract consumer groups summary"""
+        if not data or 'consumerGroups' not in data:
+            return {}
+
+        consumer_groups = data['consumerGroups']
+
+        active_groups = sum(1 for cg in consumer_groups if cg.get('members', 0) > 0)
+        inactive_groups = len(consumer_groups) - active_groups
+
+        return {
+            'total_groups': len(consumer_groups),
+            'active_groups': active_groups,
+            'inactive_groups': inactive_groups,
+            'groups': consumer_groups
+        }
+
+    def extract_health_checks_details(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Extract detailed health checks information"""
+        if not data or 'healthChecks' not in data:
+            return []
+
+        health_checks = data['healthChecks']
+        checks = health_checks.get('checks', [])
+
+        # Normalize report statuses ('pass', 'fail', 'warning', 'info') to the
+        # uppercase values expected by the dashboard (see get_status_emoji)
+        status_map = {'pass': 'PASSED', 'fail': 'FAILED', 'warning': 'WARNING', 'info': 'INFO'}
+
+        detailed_checks = []
+        for i, check in enumerate(checks):
+            raw_status = str(check.get('status', 'unknown')).lower()
+            detailed_checks.append({
+                'id': check.get('id', i),
+                'name': check.get('name', f"Health Check {i+1}"),
+                'status': status_map.get(raw_status, 'UNKNOWN'),
+                'message': check.get('message') or check.get('description') or 'No message',
+                'recommendation': check.get('recommendation') or 'No recommendation'
+            })
+
+        return detailed_checks
+
+
+class HistoricalDataProcessor:
+    """Process historical data for trend analysis"""
+
+    def __init__(self, reports: List[Dict[str, Any]]):
+        self.reports = reports
+
+    def get_health_score_trend(self) -> pd.DataFrame:
+        """Get health score trend over time"""
+        data_loader = KafkaDataLoader()
+
+        trend_data = []
+        for report in self.reports:
+            timestamp = report.get('timestamp', report.get('_metadata', {}).get('last_modified'))
+            health_score = data_loader.get_health_score(report)
+
+            trend_data.append({
+                'timestamp': timestamp,
+                'health_score': health_score,
+                'total_checks': report.get('healthChecks', {}).get('totalChecks', 0),
+                'passed_checks': report.get('healthChecks', {}).get('passedChecks', 0),
+                'failed_checks':
report.get('healthChecks', {}).get('failedChecks', 0) + }) + + df = pd.DataFrame(trend_data) + if not df.empty: + df['timestamp'] = pd.to_datetime(df['timestamp']) + df = df.sort_values('timestamp') + + return df + + def get_topics_trend(self) -> pd.DataFrame: + """Get topics trend over time""" + trend_data = [] + for report in self.reports: + timestamp = report.get('timestamp', report.get('_metadata', {}).get('last_modified')) + summary = report.get('summary', {}) + + trend_data.append({ + 'timestamp': timestamp, + 'total_topics': summary.get('totalTopics', 0), + 'user_topics': summary.get('userTopics', 0), + 'total_partitions': summary.get('totalPartitions', 0) + }) + + df = pd.DataFrame(trend_data) + if not df.empty: + df['timestamp'] = pd.to_datetime(df['timestamp']) + df = df.sort_values('timestamp') + + return df diff --git a/kafka-analysis/kafka-analysis-1752105684783.json b/kafka-analysis/kafka-analysis-1752105684783.json new file mode 100644 index 0000000..d39f551 --- /dev/null +++ b/kafka-analysis/kafka-analysis-1752105684783.json @@ -0,0 +1,698 @@ +{ + "clusterInfo": { + "clusterId": "MkU3OEVBNTcwNTJENDM2Qk", + "controller": 1, + "brokers": [ + { + "nodeId": 1, + "host": "localhost", + "port": 29092 + } + ], + "topics": 2 + }, + "topics": [ + { + "name": "test_topic", + "partitions": 1, + "replicationFactor": 1, + "config": { + "undefined": { + "isDefault": true, + "isSensitive": false + } + }, + "isInternal": false, + "errorCode": 0, + "errorMessage": "Topic has errors", + "vendor": "apache-kafka", + "partitionDetails": [ + { + "id": 0, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + } + ] + }, + { + "name": "__consumer_offsets", + "partitions": 50, + "replicationFactor": 1, + "config": { + "undefined": { + "isDefault": true, + "isSensitive": false + } + }, + "isInternal": true, + "errorCode": 0, + "errorMessage": "Topic has errors", + "vendor": "apache-kafka", + "partitionDetails": [ + { + "id": 7, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 1, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 42, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 13, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 25, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 36, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 31, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 37, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 43, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 18, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 24, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 8, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 14, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 30, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 29, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 32, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 26, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 38, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 0, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 17, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 41, 
+ "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 20, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 47, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 49, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 15, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 23, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 44, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 12, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 9, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 3, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 6, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 35, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 10, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 45, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 16, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 4, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 33, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 28, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 39, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 40, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 5, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 34, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 21, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 46, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 11, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 27, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 2, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 22, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 19, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 48, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + } + ] + } + ], + "consumerGroups": [ + { + "groupId": "perf-test-group", + "protocolType": "unknown", + "members": 0, + "state": "error" + } + ], + "summary": { + "totalTopics": 2, + "totalPartitions": 51, + "internalTopics": 1, + "userTopics": 1, + "topicsWithErrors": 0, + "consumerGroups": 1 + }, + "timestamp": "2025-07-10T00:01:24.675Z", + "healthChecks": { + "vendor": "apache-kafka", + "totalChecks": 14, + "passedChecks": 7, + "failedChecks": 1, + "warnings": 5, + "checks": [ + { + "id": "replication-factor", + "name": "Replication Factor vs Broker Count", + "status": "pass", + "message": "All topics have appropriate replication factor (≤ 1 brokers)", + "recommendation": null, + "description": "Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count." + }, + { + "id": "partition-distribution", + "name": "Topic Partition Distribution", + "status": "pass", + "message": "Good partition distribution: avg=1.0, min=1, max=1", + "recommendation": null, + "description": "Checks if user topics have a balanced number of partitions. 
Healthy: Partition counts are similar. Warning: Large difference between min and max partitions." + }, + { + "id": "consumer-groups", + "name": "Consumer Group Health", + "status": "warning", + "message": "1 consumer group(s) have no active members: perf-test-group", + "recommendation": "Consider cleaning up unused consumer groups", + "description": "Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members." + }, + { + "id": "internal-topics", + "name": "Internal Topics Health", + "status": "pass", + "message": "All 1 internal topics are healthy", + "recommendation": null, + "description": "Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions." + }, + { + "id": "under-replicated-partitions", + "name": "Under-Replicated Partitions", + "status": "pass", + "message": "All topics have the expected number of in-sync replicas", + "recommendation": null, + "description": "" + }, + { + "id": "min-insync-replicas", + "name": "Min In-Sync Replicas Configuration", + "status": "pass", + "message": "All topics have appropriate min.insync.replicas configuration", + "recommendation": null, + "description": "" + }, + { + "id": "rack-awareness", + "name": "Rack Awareness", + "status": "warning", + "message": "Rack awareness is not configured - no brokers have rack information", + "recommendation": "Consider enabling rack awareness for better availability and fault tolerance", + "description": "Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured." + }, + { + "id": "replica-distribution", + "name": "Replica Distribution", + "status": "pass", + "message": "Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)", + "recommendation": null, + "description": "Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues." + }, + { + "id": "metrics-enabled", + "name": "Metrics Configuration", + "status": "warning", + "message": "No JMX metrics configuration detected on any brokers", + "recommendation": "Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis", + "description": "Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured." + }, + { + "id": "logging-configuration", + "name": "Generic Kafka Logging Configuration", + "status": "info", + "message": "Generic Kafka logging configuration check", + "recommendation": "Verify log4j configuration and log directory permissions in server.properties", + "description": "Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured." 
+ }, + { + "id": "authentication-configuration", + "name": "Generic Kafka Authentication Configuration", + "status": "fail", + "message": "Unauthenticated access is enabled - this is a security risk", + "recommendation": "Enable SASL or SSL authentication in server.properties for better security", + "description": "Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk)." + }, + { + "id": "quotas-configuration", + "name": "Generic Kafka Quotas Configuration", + "status": "warning", + "message": "No quota configuration detected in Kafka cluster", + "recommendation": "Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management", + "description": "Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available." + }, + { + "id": "payload-compression", + "name": "Payload Compression", + "status": "warning", + "message": "No compression detected on any of the 1 user topics (0%)", + "recommendation": "Enable compression on topics to reduce storage usage and improve network performance", + "description": "Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze." + }, + { + "id": "infinite-retention-policy", + "name": "Infinite Retention Policy", + "status": "pass", + "message": "No topics have infinite retention policy enabled", + "recommendation": null, + "description": "Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy." 
+ } + ] + } +} \ No newline at end of file diff --git a/kafka-analysis/kafka-analysis-1752105715380.json b/kafka-analysis/kafka-analysis-1752105715380.json new file mode 100644 index 0000000..5e6dfd2 --- /dev/null +++ b/kafka-analysis/kafka-analysis-1752105715380.json @@ -0,0 +1,698 @@ +{ + "clusterInfo": { + "clusterId": "MkU3OEVBNTcwNTJENDM2Qk", + "controller": 1, + "brokers": [ + { + "nodeId": 1, + "host": "localhost", + "port": 29092 + } + ], + "topics": 2 + }, + "topics": [ + { + "name": "test_topic", + "partitions": 1, + "replicationFactor": 1, + "config": { + "undefined": { + "isDefault": true, + "isSensitive": false + } + }, + "isInternal": false, + "errorCode": 0, + "errorMessage": "Topic has errors", + "vendor": "apache-kafka", + "partitionDetails": [ + { + "id": 0, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + } + ] + }, + { + "name": "__consumer_offsets", + "partitions": 50, + "replicationFactor": 1, + "config": { + "undefined": { + "isDefault": true, + "isSensitive": false + } + }, + "isInternal": true, + "errorCode": 0, + "errorMessage": "Topic has errors", + "vendor": "apache-kafka", + "partitionDetails": [ + { + "id": 7, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 1, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 42, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 13, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 25, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 36, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 31, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 37, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 43, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 18, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 24, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 8, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 14, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 30, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 29, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 32, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 26, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 38, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 0, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 17, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 41, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 20, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 47, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 49, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 15, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 23, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 44, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 12, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 9, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 3, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + 
"id": 6, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 35, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 10, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 45, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 16, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 4, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 33, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 28, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 39, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 40, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 5, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 34, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 21, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 46, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 11, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 27, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 2, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 22, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 19, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 48, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + } + ] + } + ], + "consumerGroups": [ + { + "groupId": "perf-test-group", + "protocolType": "unknown", + "members": 0, + "state": "error" + } + ], + "summary": { + "totalTopics": 2, + "totalPartitions": 51, + "internalTopics": 1, + "userTopics": 1, + "topicsWithErrors": 0, + "consumerGroups": 1 + }, + "timestamp": "2025-07-10T00:01:55.307Z", + "healthChecks": { + "vendor": "apache-kafka", + "totalChecks": 14, + "passedChecks": 7, + "failedChecks": 1, + "warnings": 5, + "checks": [ + { + "id": "replication-factor", + "name": "Replication Factor vs Broker Count", + "status": "pass", + "message": "All topics have appropriate replication factor (≤ 1 brokers)", + "recommendation": null, + "description": "Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count." + }, + { + "id": "partition-distribution", + "name": "Topic Partition Distribution", + "status": "pass", + "message": "Good partition distribution: avg=1.0, min=1, max=1", + "recommendation": null, + "description": "Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions." + }, + { + "id": "consumer-groups", + "name": "Consumer Group Health", + "status": "warning", + "message": "1 consumer group(s) have no active members: perf-test-group", + "recommendation": "Consider cleaning up unused consumer groups", + "description": "Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members." + }, + { + "id": "internal-topics", + "name": "Internal Topics Health", + "status": "pass", + "message": "All 1 internal topics are healthy", + "recommendation": null, + "description": "Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. 
Failed: Any internal topic has 0 or missing partitions." + }, + { + "id": "under-replicated-partitions", + "name": "Under-Replicated Partitions", + "status": "pass", + "message": "All topics have the expected number of in-sync replicas", + "recommendation": null, + "description": "" + }, + { + "id": "min-insync-replicas", + "name": "Min In-Sync Replicas Configuration", + "status": "pass", + "message": "All topics have appropriate min.insync.replicas configuration", + "recommendation": null, + "description": "" + }, + { + "id": "rack-awareness", + "name": "Rack Awareness", + "status": "warning", + "message": "Rack awareness is not configured - no brokers have rack information", + "recommendation": "Consider enabling rack awareness for better availability and fault tolerance", + "description": "Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured." + }, + { + "id": "replica-distribution", + "name": "Replica Distribution", + "status": "pass", + "message": "Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)", + "recommendation": null, + "description": "Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues." + }, + { + "id": "metrics-enabled", + "name": "Metrics Configuration", + "status": "warning", + "message": "No JMX metrics configuration detected on any brokers", + "recommendation": "Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis", + "description": "Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured." + }, + { + "id": "logging-configuration", + "name": "Generic Kafka Logging Configuration", + "status": "info", + "message": "Generic Kafka logging configuration check", + "recommendation": "Verify log4j configuration and log directory permissions in server.properties", + "description": "Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured." + }, + { + "id": "authentication-configuration", + "name": "Generic Kafka Authentication Configuration", + "status": "fail", + "message": "Unauthenticated access is enabled - this is a security risk", + "recommendation": "Enable SASL or SSL authentication in server.properties for better security", + "description": "Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk)." 
+ }, + { + "id": "quotas-configuration", + "name": "Generic Kafka Quotas Configuration", + "status": "warning", + "message": "No quota configuration detected in Kafka cluster", + "recommendation": "Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management", + "description": "Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available." + }, + { + "id": "payload-compression", + "name": "Payload Compression", + "status": "warning", + "message": "No compression detected on any of the 1 user topics (0%)", + "recommendation": "Enable compression on topics to reduce storage usage and improve network performance", + "description": "Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze." + }, + { + "id": "infinite-retention-policy", + "name": "Infinite Retention Policy", + "status": "pass", + "message": "No topics have infinite retention policy enabled", + "recommendation": null, + "description": "Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy." + } + ] + } +} \ No newline at end of file diff --git a/kafka-analysis/kafka-analysis-1752105730445.json b/kafka-analysis/kafka-analysis-1752105730445.json new file mode 100644 index 0000000..bb20ecc --- /dev/null +++ b/kafka-analysis/kafka-analysis-1752105730445.json @@ -0,0 +1,698 @@ +{ + "clusterInfo": { + "clusterId": "MkU3OEVBNTcwNTJENDM2Qk", + "controller": 1, + "brokers": [ + { + "nodeId": 1, + "host": "localhost", + "port": 29092 + } + ], + "topics": 2 + }, + "topics": [ + { + "name": "test_topic", + "partitions": 1, + "replicationFactor": 1, + "config": { + "undefined": { + "isDefault": true, + "isSensitive": false + } + }, + "isInternal": false, + "errorCode": 0, + "errorMessage": "Topic has errors", + "vendor": "apache-kafka", + "partitionDetails": [ + { + "id": 0, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + } + ] + }, + { + "name": "__consumer_offsets", + "partitions": 50, + "replicationFactor": 1, + "config": { + "undefined": { + "isDefault": true, + "isSensitive": false + } + }, + "isInternal": true, + "errorCode": 0, + "errorMessage": "Topic has errors", + "vendor": "apache-kafka", + "partitionDetails": [ + { + "id": 7, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 1, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 42, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 13, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 25, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 36, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 31, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 37, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 43, + "leader": 1, + 
"replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 18, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 24, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 8, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 14, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 30, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 29, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 32, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 26, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 38, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 0, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 17, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 41, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 20, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 47, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 49, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 15, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 23, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 44, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 12, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 9, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 3, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 6, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 35, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 10, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 45, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 16, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 4, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 33, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 28, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 39, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 40, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 5, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 34, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 21, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 46, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 11, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 27, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 2, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 22, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 19, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + }, + { + "id": 48, + "leader": 1, + "replicas": [ + 1 + ], + "isr": [ + 1 + ] + } + ] + } + ], + "consumerGroups": [ + { + "groupId": "perf-test-group", + "protocolType": "unknown", + "members": 0, + "state": "error" + } + ], + "summary": { + "totalTopics": 2, + "totalPartitions": 51, + "internalTopics": 1, + 
"userTopics": 1, + "topicsWithErrors": 0, + "consumerGroups": 1 + }, + "timestamp": "2025-07-10T00:02:10.369Z", + "healthChecks": { + "vendor": "apache-kafka", + "totalChecks": 14, + "passedChecks": 7, + "failedChecks": 1, + "warnings": 5, + "checks": [ + { + "id": "replication-factor", + "name": "Replication Factor vs Broker Count", + "status": "pass", + "message": "All topics have appropriate replication factor (≤ 1 brokers)", + "recommendation": null, + "description": "Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count." + }, + { + "id": "partition-distribution", + "name": "Topic Partition Distribution", + "status": "pass", + "message": "Good partition distribution: avg=1.0, min=1, max=1", + "recommendation": null, + "description": "Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions." + }, + { + "id": "consumer-groups", + "name": "Consumer Group Health", + "status": "warning", + "message": "1 consumer group(s) have no active members: perf-test-group", + "recommendation": "Consider cleaning up unused consumer groups", + "description": "Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members." + }, + { + "id": "internal-topics", + "name": "Internal Topics Health", + "status": "pass", + "message": "All 1 internal topics are healthy", + "recommendation": null, + "description": "Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions." + }, + { + "id": "under-replicated-partitions", + "name": "Under-Replicated Partitions", + "status": "pass", + "message": "All topics have the expected number of in-sync replicas", + "recommendation": null, + "description": "" + }, + { + "id": "min-insync-replicas", + "name": "Min In-Sync Replicas Configuration", + "status": "pass", + "message": "All topics have appropriate min.insync.replicas configuration", + "recommendation": null, + "description": "" + }, + { + "id": "rack-awareness", + "name": "Rack Awareness", + "status": "warning", + "message": "Rack awareness is not configured - no brokers have rack information", + "recommendation": "Consider enabling rack awareness for better availability and fault tolerance", + "description": "Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured." + }, + { + "id": "replica-distribution", + "name": "Replica Distribution", + "status": "pass", + "message": "Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)", + "recommendation": null, + "description": "Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues." + }, + { + "id": "metrics-enabled", + "name": "Metrics Configuration", + "status": "warning", + "message": "No JMX metrics configuration detected on any brokers", + "recommendation": "Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis", + "description": "Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. 
For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured." + }, + { + "id": "logging-configuration", + "name": "Generic Kafka Logging Configuration", + "status": "info", + "message": "Generic Kafka logging configuration check", + "recommendation": "Verify log4j configuration and log directory permissions in server.properties", + "description": "Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured." + }, + { + "id": "authentication-configuration", + "name": "Generic Kafka Authentication Configuration", + "status": "fail", + "message": "Unauthenticated access is enabled - this is a security risk", + "recommendation": "Enable SASL or SSL authentication in server.properties for better security", + "description": "Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk)." + }, + { + "id": "quotas-configuration", + "name": "Generic Kafka Quotas Configuration", + "status": "warning", + "message": "No quota configuration detected in Kafka cluster", + "recommendation": "Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management", + "description": "Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available." + }, + { + "id": "payload-compression", + "name": "Payload Compression", + "status": "warning", + "message": "No compression detected on any of the 1 user topics (0%)", + "recommendation": "Enable compression on topics to reduce storage usage and improve network performance", + "description": "Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze." + }, + { + "id": "infinite-retention-policy", + "name": "Infinite Retention Policy", + "status": "pass", + "message": "No topics have infinite retention policy enabled", + "recommendation": null, + "description": "Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy." 
+ } + ] + } +} \ No newline at end of file diff --git a/kafka-analysis/kafka-health-checks-1752105684783.csv b/kafka-analysis/kafka-health-checks-1752105684783.csv new file mode 100644 index 0000000..fac3ac0 --- /dev/null +++ b/kafka-analysis/kafka-health-checks-1752105684783.csv @@ -0,0 +1,16 @@ +"Health Check Results" +"Check Name","Status","Message","Description","Recommendation" +"Replication Factor vs Broker Count","pass","All topics have appropriate replication factor (≤ 1 brokers)","Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count.","" +"Topic Partition Distribution","pass","Good partition distribution: avg=1.0, min=1, max=1","Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions.","" +"Consumer Group Health","warning","1 consumer group(s) have no active members: perf-test-group","Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members.","Consider cleaning up unused consumer groups" +"Internal Topics Health","pass","All 1 internal topics are healthy","Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions.","" +"Under-Replicated Partitions","pass","All topics have the expected number of in-sync replicas","","" +"Min In-Sync Replicas Configuration","pass","All topics have appropriate min.insync.replicas configuration","","" +"Rack Awareness","warning","Rack awareness is not configured - no brokers have rack information","Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured.","Consider enabling rack awareness for better availability and fault tolerance" +"Replica Distribution","pass","Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)","Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues.","" +"Metrics Configuration","warning","No JMX metrics configuration detected on any brokers","Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured.","Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis" +"Generic Kafka Logging Configuration","info","Generic Kafka logging configuration check","Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured.","Verify log4j configuration and log directory permissions in server.properties" +"Generic Kafka Authentication Configuration","fail","Unauthenticated access is enabled - this is a security risk","Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. 
For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk).","Enable SASL or SSL authentication in server.properties for better security" +"Generic Kafka Quotas Configuration","warning","No quota configuration detected in Kafka cluster","Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available.","Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management" +"Payload Compression","warning","No compression detected on any of the 1 user topics (0%)","Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze.","Enable compression on topics to reduce storage usage and improve network performance" +"Infinite Retention Policy","pass","No topics have infinite retention policy enabled","Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy.","" \ No newline at end of file diff --git a/kafka-analysis/kafka-health-checks-1752105715380.csv b/kafka-analysis/kafka-health-checks-1752105715380.csv new file mode 100644 index 0000000..fac3ac0 --- /dev/null +++ b/kafka-analysis/kafka-health-checks-1752105715380.csv @@ -0,0 +1,16 @@ +"Health Check Results" +"Check Name","Status","Message","Description","Recommendation" +"Replication Factor vs Broker Count","pass","All topics have appropriate replication factor (≤ 1 brokers)","Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count.","" +"Topic Partition Distribution","pass","Good partition distribution: avg=1.0, min=1, max=1","Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions.","" +"Consumer Group Health","warning","1 consumer group(s) have no active members: perf-test-group","Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members.","Consider cleaning up unused consumer groups" +"Internal Topics Health","pass","All 1 internal topics are healthy","Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions.","" +"Under-Replicated Partitions","pass","All topics have the expected number of in-sync replicas","","" +"Min In-Sync Replicas Configuration","pass","All topics have appropriate min.insync.replicas configuration","","" +"Rack Awareness","warning","Rack awareness is not configured - no brokers have rack information","Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. 
Warning: Rack awareness is not configured.","Consider enabling rack awareness for better availability and fault tolerance" +"Replica Distribution","pass","Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)","Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues.","" +"Metrics Configuration","warning","No JMX metrics configuration detected on any brokers","Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured.","Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis" +"Generic Kafka Logging Configuration","info","Generic Kafka logging configuration check","Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured.","Verify log4j configuration and log directory permissions in server.properties" +"Generic Kafka Authentication Configuration","fail","Unauthenticated access is enabled - this is a security risk","Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk).","Enable SASL or SSL authentication in server.properties for better security" +"Generic Kafka Quotas Configuration","warning","No quota configuration detected in Kafka cluster","Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available.","Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management" +"Payload Compression","warning","No compression detected on any of the 1 user topics (0%)","Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze.","Enable compression on topics to reduce storage usage and improve network performance" +"Infinite Retention Policy","pass","No topics have infinite retention policy enabled","Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). 
Info: Unable to verify retention policy.","" \ No newline at end of file diff --git a/kafka-analysis/kafka-health-checks-1752105730445.csv b/kafka-analysis/kafka-health-checks-1752105730445.csv new file mode 100644 index 0000000..fac3ac0 --- /dev/null +++ b/kafka-analysis/kafka-health-checks-1752105730445.csv @@ -0,0 +1,16 @@ +"Health Check Results" +"Check Name","Status","Message","Description","Recommendation" +"Replication Factor vs Broker Count","pass","All topics have appropriate replication factor (≤ 1 brokers)","Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count.","" +"Topic Partition Distribution","pass","Good partition distribution: avg=1.0, min=1, max=1","Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions.","" +"Consumer Group Health","warning","1 consumer group(s) have no active members: perf-test-group","Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members.","Consider cleaning up unused consumer groups" +"Internal Topics Health","pass","All 1 internal topics are healthy","Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions.","" +"Under-Replicated Partitions","pass","All topics have the expected number of in-sync replicas","","" +"Min In-Sync Replicas Configuration","pass","All topics have appropriate min.insync.replicas configuration","","" +"Rack Awareness","warning","Rack awareness is not configured - no brokers have rack information","Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured.","Consider enabling rack awareness for better availability and fault tolerance" +"Replica Distribution","pass","Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)","Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues.","" +"Metrics Configuration","warning","No JMX metrics configuration detected on any brokers","Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured.","Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis" +"Generic Kafka Logging Configuration","info","Generic Kafka logging configuration check","Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured.","Verify log4j configuration and log directory permissions in server.properties" +"Generic Kafka Authentication Configuration","fail","Unauthenticated access is enabled - this is a security risk","Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. 
For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk).","Enable SASL or SSL authentication in server.properties for better security" +"Generic Kafka Quotas Configuration","warning","No quota configuration detected in Kafka cluster","Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available.","Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management" +"Payload Compression","warning","No compression detected on any of the 1 user topics (0%)","Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze.","Enable compression on topics to reduce storage usage and improve network performance" +"Infinite Retention Policy","pass","No topics have infinite retention policy enabled","Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy.","" \ No newline at end of file diff --git a/kafka-analysis/kafka-report-1752105684783.html b/kafka-analysis/kafka-report-1752105684783.html new file mode 100644 index 0000000..f94a5af --- /dev/null +++ b/kafka-analysis/kafka-report-1752105684783.html @@ -0,0 +1,850 @@ + + + + Modern Kafka Health Report + + + +
[kafka-report-1752105684783.html body — the HTML markup was lost in extraction. Recoverable content: page title "Modern Kafka Health Report"; heading "Kafka Health Report" with subtitle "Comprehensive analysis of your Kafka cluster health and performance"; a Health Check Summary (14 Total Checks, 7 Passed, 5 Warnings, 1 Failed); and Health Check Results cards for the same 14 checks recorded in kafka-analysis-1752105684783.json, each with a status badge (Failed / Warning / Info / Passed), check name, description, message, and a "šŸ’” Recommendation" line where one exists.]
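For reference, the health score these reports summarize (7 of 14 checks passed) can be recomputed straight from the JSON artifacts. A minimal sketch, assuming the `kafka-analysis-{timestamp}.json` layout shown above; the `load_latest_report` helper is hypothetical, not part of the dashboard code:

```python
import glob
import json
import os

def load_latest_report(data_dir: str = "kafka-analysis") -> dict:
    """Load the newest kafka-analysis-{timestamp}.json report (hypothetical helper)."""
    paths = glob.glob(os.path.join(data_dir, "kafka-analysis-*.json"))
    with open(max(paths, key=os.path.getmtime)) as f:  # newest file wins
        return json.load(f)

report = load_latest_report()
hc = report["healthChecks"]
# Score the run the way the summary does: passed checks over total checks.
score = 100 * hc["passedChecks"] / hc["totalChecks"]
print(f"{hc['passedChecks']}/{hc['totalChecks']} checks passed ({score:.0f}%)")  # 7/14 (50%)
```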
+
+ + + + +
+
+

Kafka Health Report

+

Comprehensive analysis of your Kafka cluster health and performance

+
+ + + +
+

Health Check Summary

+
+
+
14
+
Total Checks
+
+
+
7
+
Passed
+
+
+
5
+
Warnings
+
+
+
1
+
Failed
+
+
+
+ + +
+

Health Check Results

+ +
+ +
+
+
+ Failed +
+

Generic Kafka Authentication Configuration

+ Failed +
+ +
+ Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk). +
+
+ Unauthenticated access is enabled - this is a security risk +
+ +
+

šŸ’” Recommendation: Enable SASL or SSL authentication in server.properties for better security

+
+
+ +
+
+
+ Warning +
+

Consumer Group Health

+ Warning +
+ +
+ Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members. +
+
+ 1 consumer group(s) have no active members: perf-test-group +
+ +
+

šŸ’” Recommendation: Consider cleaning up unused consumer groups

+
+
+ +
+
+
+ Warning +
+

Rack Awareness

+ Warning +
+ +
+ Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured. +
+
+ Rack awareness is not configured - no brokers have rack information +
+ +
+

šŸ’” Recommendation: Consider enabling rack awareness for better availability and fault tolerance

+
+
+ +
+
+
+ Warning +
+

Metrics Configuration

+ Warning +
+ +
+ Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured. +
+
+ No JMX metrics configuration detected on any brokers +
+ +
+

šŸ’” Recommendation: Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis

+
+
+ +
+
+
+ Warning +
+

Generic Kafka Quotas Configuration

+ Warning +
+ +
+ Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available. +
+
+ No quota configuration detected in Kafka cluster +
+ +
+

šŸ’” Recommendation: Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management

+
+
+ +
+
+
+ Warning +
+

Payload Compression

+ Warning +
+ +
+ Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze. +
+
+ No compression detected on any of the 1 user topics (0%) +
+ +
+

šŸ’” Recommendation: Enable compression on topics to reduce storage usage and improve network performance

+
+
+ +
+
+
+ Info +
+

Generic Kafka Logging Configuration

+ Info +
+ +
+ Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured. +
+
+ Generic Kafka logging configuration check +
+ +
+

šŸ’” Recommendation: Verify log4j configuration and log directory permissions in server.properties

+
+
+ +
+
+
+ Passed +
+

Replication Factor vs Broker Count

+ Passed +
+ +
+ Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count. +
+
+ All topics have appropriate replication factor (≤ 1 brokers) +
+ +
+ +
+
+
+ Passed +
+

Topic Partition Distribution

+ Passed +
+ +
+ Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions. +
+
+ Good partition distribution: avg=1.0, min=1, max=1 +
+ +
+ +
+
+
+ Passed +
+

Internal Topics Health

+ Passed +
+ +
+ Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions. +
+
+ All 1 internal topics are healthy +
+ +
+ +
+
+
+ Passed +
+

Under-Replicated Partitions

+ Passed +
+ +
+ All topics have the expected number of in-sync replicas +
+ +
+ +
+
+
+ āœ… Min In-Sync Replicas Configuration
+ All topics have appropriate min.insync.replicas configuration
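A sketch of tightening this setting on a multi-broker cluster (the topic name is a placeholder; `min.insync.replicas=2` is only meaningful when the replication factor is at least 2):

```bash
# Pair with producer acks=all so writes require 2 in-sync replicas.
bin/kafka-configs.sh --bootstrap-server localhost:29092 --alter \
  --entity-type topics --entity-name my-topic \
  --add-config min.insync.replicas=2
```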
+ āœ… Replica Distribution
+ šŸ“‹ Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues.
+ Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)
+ āœ… Infinite Retention Policy
+ šŸ“‹ Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy.
+ No topics have infinite retention policy enabled
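If a topic ever does show `retention.ms=-1` (infinite), a bounded policy can be set per topic; a sketch with placeholder values:

```bash
# 604800000 ms = 7 days; the topic name is a placeholder.
bin/kafka-configs.sh --bootstrap-server localhost:29092 --alter \
  --entity-type topics --entity-name my-topic \
  --add-config retention.ms=604800000
```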
\ No newline at end of file
diff --git a/kafka-analysis/kafka-report-1752105730445.html b/kafka-analysis/kafka-report-1752105730445.html
new file mode 100644
index 0000000..5fd13c7
--- /dev/null
+++ b/kafka-analysis/kafka-report-1752105730445.html
@@ -0,0 +1,850 @@
+ Modern Kafka Health Report
+ Kafka Health Report
+ Comprehensive analysis of your Kafka cluster health and performance
+
+ Health Check Summary
+ Total Checks: 14
+ Passed: 7
+ Warnings: 5
+ Failed: 1
+
+ Health Check Results
+ āŒ Generic Kafka Authentication Configuration
+ šŸ“‹ Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk).
+ Unauthenticated access is enabled - this is a security risk
+ šŸ’” Recommendation: Enable SASL or SSL authentication in server.properties for better security
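A minimal sketch of the recommended direction on a self-managed broker, using SASL/SCRAM over a plaintext listener for brevity (production setups should add TLS); all names, ports, and passwords are illustrative, and the credential command requires a newer Kafka release:

```bash
# Enable a SASL listener and require SCRAM for inter-broker traffic.
cat >> config/server.properties <<'EOF'
listeners=SASL_PLAINTEXT://0.0.0.0:9092
security.inter.broker.protocol=SASL_PLAINTEXT
sasl.enabled.mechanisms=SCRAM-SHA-512
sasl.mechanism.inter.broker.protocol=SCRAM-SHA-512
EOF

# Create SCRAM credentials for a client user (placeholder name and password).
bin/kafka-configs.sh --bootstrap-server localhost:29092 --alter \
  --entity-type users --entity-name analyzer \
  --add-config 'SCRAM-SHA-512=[password=change-me]'
```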
+ āš ļø Consumer Group Health
+ šŸ“‹ Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members.
+ 1 consumer group(s) have no active members: perf-test-group
+ šŸ’” Recommendation: Consider cleaning up unused consumer groups
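The flagged group can be inspected and, if it is truly unused, removed; the group name below comes from the report:

```bash
# Confirm the group has no members and check its committed offsets first.
bin/kafka-consumer-groups.sh --bootstrap-server localhost:29092 \
  --describe --group perf-test-group

# Delete the group once you are sure nothing consumes under this group id.
bin/kafka-consumer-groups.sh --bootstrap-server localhost:29092 \
  --delete --group perf-test-group
```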
+ āš ļø Rack Awareness
+ šŸ“‹ Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured.
+ Rack awareness is not configured - no brokers have rack information
+ šŸ’” Recommendation: Consider enabling rack awareness for better availability and fault tolerance
+
+ āš ļø Metrics Configuration
+ šŸ“‹ Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured.
+ No JMX metrics configuration detected on any brokers
+ šŸ’” Recommendation: Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis
+
+ āš ļø Generic Kafka Quotas Configuration
+ šŸ“‹ Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available.
+ No quota configuration detected in Kafka cluster
+ šŸ’” Recommendation: Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management
+
+ āš ļø Payload Compression
+ šŸ“‹ Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze.
+ No compression detected on any of the 1 user topics (0%)
+ šŸ’” Recommendation: Enable compression on topics to reduce storage usage and improve network performance
+
+ ā„¹ļø Generic Kafka Logging Configuration
+ šŸ“‹ Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured.
+ Generic Kafka logging configuration check
+ šŸ’” Recommendation: Verify log4j configuration and log directory permissions in server.properties
+
+ āœ… Replication Factor vs Broker Count
+ šŸ“‹ Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count.
+ All topics have appropriate replication factor (≤ 1 brokers)
+
+ āœ… Topic Partition Distribution
+ šŸ“‹ Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions.
+ Good partition distribution: avg=1.0, min=1, max=1
+
+ āœ… Internal Topics Health
+ šŸ“‹ Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions.
+ All 1 internal topics are healthy
+
+ āœ… Under-Replicated Partitions
+ All topics have the expected number of in-sync replicas
+
+ āœ… Min In-Sync Replicas Configuration
+ All topics have appropriate min.insync.replicas configuration
+
+ āœ… Replica Distribution
+ šŸ“‹ Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues.
+ Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)
+
+ āœ… Infinite Retention Policy
+ šŸ“‹ Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy.
+ No topics have infinite retention policy enabled
\ No newline at end of file
diff --git a/kafka-analysis/kafka-summary-1752105684783.txt b/kafka-analysis/kafka-summary-1752105684783.txt
new file mode 100644
index 0000000..d7eb92b
--- /dev/null
+++ b/kafka-analysis/kafka-summary-1752105684783.txt
@@ -0,0 +1,77 @@
+Kafka Analysis Summary
+----------------------
+ZooKeepers: 1
+Brokers: 1
+Total Topics: 2
+User Topics: 1
+Internal Topics: 1
+Total Partitions: 51
+Topics with Issues: 0
+
+Health Check Results
+-------------------
+Total Checks: 14
+āœ… Passed: 7
+āŒ Failed: 1
+āš ļø Warnings: 5
+
+āœ… Replication Factor vs Broker Count
+ šŸ“‹ Checks if any topic has a replication factor greater than the number of brokers. Healthy: All topics have RF ≤ broker count. Failed: Any topic has RF > broker count.
+ All topics have appropriate replication factor (≤ 1 brokers)
+
+āœ… Topic Partition Distribution
+ šŸ“‹ Checks if user topics have a balanced number of partitions. Healthy: Partition counts are similar. Warning: Large difference between min and max partitions.
+ Good partition distribution: avg=1.0, min=1, max=1
+
+āš ļø Consumer Group Health
+ šŸ“‹ Checks if all consumer groups have active members. Healthy: All groups have members. Warning: Some groups have no active members.
+ 1 consumer group(s) have no active members: perf-test-group
+ šŸ’” Recommendation: Consider cleaning up unused consumer groups
+
+āœ… Internal Topics Health
+ šŸ“‹ Checks if all internal topics (names starting with __) have partitions > 0. Healthy: All internal topics have partitions. Failed: Any internal topic has 0 or missing partitions.
+ All 1 internal topics are healthy
+
+āœ… Under-Replicated Partitions
+ All topics have the expected number of in-sync replicas
+
+āœ… Min In-Sync Replicas Configuration
+ All topics have appropriate min.insync.replicas configuration
+
+āš ļø Rack Awareness
+ šŸ“‹ Checks if rack awareness is configured in the cluster. Healthy: Rack awareness is configured. Warning: Rack awareness is not configured.
+ Rack awareness is not configured - no brokers have rack information
+ šŸ’” Recommendation: Consider enabling rack awareness for better availability and fault tolerance
+
+āœ… Replica Distribution
+ šŸ“‹ Checks if data replicas are evenly distributed across all brokers. Healthy: Each broker carries a similar number of replicas. Warning/Failed: Some brokers carry significantly more replicas than others, which can cause performance issues.
+ Perfect replica balance: Each broker carries 51.0 replicas on average (range: 51-51)
+
+āš ļø Metrics Configuration
+ šŸ“‹ Checks if monitoring metrics are properly configured. For AWS MSK: Checks Open Monitoring with Prometheus JMX exporter. For others: Checks JMX metrics configuration. Healthy: Metrics are enabled and accessible. Warning: Metrics are not configured or partially configured.
+ No JMX metrics configuration detected on any brokers
+ šŸ’” Recommendation: Enable JMX metrics on brokers for better monitoring, alerting, and performance analysis
+
+ā„¹ļø Generic Kafka Logging Configuration
+ šŸ“‹ Checks if logging configuration is properly configured. For AWS MSK: Checks LoggingInfo configuration and CloudTrail. For Confluent Cloud/Aiven: Built-in logging is available. For others: Checks log4j configuration. Healthy: Logging is enabled and configured. Warning: Logging is not configured or partially configured.
+ Generic Kafka logging configuration check
+ šŸ’” Recommendation: Verify log4j configuration and log directory permissions in server.properties
+
+āŒ Generic Kafka Authentication Configuration
+ šŸ“‹ Checks if unauthenticated access is enabled. For AWS MSK: Checks if SASL or SSL is configured. For Confluent Cloud/Aiven: Built-in authentication prevents unauthenticated access. For others: Checks if SASL or SSL is configured. Healthy: Authentication is enabled (no unauthenticated access). Failed: Unauthenticated access is enabled (security risk).
+ Unauthenticated access is enabled - this is a security risk
+ šŸ’” Recommendation: Enable SASL or SSL authentication in server.properties for better security
+
+āš ļø Generic Kafka Quotas Configuration
+ šŸ“‹ Checks if Kafka quotas are configured and being used. For AWS MSK: Checks quota configuration via AWS console/CLI. For Confluent Cloud/Aiven: Built-in quota management is available. For others: Checks server.properties and kafka-configs.sh for quota settings. Healthy: Quotas are configured and managed. Info: Quotas configuration check available.
+ No quota configuration detected in Kafka cluster
+ šŸ’” Recommendation: Configure quotas in server.properties or use kafka-configs.sh to set client quotas for better resource management
+
+āš ļø Payload Compression
+ šŸ“‹ Checks if payload compression is enabled on user topics. Analyzes compression.type, compression, and producer.compression.type configurations. Healthy: All user topics have compression enabled (100%). Warning: Some or no topics have compression enabled (<100%). Info: No user topics to analyze.
+ No compression detected on any of the 1 user topics (0%)
+ šŸ’” Recommendation: Enable compression on topics to reduce storage usage and improve network performance
+
+āœ… Infinite Retention Policy
+ šŸ“‹ Checks if any topics have infinite retention policy enabled (retention.ms = infinite). Healthy: No topics have infinite retention. Warning: Some topics have infinite retention policy (bad practice). Info: Unable to verify retention policy.
+ No topics have infinite retention policy enabled
diff --git a/kafka-analysis/kafka-summary-1752105715380.txt b/kafka-analysis/kafka-summary-1752105715380.txt
new file mode 100644
index 0000000..d7eb92b
--- /dev/null
+++ b/kafka-analysis/kafka-summary-1752105715380.txt
@@ -0,0 +1,77 @@
+[content identical to kafka-analysis/kafka-summary-1752105684783.txt above (same blob d7eb92b)]
diff --git a/kafka-analysis/kafka-summary-1752105730445.txt b/kafka-analysis/kafka-summary-1752105730445.txt
new file mode 100644
index 0000000..d7eb92b
--- /dev/null
+++ b/kafka-analysis/kafka-summary-1752105730445.txt
@@ -0,0 +1,77 @@
+[content identical to kafka-analysis/kafka-summary-1752105684783.txt above (same blob d7eb92b)]
diff --git a/kraft-config.json b/kraft-config.json
new file mode 100644
index 0000000..e365679
--- /dev/null
+++ b/kraft-config.json
@@ -0,0 +1,15 @@
+{
+  "kafka": {
+    "bootstrap_servers": ["localhost:29092"],
+    "clientId": "superstream-analyzer",
+    "vendor": "apache-kafka",
+    "useSasl": false
+  },
+  "file": {
+    "outputDir": "./kafka-analysis",
+    "formats": ["json", "html", "csv", "txt"],
+    "includeMetadata": true,
+    "includeTimestamp": true
+  },
+  "email": "test@example.com"
+}
diff --git a/src/cli.js b/src/cli.js
index 4780b15..067bfc6 100644
--- a/src/cli.js
+++ b/src/cli.js
@@ -46,6 +46,23 @@ class CLI {
     this.config = config;
 
+    // Normalize brokers/bootstrap_servers to array format
+    if (this.config.kafka) {
+      // Handle both 'bootstrap_servers' and 'brokers' fields
+      const brokers = this.config.kafka.bootstrap_servers || this.config.kafka.brokers;
+
+      if (brokers) {
+        if (Array.isArray(brokers)) {
+          this.config.kafka.brokers = brokers;
+        } else if (typeof brokers === 'string') {
+          this.config.kafka.brokers = brokers.split(',').map(broker => broker.trim());
+        }
+
+        // Remove bootstrap_servers field if it exists, standardize on 'brokers'
+        delete this.config.kafka.bootstrap_servers;
+      }
+    }
+
     // Add email if not present in config file
     if (!this.config.email) {
       this.config.email = '';
@@ -261,6 +278,10 @@ class CLI {
     // Build kafka config
     this.config.kafka = {
       ...kafkaAnswers,
+      // Convert brokers string to array
+      brokers: Array.isArray(kafkaAnswers.brokers)
+        ? kafkaAnswers.brokers
+        : kafkaAnswers.brokers.split(',').map(broker => broker.trim()),
       vendor: vendorAnswer.vendor,
       useSasl: !!saslConfig,
       sasl: saslConfig
@@ -345,6 +366,12 @@ class CLI {
     } else {
       console.log(chalk.gray('Debug: No config file specified, using interactive mode'));
       await this.promptForConfig();
+
+      // Override with command line options if provided
+      if (this.options.bootstrapServers) {
+        console.log(chalk.gray(`Debug: Overriding brokers with command line option: ${this.options.bootstrapServers}`));
+        this.config.kafka.brokers = this.options.bootstrapServers.split(',').map(broker => broker.trim());
+      }
     }
 
     // Initialize services without validation
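Given the `brokers`/`bootstrap_servers` normalization added in `src/cli.js` above, the config file should also accept a `brokers` key holding a comma-separated string; a hypothetical variant of `kraft-config.json` demonstrating that (the second address is a placeholder):

```bash
# Equivalent config using the 'brokers' string form the CLI now normalizes.
cat > kraft-config.json <<'EOF'
{
  "kafka": {
    "brokers": "localhost:29092,localhost:39092",
    "clientId": "superstream-analyzer",
    "vendor": "apache-kafka",
    "useSasl": false
  },
  "file": { "outputDir": "./kafka-analysis", "formats": ["json", "txt"] },
  "email": "test@example.com"
}
EOF
```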