1+ import pandas as pd
2+ import matplotlib .pyplot as plt
3+
4+ # Function to load data from a CSV or Excel file
def load_data(file_path):
    """Load tabular data from a CSV or Excel file into a DataFrame.

    The reader is chosen from the file extension. The extension is compared
    case-insensitively, so names such as ``REPORT.CSV`` or ``Book1.XLSX``
    are accepted.

    Args:
        file_path: Location of the input file, given as a string or a
            path-like object. Its name must end in ``.csv`` or ``.xlsx``.

    Returns:
        pandas.DataFrame: The parsed contents of the file.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.xlsx``.
    """
    # Lower-case a string form only for the extension test; the caller's
    # original object is what gets handed to pandas.
    lowered_name = str(file_path).lower()

    if lowered_name.endswith('.csv'):
        return pd.read_csv(file_path)
    if lowered_name.endswith('.xlsx'):
        return pd.read_excel(file_path)

    raise ValueError("Unsupported file format. Please use .csv or .xlsx")
14+
15+ # Function to clean data
def clean_data(df):
    """Return a cleaned copy of *df* with duplicates and gaps removed.

    The steps run in this order:

    1. Exact duplicate rows are dropped.
    2. Missing values in ``float64`` / ``int64`` columns are replaced with
       that column's mean (computed after de-duplication).
    3. Any row that still contains a missing value is dropped. This covers
       gaps in non-numerical columns and numerical columns that are
       entirely missing, which have no mean to fill with.

    The input frame is not modified; every step produces a new frame.

    Args:
        df: The DataFrame to clean.

    Returns:
        pandas.DataFrame: The cleaned data.
    """
    deduped = df.drop_duplicates()

    numeric_cols = deduped.select_dtypes(include=['float64', 'int64']).columns
    if len(numeric_cols) > 0:
        # An all-missing column has a NaN mean; there is nothing to fill it
        # with, so leave it out and let the final dropna() handle its rows.
        col_means = deduped[numeric_cols].mean().dropna()
        if not col_means.empty:
            # Passing a Series fills each column with its own mean and
            # returns a new frame. The previous `df[col].fillna(...,
            # inplace=True)` was a chained in-place call, which operates on
            # a temporary under pandas Copy-on-Write and so never reached
            # the frame.
            deduped = deduped.fillna(col_means)

    return deduped.dropna()
29+
30+ # Function to visualize data
def visualize_data(df, column_name):
    """Show a bar chart and then a pie chart of value frequencies in a column.

    The frequencies come from ``value_counts()`` on the chosen column and
    are computed once for both charts. Each chart is drawn on its own
    figure so the pie chart can never be painted over the bar chart's axes.

    Args:
        df: DataFrame holding the data to plot.
        column_name: Label of the column whose values are counted.

    Raises:
        ValueError: If *column_name* is not a column of *df*, or if the
            column has no non-missing values to count.
    """
    if column_name not in df.columns:
        raise ValueError(f"Column '{column_name}' does not exist in the DataFrame.")

    counts = df[column_name].value_counts()
    if counts.empty:
        # Plotting an empty Series fails deep inside the plotting code with
        # an unhelpful message; fail early with a clear one instead.
        raise ValueError(f"Column '{column_name}' has no values to plot.")

    # Bar chart on a dedicated figure
    bar_fig, bar_ax = plt.subplots()
    counts.plot(kind='bar', color='skyblue', rot=45, ax=bar_ax)
    bar_ax.set_title(f'Bar Chart of {column_name}')
    bar_ax.set_xlabel(column_name)
    bar_ax.set_ylabel('Count')
    bar_fig.tight_layout()
    plt.show()
    plt.close(bar_fig)  # release the figure once it has been displayed

    # Pie chart on a second, independent figure
    pie_fig, pie_ax = plt.subplots()
    counts.plot(
        kind='pie',
        autopct='%1.1f%%',
        startangle=90,
        colors=plt.cm.Paired.colors,
        ax=pie_ax,
    )
    pie_ax.set_title(f'Pie Chart of {column_name}')
    pie_ax.set_ylabel('')  # Hide the y-label
    pie_fig.tight_layout()
    plt.show()
    plt.close(pie_fig)
52+
53+ # Main function
def main(file_path='data.csv'):
    """Run the load -> clean -> visualize workflow interactively.

    Loads the file, prints a preview, cleans the data, prints a preview of
    the cleaned data, then prompts on standard input for the column to chart.
    Any exception raised along the way is caught and reported as a single
    printed line rather than a traceback.

    Args:
        file_path: CSV or Excel file to process. Defaults to ``'data.csv'``
            so that calling ``main()`` with no arguments behaves as before.
    """
    try:
        # Load the data
        data = load_data(file_path)
        print("Data Loaded Successfully!")

        # Display the first few rows of the data
        print("\n First few rows of the data:")
        print(data.head())

        # Clean the data
        cleaned_data = clean_data(data)
        print("\n Data Cleaned Successfully!")

        # Display the cleaned data
        print("\n First few rows of the cleaned data:")
        print(cleaned_data.head())

        # Ask which column to chart, then draw it
        column_name = input("\n Enter the column name you want to visualize: ")
        visualize_data(cleaned_data, column_name)

    except Exception as e:
        # Top-level boundary of the script: report the failure to the user
        # instead of letting a traceback escape.
        print(f"An error occurred: {e} ")
81+
82+ # Run the program
# Only start the interactive workflow when this file is executed as a script;
# importing it as a module must not trigger any I/O.
if __name__ == "__main__":
    main()
0 commit comments