Skip to content

Commit 1b15bea

Browse files
authored
Add files via upload
1 parent d97bd1d commit 1b15bea

File tree

1 file changed

+84
-0
lines changed

1 file changed

+84
-0
lines changed

Data Analysis Tool/tool.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import pandas as pd
2+
import matplotlib.pyplot as plt
3+
4+
# Function to load data from a CSV or Excel file
5+
def load_data(file_path):
    """Load tabular data from a CSV or Excel file into a DataFrame.

    The format is chosen from the file name's ending, compared
    case-insensitively so that names such as ``REPORT.CSV`` are accepted.

    Args:
        file_path: Location of the file, either a string or an
            ``os.PathLike`` object (for example ``pathlib.Path``). The name
            must end in ``.csv`` or ``.xlsx``.

    Returns:
        pandas.DataFrame: The parsed contents of the file.

    Raises:
        ValueError: If the name ends in neither ``.csv`` nor ``.xlsx``.
    """
    import os  # local import so the function does not rely on a file-level one

    # os.fspath() turns Path-like objects into str and leaves str untouched;
    # lower-casing makes the suffix test independent of the file name's case.
    normalized_name = os.fspath(file_path).lower()

    if normalized_name.endswith('.csv'):
        return pd.read_csv(file_path)
    if normalized_name.endswith('.xlsx'):
        return pd.read_excel(file_path)
    raise ValueError("Unsupported file format. Please use .csv or .xlsx")
14+
15+
# Function to clean data
16+
def clean_data(df):
    """Return a cleaned copy of ``df`` without duplicates or missing values.

    Cleaning happens in three steps:

    1. Exact duplicate rows are removed.
    2. Missing values in numeric columns are replaced by that column's mean,
       computed after duplicates were removed.
    3. Any row that still contains a missing value is dropped. This covers
       non-numeric columns and numeric columns that have no valid value to
       average.

    The input DataFrame is not modified.

    Args:
        df: The pandas DataFrame to clean.

    Returns:
        pandas.DataFrame: A new DataFrame with the cleaning steps applied.
    """
    deduplicated = df.drop_duplicates()

    # Collect one replacement value per numeric column that needs filling.
    # "number" covers every numeric width, not only float64 and int64.
    fill_values = {}
    for column in deduplicated.select_dtypes(include="number").columns:
        series = deduplicated[column]
        if not series.isna().any():
            continue
        column_mean = series.mean()
        # An all-missing column has a NaN mean; there is nothing to fill
        # with, so its rows are left for dropna() below.
        if pd.notna(column_mean):
            fill_values[column] = column_mean

    # Fill through DataFrame.fillna instead of an in-place fillna on a
    # selected column: the in-place form on a column selection does not
    # update the parent frame under pandas Copy-on-Write.
    if fill_values:
        deduplicated = deduplicated.fillna(value=fill_values)

    return deduplicated.dropna()
29+
30+
# Function to visualize data
31+
def visualize_data(df, column_name):
    """Show a bar chart and a pie chart of value frequencies for one column.

    Each chart is drawn on its own figure and displayed with ``plt.show()``,
    so the pie chart never lands on the axes used by the bar chart.

    Args:
        df: The pandas DataFrame holding the data.
        column_name: Name of the column whose value counts are plotted.

    Raises:
        ValueError: If ``column_name`` is not a column of ``df``.
    """
    if column_name not in df.columns:
        raise ValueError(f"Column '{column_name}' does not exist in the DataFrame.")

    # Both charts show the same frequencies, so count once.
    counts = df[column_name].value_counts()

    # Bar chart on a dedicated figure.
    bar_figure, bar_axes = plt.subplots()
    counts.plot(kind='bar', color='skyblue', rot=45, ax=bar_axes)
    bar_axes.set_title(f'Bar Chart of {column_name}')
    bar_axes.set_xlabel(column_name)
    bar_axes.set_ylabel('Count')
    bar_figure.tight_layout()
    plt.show()

    # Pie chart on a second, independent figure.
    pie_figure, pie_axes = plt.subplots()
    counts.plot(
        kind='pie',
        autopct='%1.1f%%',
        startangle=90,
        colors=plt.cm.Paired.colors,
        ax=pie_axes,
    )
    pie_axes.set_title(f'Pie Chart of {column_name}')
    pie_axes.set_ylabel('')  # Hide the y-label
    pie_figure.tight_layout()
    plt.show()
52+
53+
# Main function
54+
def main(file_path='data.csv'):
    """Run the interactive load, clean and visualize workflow.

    Loads the file with ``load_data``, prints a preview, cleans it with
    ``clean_data``, prints a preview of the result, then asks on standard
    input for a column name and passes it to ``visualize_data``.

    Any exception raised along the way is caught and reported as a single
    message on standard output, so the function itself does not raise.

    Args:
        file_path: Path of the CSV or Excel file to analyse. Defaults to
            ``'data.csv'`` in the current working directory.
    """
    try:
        # Load the data
        data = load_data(file_path)
        print("Data Loaded Successfully!")

        # Display the first few rows of the data
        print("\nFirst few rows of the data:")
        print(data.head())

        # Clean the data
        cleaned_data = clean_data(data)
        print("\nData Cleaned Successfully!")

        # Display the cleaned data
        print("\nFirst few rows of the cleaned data:")
        print(cleaned_data.head())

        # Visualize a specified column
        column_name = input("\nEnter the column name you want to visualize: ")
        # Prefer the text exactly as typed (column names may legitimately
        # contain spaces); fall back to the trimmed text so an accidental
        # leading/trailing space does not cause a "does not exist" error.
        if column_name not in cleaned_data.columns:
            column_name = column_name.strip()
        visualize_data(cleaned_data, column_name)

    except Exception as e:
        # Top-level boundary of the script: report the problem to the user
        # instead of showing a traceback.
        print(f"An error occurred: {e}")
81+
82+
# Run the program
83+
# Call main() only when this file is executed as a script; importing it as a
# module defines the functions without running the workflow.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)