diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2e5da7d..1434e2d 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc index 2f9a42a..fc32529 100644 Binary files a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc and b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_outlier_removal/__pycache__/build.cpython-36.pyc b/q01_outlier_removal/__pycache__/build.cpython-36.pyc index 8248a16..9a5f45f 100644 Binary files a/q01_outlier_removal/__pycache__/build.cpython-36.pyc and b/q01_outlier_removal/__pycache__/build.cpython-36.pyc differ diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py index ec278ba..23eb0f7 100644 --- a/q01_outlier_removal/build.py +++ b/q01_outlier_removal/build.py @@ -1,3 +1,4 @@ +# %load q01_outlier_removal/build.py # Default imports import pandas as pd @@ -6,3 +7,13 @@ # Write your Solution here: +def outlier_removal(data): + num_cols = ['ApplicantIncome','CoapplicantIncome','LoanAmount'] + iqr = [] + for i in num_cols: + iqr.append([i,data[i].quantile(0.95)]) + for i in iqr: + data = data.drop(data[(data[i[0]]>i[1])].index) + return data +outlier_removal(loan_data) + diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc index 5a057ff..e82f33d 100644 Binary files a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc and b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc index 4c0b6c7..ed0aefa 100644 Binary files a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc and b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..9064ebb Binary files /dev/null and b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..ead3569 Binary files /dev/null and b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py index b56e2bc..a2d6026 100644 --- a/q02_data_cleaning_all/build.py +++ b/q02_data_cleaning_all/build.py @@ -1,3 +1,4 @@ +# %load q02_data_cleaning_all/build.py # Default Imports import sys, os sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__')))) @@ -12,3 +13,18 @@ # Write your solution here : +def data_cleaning(loan_data): + X = loan_data.iloc[:, :-1] + y = loan_data.iloc[:, 11] + # Splitting the dataset into the Training set and Test set + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 9) + # Imputing Missing Value in Numerical variable + X_train[['LoanAmount']] = X_train[['LoanAmount']].fillna(X_train.mean(), inplace=True) + X_test[['LoanAmount']] = X_test[['LoanAmount']].fillna(X_train.mean(), inplace=True) + # Imputing Missing Value in Categorical variable + X_train[['Gender', 'Married', 'Dependents', 'Self_Employed','Loan_Amount_Term','Credit_History']] = X_train[['Gender', 'Married', 'Dependents', 'Self_Employed','Loan_Amount_Term','Credit_History']] .apply(lambda x:x.fillna(x.value_counts().index[0])) + X_test[['Gender', 'Married', 'Dependents', 'Self_Employed','Loan_Amount_Term','Credit_History']] = X_test[['Gender', 'Married', 'Dependents', 'Self_Employed','Loan_Amount_Term','Credit_History']] .apply(lambda x:x.fillna(x.value_counts().index[0])) + + return X,y, X_train, X_test, y_train, y_test + + diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..53b4b43 Binary files /dev/null and b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc new file mode 100644 index 0000000..fd7d43e Binary files /dev/null and b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..a7030d9 Binary files /dev/null and b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..698172e Binary files /dev/null and b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/build.py b/q02_data_cleaning_all_2/build.py index e20ff7b..a6b55f6 100644 --- a/q02_data_cleaning_all_2/build.py +++ b/q02_data_cleaning_all_2/build.py @@ -1,3 +1,4 @@ +# %load q02_data_cleaning_all_2/build.py # Default Imports import pandas as pd import numpy as np @@ -11,3 +12,16 @@ # Write your solution here : +def data_cleaning_2(X_train, X_test, y_train, y_test): + cols = X_train.columns + num_cols = X_train._get_numeric_data().columns + cat_cols = list(set(cols) - set(num_cols)) + X_train=pd.get_dummies(X_train,columns=cat_cols, drop_first=True) + X_test=pd.get_dummies(X_test,columns=cat_cols, drop_first=True) + + for i in num_cols: + X_train[i] = np.sqrt(X_train[i]) + X_test[i] = np.sqrt(X_test[i]) + return X_train, X_test, y_train, y_test + + diff --git a/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..3f8a9a1 Binary files /dev/null and b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc new file mode 100644 index 0000000..9b2fcc5 Binary files /dev/null and b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc differ diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..973e08e Binary files /dev/null and b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..cd37a04 Binary files /dev/null and b/q03_logistic_regression/__pycache__/build.cpython-36.pyc differ diff --git a/q03_logistic_regression/build.py b/q03_logistic_regression/build.py index cdbd506..d392ee2 100644 --- a/q03_logistic_regression/build.py +++ b/q03_logistic_regression/build.py @@ -1,3 +1,4 @@ +# %load q03_logistic_regression/build.py # Default Imports import pandas as pd from sklearn.preprocessing import StandardScaler @@ -16,3 +17,17 @@ # Write your solution code here: +def logistic_regression(X_train, X_test, y_train, y_test): + cols=['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount'] + stand_scale= StandardScaler() + X_train.loc[:, cols] = stand_scale.fit_transform(X_train.loc[:, cols]) + X_test.loc[:, cols] = stand_scale.fit_transform(X_test.loc[:, cols]) + + lr = LogisticRegression() + lr.fit(X_train, y_train) + y_pred = lr.predict(X_test) + con = confusion_matrix(y_test,y_pred) + return con + + + diff --git a/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..056e165 Binary files /dev/null and b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc new file mode 100644 index 0000000..c4afcef Binary files /dev/null and b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc differ