commit-live-students · sunilhariharan · Nov 27, 2018 · Nov 27, 2018 · Nov 27, 2018 · Nov 27, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/__pycache__/build.cpython-36.pyc b/q01_outlier_removal/__pycache__/build.cpython-36.pyc
diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py
@@ -1,3 +1,4 @@
+# %load q01_outlier_removal/build.py
 # Default imports
 import pandas as pd
 
@@ -6,3 +7,13 @@
 
 
 # Write your Solution here:
+def outlier_removal(data,
+                    columns=('ApplicantIncome', 'CoapplicantIncome', 'LoanAmount'),
+                    quantile=0.95):
+    """Return ``data`` without rows that are high outliers in any of ``columns``.
+
+    A row is treated as an outlier when its value in at least one of the
+    listed columns is strictly greater than that column's ``quantile``.
+    Every threshold is computed on the full, unfiltered ``data``.
+
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        Frame containing every column named in ``columns``.
+    columns : iterable of str, optional
+        Columns to screen. Defaults to the three income/loan columns.
+    quantile : float, optional
+        Quantile used as the upper cut-off. Defaults to 0.95.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A new frame holding the retained rows; ``data`` is not modified.
+
+    Raises
+    ------
+    KeyError
+        If one of ``columns`` is missing from ``data``.
+    """
+    keep = pd.Series(True, index=data.index)
+    for column in columns:
+        threshold = data[column].quantile(quantile)
+        # NaN > threshold evaluates to False, so rows with a missing value
+        # in this column are retained rather than dropped.
+        keep &= ~(data[column] > threshold)
+    # Boolean masking (rather than dropping by index label) stays correct
+    # even when the index contains duplicate labels.
+    return data.loc[keep]
+
+
diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc
diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc
diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py
@@ -1,8 +1,10 @@
+# %load q02_data_cleaning_all/build.py
 # Default Imports
 import sys, os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__'))))
 import pandas as pd
 import numpy as np
+import statistics
 from sklearn.model_selection import train_test_split
 from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
 
@@ -12,3 +14,22 @@
 
 
 # Write your solution here :
+def data_cleaning(data):
+    """Impute missing values in ``data`` and split it into train/test sets.
+
+    ``LoanAmount`` is filled with its mean. ``Loan_Amount_Term``,
+    ``Credit_History``, ``Gender``, ``Married``, ``Dependents`` and
+    ``Self_Employed`` are filled with their most frequent value. The last
+    column of ``data`` is used as the target and all preceding columns as
+    features.
+
+    Parameters
+    ----------
+    data : pandas.DataFrame
+        Frame containing the columns listed above, with the target as its
+        last column.
+
+    Returns
+    -------
+    tuple
+        ``(X, y, X_train, X_test, y_train, y_test)`` where the split uses
+        ``test_size=0.25`` and ``random_state=9``.
+
+    Raises
+    ------
+    KeyError
+        If one of the imputed columns is missing from ``data``.
+    """
+    # Impute on a copy so the caller's frame is left untouched and so the
+    # filled values are the ones that actually reach the split below.
+    cleaned = data.copy()
+
+    cleaned['LoanAmount'] = cleaned['LoanAmount'].fillna(cleaned['LoanAmount'].mean())
+
+    mode_columns = ['Loan_Amount_Term', 'Credit_History', 'Gender',
+                    'Married', 'Dependents', 'Self_Employed']
+    for column in mode_columns:
+        # Series.mode() ignores NaN and returns the modes sorted, so the
+        # first entry is a deterministic choice even when there is a tie.
+        modes = cleaned[column].mode()
+        if not modes.empty:
+            cleaned[column] = cleaned[column].fillna(modes.iloc[0])
+
+    X = cleaned.iloc[:, :-1]
+    y = cleaned.iloc[:, -1]
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, random_state=9, test_size=0.25)
+    return X, y, X_train, X_test, y_train, y_test
+
+
+
+
diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/build.py b/q02_data_cleaning_all_2/build.py
@@ -1,6 +1,9 @@
+# %load q02_data_cleaning_all_2/build.py
 # Default Imports
 import pandas as pd
 import numpy as np
+from sklearn.preprocessing import LabelEncoder
+
 from greyatomlib.logistic_regression_project.q02_data_cleaning_all.build import data_cleaning
 from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
 
@@ -11,3 +14,53 @@
 
 
 # Write your solution here :
+def data_cleaning_2(X_train, X_test, y_train, y_test):
+    """Transform and encode the train/test feature frames for modelling.
+
+    Steps applied to both feature frames:
+
+    * square-root transform of ``ApplicantIncome``, ``CoapplicantIncome``
+      and ``LoanAmount``;
+    * label encoding of ``Gender``, ``Married``, ``Education`` and
+      ``Self_Employed``;
+    * one-hot encoding of ``Dependents`` and ``Property_Area``, appended
+      after the remaining columns;
+    * removal of ``Dependents``, ``Property_Area``, ``Credit_History`` and
+      ``Loan_Amount_Term``.
+
+    Encoders and dummy columns are learned from ``X_train`` only and then
+    applied to ``X_test``, so both outputs share one encoding and one set
+    of columns.
+
+    Parameters
+    ----------
+    X_train, X_test : pandas.DataFrame
+        Feature frames containing the columns named above. They are not
+        modified.
+    y_train, y_test
+        Targets; returned unchanged.
+
+    Returns
+    -------
+    tuple
+        ``(final_X_train, final_X_test, y_train, y_test)``.
+
+    Raises
+    ------
+    ValueError
+        If ``X_test`` holds a label in a label-encoded column that never
+        occurs in ``X_train``.
+    """
+    sqrt_columns = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']
+    label_columns = ['Gender', 'Married', 'Education', 'Self_Employed']
+    dummy_columns = ['Dependents', 'Property_Area']
+    dropped_columns = dummy_columns + ['Credit_History', 'Loan_Amount_Term']
+
+    # Copies avoid mutating the caller's frames (which are typically slices
+    # produced by train_test_split).
+    train = X_train.copy()
+    test = X_test.copy()
+
+    for column in sqrt_columns:
+        train[column] = np.sqrt(train[column])
+        test[column] = np.sqrt(test[column])
+
+    for column in label_columns:
+        # Fit on train only; re-fitting on test could assign different
+        # integer codes to the same category.
+        encoder = LabelEncoder()
+        train[column] = encoder.fit_transform(train[column])
+        test[column] = encoder.transform(test[column])
+
+    dummies_train = pd.get_dummies(train[dummy_columns], columns=dummy_columns)
+    # Align test dummies to the train layout: categories missing from the
+    # test split become all-zero columns, categories unseen in train are
+    # discarded.
+    dummies_test = pd.get_dummies(test[dummy_columns], columns=dummy_columns)
+    dummies_test = dummies_test.reindex(columns=dummies_train.columns, fill_value=0)
+
+    final_X_train = pd.concat([train, dummies_train], axis=1)
+    final_X_train = final_X_train.drop(dropped_columns, axis=1)
+    final_X_test = pd.concat([test, dummies_test], axis=1)
+    final_X_test = final_X_test.drop(dropped_columns, axis=1)
+
+    return final_X_train, final_X_test, y_train, y_test
+
+
diff --git a/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc
diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc
diff --git a/q03_logistic_regression/build.py b/q03_logistic_regression/build.py
@@ -1,7 +1,9 @@
+# %load q03_logistic_regression/build.py
 # Default Imports
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
 from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
 from sklearn.metrics import confusion_matrix
 from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal
 from greyatomlib.logistic_regression_project.q02_data_cleaning_all.build import data_cleaning
@@ -15,4 +17,15 @@
 
 
 # Write your solution code here:
+def logistic_regression(X_train, X_test, y_train, y_test):
+    """Fit a scaled logistic regression and evaluate it on the test split.
+
+    A pipeline of ``StandardScaler`` followed by
+    ``LogisticRegression(random_state=9)`` is fitted on the training split
+    and used to predict the test split.
+
+    Parameters
+    ----------
+    X_train, y_train
+        Features and target used to fit the pipeline.
+    X_test, y_test
+        Features and target used for evaluation only.
+
+    Returns
+    -------
+    numpy.ndarray
+        Result of ``confusion_matrix(y_test, y_pred)``.
+    """
+    pipeline = Pipeline(steps=[
+        ('scaler', StandardScaler()),
+        ('logistic_regression', LogisticRegression(random_state=9)),
+    ])
+    # Fit on the training split only; fitting on the test split would make
+    # the confusion matrix measure memorisation, not generalisation.
+    pipeline.fit(X_train, y_train)
+    y_pred = pipeline.predict(X_test)
+    return confusion_matrix(y_test, y_pred)
+
+
 
diff --git a/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc