From 3884f8a122ec1c15c5de907d001c74fbf61006b4 Mon Sep 17 00:00:00 2001 From: rianboy18 Date: Tue, 16 Oct 2018 18:08:26 +0000 Subject: [PATCH] Done --- __pycache__/__init__.cpython-36.pyc | Bin 159 -> 153 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 170 -> 173 bytes .../__pycache__/build.cpython-36.pyc | Bin 655 -> 778 bytes q01_outlier_removal/build.py | 21 ++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 185 -> 179 bytes .../test_q01_outlier_removal.cpython-36.pyc | Bin 1835 -> 1833 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 175 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1233 bytes q02_data_cleaning_all/build.py | 25 ++++++++++++++++-- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 181 bytes .../test_q02_data_cleaning.cpython-36.pyc | Bin 0 -> 3403 bytes 11 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/__pycache__/build.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2e5da7da778c48a9c58d7d055bbdb642fcb66418..f242d7034670f8a38563ee8425babe97ab686af1 100644 GIT binary patch delta 51 zcmbQwIFpgXn3tE!ByV%{L=JO#H~ozK+*JLd%*4E;{7OR${qX#v?BasNTjn3tDpxuj9lL=JORAN|nc)S_bj#H5VO;*8Yn;?ks|#N5QZ%)~tXywvje Mw9K5;;)!wA0Qaa9d;kCd diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc index 2f9a42a105b2b26ec10c60ac4d11fa03f7624d22..2158d58bf6b4f25c18409fc1c0273a054e22d919 100644 GIT binary patch delta 51 zcmZ3*xR#N_n3tE!ByV%{L=JQLDE*B5+*JLd%*4E;{7OR${qX#v?BasNxQda(n3tF9Mtoz`L=JPAaQ%$@+*JL diff --git a/q01_outlier_removal/__pycache__/build.cpython-36.pyc b/q01_outlier_removal/__pycache__/build.cpython-36.pyc index 8248a1626e901c868c177407620fc7ec379db180..f2fbee38a9c722544d215184a4527dd6876545e6 100644 GIT binary patch literal 778 zcmZ8fy>8nu5GE-{^QQv z8;HBZ2X=y7+4JP^ysk@;QQ6F7rYfF1y-@TKHFRa3SE`jwzUa=cch;NJx%De(MnvpE zC5#a=PuH|;xk*CPXxi|66Ztk=wcuTpTodSp^%kPB15it|P`ZjdJLsD1+MSTqItfKp zr_S#;q(@-!`zpSH#9}R|yjGjzxAB$IOH*G|lO)0nrb)_J`__`G(o7Nk9_((Hw0~-P$eB_Pr5q|FJ1Gq&)+pDQ$)L2rp5CVr|J!-SQY$ Nc8Eu02ofHI_z%7j(Fyll`6t?3ecRlsEGM>c5b_@-O6#-SdR6-RJOj#l~&m2yDwre|8i^6pL1ZH;r z50h7>F8u*aJcofR5_aCFJb&+dFYh_e=kwVQ`K`Xo2>D5N7Q^?~X#NQT5TJ_GRM0Mi zY)fkJXNo@C@iX-E9iIg{&>W9aM-{;~i z^?I~1y$&vQy2|79vPZ2{A{N$39byj7*jPvn;u`tAae8Tvk>Yb6Y5SeHIf{S1uRdbI ziYswlod|ysjIG`}(}^~C9Qs=NAlt@?wG-Z_!P`G%b@}vJWBQ<^a7b^=6<5`HFBKfx zo4ELg*-JNYfI#TMxcBL!ak~Yct;$$hu7UfQS%_07I5c>zF3H>^K?)l`cn2QfS)9Ju T80eLFiN783CBihD(f#~4xYMeW diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py index ec278ba..5b458c8 100644 --- a/q01_outlier_removal/build.py +++ b/q01_outlier_removal/build.py @@ -1,8 +1,27 @@ +# %load q01_outlier_removal/build.py # Default imports import pandas as pd +from sklearn.preprocessing import Imputer loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) +def outlier_removal(loan_data): + qv=loan_data[['ApplicantIncome','CoapplicantIncome','LoanAmount']].quantile(0.95) +# return loan_data[((loan_data[['ApplicantIncome','CoapplicantIncome','LoanAmount']]<=qv).all(axis=1))] +# return loan_data[(loan_data['ApplicantIncome']&&bbB)i26S%uC9zG_=qU X&o9a@E=WvH)lbe(N!_f##LWr-2HO_L delta 69 zcmZ3m-q-GbFCKV;- ZCgx=(=IQ69mdB@M=A;&HmSf^(1po{!7gGQL diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0192845cd67630c8786b982ed89f9cdfeec9f75a GIT binary patch literal 175 zcmXr!<>fNT+Z@dR1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuNeJ|{M=OiqRhm+ zr2I-l3;po?qU_>=#N<@{`TK_=2MRtkmQZ{Xzqy y_>{zw#Q5Z#)Wp2Zy!80QoE-i5_{_Y_lK6PNg34PQHo5sJr8%i~AX|%pm;nH@nJ>5i literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f5321d15198cb5dd2c6e578cd5c61d009de5a38 GIT binary patch literal 1233 zcmaKrzi$&U6vxl^J89CSv_Q*<7|4?FV_`u2P>TQwSc(J~B+EU!>7_p3VLOPRWC|+_ zGyeep2s5)+rt%jcCZ6w7R55U;d(ZdrkN5ob^UZFz{p;$t*I#1K`{A8=B52=(Pk#o( zJcfja8;>yJ-o&2-9DxnbTIMfN^P*;KgyVW{f>ZE;XccX|R`-RXQ*?0`c|4x1vuKHM zkM|~h7PDmOL%Yj+j=`){Y!v74IhfUA?T;SbWN6yuRzsz>r6|Hl>>3wR*(9p=PR;QyA0_>HA!dp$*UX zyC*Gh10T1VW6nQt+fh`qvLG6!@j+x`s0bmc4GTC@uqER*WU8!eV2mClwk6#8xZxsr`}@`Cs4VzMVUW|)h&owu literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py index b56e2bc..53db425 100644 --- a/q02_data_cleaning_all/build.py +++ b/q02_data_cleaning_all/build.py @@ -1,14 +1,35 @@ +# %load q02_data_cleaning_all/build.py # Default Imports import sys, os sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__')))) import pandas as pd import numpy as np -from sklearn.model_selection import train_test_split +from sklearn.model_selection import train_test_split as tts from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) loan_data = outlier_removal(loan_data) +from sklearn.preprocessing import Imputer + +# imp_mean = Imputer(missing_values='NaN',strategy='mean') +# imp_mean.fit(loan_data['LoanAmount']) +# loan_data['LoanAmount'] = imp_mean.transform(loan_data['LoanAmount']) +def data_cleaning(loan_data): + loan_data['LoanAmount'] = (loan_data['LoanAmount']).fillna(loan_data['LoanAmount'].mean()) + + loan_data['Gender']=loan_data['Gender'].fillna(loan_data['Gender'].mode) + # loan_data[['Gender', 'Married', 'Dependents', 'Self_Employed', 'Loan_Amount_Term', 'Credit_History']] + loan_data['Married']=loan_data['Married'].fillna(loan_data['Married'].mode) + loan_data['Dependents']=loan_data['Dependents'].fillna(loan_data['Dependents'].mode) + loan_data['Self_Employed']=loan_data['Self_Employed'].fillna(loan_data['Self_Employed'].mode) + loan_data['Loan_Amount_Term']=loan_data['Loan_Amount_Term'].fillna(loan_data['Loan_Amount_Term'].mode) + loan_data['Credit_History']=loan_data['Credit_History'].fillna(loan_data['Credit_History'].mode) + X = loan_data.drop(['Loan_Status'],axis=1) + y = loan_data['Loan_Status'] + X_train,X_test,y_train,y_test = tts(X,y,test_size=0.25,random_state=9) + return X,y,X_train,X_test,y_train,y_test + + -# Write your solution here : diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0fb196d2d12338445a08c18bacbca98c6fac15e GIT binary patch literal 181 zcmYL?I}XAy5JVk`0wLud(6D?O3WPWT4Fv_o%CT3$;CO@A6gd(X;SjW(0xFyWG17Y) zO*6}~9KFYnqsn>p`lZNan~_15ReLolc6Sx~`L9m}CvFI$)1l&SzJdeCi?rGh7`6nS zYbVJWj8sPwaU8IXd?u6NI$NOCNu%+kg9|lk?;(*S0Yn#ELY%hUbGUBU-aKvkZ^@`H DR+%!g literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd582a8b0a641a1088ef8a515d53814716fe28e5 GIT binary patch literal 3403 zcmd6p-EQ1O6o5Up*Y^JAr)k>ILQNqRi^^^jE+`U0MQy1fRY9U4Dud)Yp4qJz|7>Q) z5xXi^+o#|Sco80AZn?@Mz!m4%Np`bo^FwdqmCqTEXU@#|d}h!2yxFY%^T}U7|5($s zzqOgmg7yQr#UCK3MoFxB1hgLOiQyRp`;6F3DqaQJCauI)VtaPcZ^ceh^{PZ;WKg5_ zgm`sUA2g^#s}mjC6;?0$Y}CJ4UejxN3*Mr)!Ig5mSlTDysyI`=x@?M27VZ` zAdS*KbhP?R_VPFmcwY=zIKGVox^Why{*W^og)+)gKTnG(jCMk?=h;v3_12?(0!Bkz z{_w(89>6W0f?x$OPp5?HuQku01~owgDr$kQP@6iSEn1~D&^E2p255&iX$y3fF3?5L zHM#_DE~_PU@6qh`LtGy(T-Jg6A>3jUgw!ONkl)EGZJeN~$j zUlJBDw>g^QHi+@9$G-s#$*2ol@`R;Qxa;(;`y|?9DYV)q$N1nC5UL&s!MJ?9n+LHn zVwSdbWeOJel!;XT)OZty{X6Ww%o5h+QIPIrqmBFBpECZd7zQEhh8bnRydMb}h49Jx zoC$&4hddj=KDxX2HvMB?&JW_aD*=coq<>;SXE;(falrh%Lw{PZCt-rn42O(6C*XEY zn|zEs8Y#$s0F7DlC3ieCN0g22eFH{~zdQm=Sue_2(@eTaAj6k#zrcNb zMBA96=T)%P)_ENiZ=gWZr9l7GjW;-PU`||t0VMze06sexfWx^*{$t2sBozL;(13`8 z*pvFe7?^_!CDOu3oQX4id;Hzhc?f4$j)u(bW!&XV<~%J)xRG#Q41<&g!o{HfmIn!Y zan~JXIm9G87z=0ET^DUzEtWC3CALx_gEVBydhVn1N*_{Xh8bt7(nA}ndSp(SxOAJ8 z{N~WDzX4sjK_6%NrTO>3Wq|-R(jqz;e{n91C+%D|pnMrzQZ_irgDB-I@Wij8SVeIS z#dQ#GVRz?jc4!vQ%PhS6{|6o#x}5aL^NVtG&*FA_j@ahKiCt1wqbVz8K{|<;P}Z|T zpB8Cn5gXlkXJVtvNz5LdIGeY?SMH*ChqHNL_R~3T7eAW^hAt;P3w8(DVH$M&T=33H z3Z>P~9J`Arh0@M;yRIDHPa$jhzN-0tlF>ZIcEk6v+A81h1^nboDArKiK=D3`4^e!C zVjaaa!xb&85IB~N_#_rVIK-(qwqqXtinr}G&ngpfK}$J#8p#44mYv==P^^stnI&bMv>+Ig!@aL-X&Q2c1lw(DtDrLK