From c58cd73d9d45102e3ecba4faa29bf57541c8152a Mon Sep 17 00:00:00 2001 From: bhattbhavesh91 Date: Tue, 11 Jul 2017 14:16:10 +0000 Subject: [PATCH] Done --- build.py | 39 ++++++++++++++++------- build.pyc | Bin 0 -> 2517 bytes tests/__init__.pyc | Bin 0 -> 175 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2826 bytes 4 files changed, 28 insertions(+), 11 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index c7d540b..cf8d2a2 100644 --- a/build.py +++ b/build.py @@ -1,22 +1,39 @@ -def get_categorical_variables(df): - return [] +import numpy as np +import pandas as pd +from pandas import Series, DataFrame +import operator +import matplotlib.pyplot as plt +df = pd.read_csv('data/employee_retention_data.csv') -def get_numerical_variables(df): - return [] +def get_categorical_variables(df): + return df[['dept','join_date','quit_date']] +def get_numerical_variables(df): + return df.drop(['dept','join_date','quit_date'], axis=1) def get_numerical_variables_percentile(df): - pass - + df_temp = get_numerical_variables(df) + return df_temp.describe().T def get_categorical_variables_modes(df): - pass - + cat_df = get_categorical_variables(df) + return cat_df.mode() def get_missing_values_count(df): - pass - + return pd.DataFrame(pd.isnull(df).sum().rename('NA_count')) def plot_histogram_with_numerical_values(df): - pass + num_df = get_numerical_variables(df) + plt.subplot(221) + plt.title(num_df.columns[0]) + sns.distplot(num_df.iloc[:,0], color='yellow', fit=norm, kde=False) + plt.subplot(222) + plt.title(num_df.columns[1]) + sns.distplot(num_df.iloc[:,1], color='yellow', fit=norm, kde=False) + plt.subplot(223) + plt.title(num_df.columns[2]) + sns.distplot(num_df.iloc[:,2], color='yellow', fit=norm, kde=False) + plt.subplot(224) + plt.title(num_df.columns[3]) + sns.distplot(num_df.iloc[:,3], color='yellow', fit=norm, kde=False) diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..501693b913510df8752275e55499a2b19d0b5c57 GIT binary patch literal 2517 zcmcgu&5j#I5U!c=-#T74At%n?JigMl&kCes=C_IpNHdLU#RbLdie_R{W*sF1w!Ey zkR@6K7kWsGNc50wNJWnpJu3RN=u`00dO(Z8h4qlOBU&7gj%Yh3714P>i(906^b?#; zNcBZIBsCD_4N^lCJRTz~^A}#Tm_zjgh`DZzcK!hBSDDWqH(8VSI_f)GFk7*AX4hV}3mIES0Xk zY_zS6_|Y=^p*5bp>KHZ@vrEt6sa3bYtL|%v>82|5bh*jAhd9-4^YD}Dw^j4R)mg5m zd8PDJtL$V|H76H=pZpW}2?y@9pDtTt)r0!XAH(Nb`!vVi>#8w%X4BKGG1<~;$AmZz zgM#Zw-^VhLZg;_|9_Qpd2<9;}J4^WINyLKT3kTVc#u;?1N z@Xnaawm{-{5Kr*vGGdONhvb&I@&X!F_&lIz0c`_12L~Yvivi7ILCU=O5ron%Z_HAA ze)x?PoB;Ggez;0KQnY8VA7Q6gfK6-NWnE$QZ-@(?*N@1KcG^ zw%Tr$vF4_MTm*bmUj$iNc)^(SYn_NzAaH(+X#D`#d zW?n!V3<7;PD3z@mDU&m8ZS_u~l1?uB%OaOUE+Kizzn)W2;;abXhfp{P=NZ-PNOwHoE_#eP7U2 zKHVU|YKr-yS{zY9AN+{g}tw}MZv$@op!2pU9=E07s1YoWZ(N|m{U4H69M zjxqC6p;u-enc)k*$b=(nmbgNNEpBn0n!E>9sBCnmQv6AfjxWto5|eP&liMt~ttQ-Z n{=EZ4+sm!TC?25)wpM?}<>Pqw30S&^aTFef2NB*qyraJXOeQGV literal 0 HcmV?d00001 diff --git a/tests/__init__.pyc b/tests/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..48afa27379093746a651e136a2a9c6904ae30f6e GIT binary patch literal 175 zcmZ9GOA5k35JX3GA%geFW^7yvBA&pF3l)T6%#jegZqPAt)qe)(4&Z|h5=ISwshgXk}l?;oD&Es`X~1u qO;_^>_#TQ@OKyFN8U;!UQK1mZw$upN#Cwped}?~sasX;|1Mwn_vrn*R8D?vy#L1IM=%-wBPm2C zk&GgV=(nV3$*3(^TSgtxwj6dvJMsw;J##lid**IRhJZ-Mkz`%@Eb>X@-Kftlb_YFl zpZ|~FAVMi@!n{!*cAVGO{t>+GVdjDx>SS6}bJe8Qpk>IbGGYC8>W{+xlSZ#JB&iF! zU)6c)isZ=Ex!Wt$FHmue=Xm@R7!`s67EEc>B0?5`sAYu?p|PgR`LocIcz4i6%ar|) zAwD~J{CgO5Kvs;Uk6`H-ep^Bdc%zRkITkrY=N&n2$p>R5I;rY@Th&{t$^<-ZV1U<4 zMBRbEzX~tL#B|1?!1D$}86ZfjiC zTVG|}TFAOZS-pl~z%uY=la)ZXr}OXtLo#-C)I~&iqLJuobS1iuzi$<+@cCycc%t*h zSy8##mqon=pIFP;M9(bYMiL_VN?DR;CQ({VrmmdRrOaN-9P2;K15?Yp!qs!rIOY*50R2c z)n<3wHO7{u;a0Rlnq;bV$~~!2W7cxDVVko(W!vP+eO~(D%2X{&r;Tbau;^dbZqlS# zqlh(LzaoDE!}k