From af56727c45acdcff684ce60ef933e0d479023c35 Mon Sep 17 00:00:00 2001 From: gowthamdongari Date: Mon, 10 Jul 2017 13:19:08 +0000 Subject: [PATCH] Done --- build.py | 47 ++++++++++++++++++----- build.pyc | Bin 0 -> 2545 bytes tests/__init__.pyc | Bin 0 -> 175 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2826 bytes 4 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index c7d540b..9f5ce6c 100644 --- a/build.py +++ b/build.py @@ -1,22 +1,51 @@ -def get_categorical_variables(df): - return [] +import numpy as np +import pandas as pd +from scipy.stats import norm +import seaborn as sns +import matplotlib.pyplot as plt +df = pd.read_csv('data/employee_retention_data.csv') -def get_numerical_variables(df): - return [] +def get_categorical_variables(df): + categorical_data = list(df[['dept', 'join_date', 'quit_date']]) + return categorical_data +def get_numerical_variables(df): + numeric = pd.DataFrame._get_numeric_data(df) + return list(numeric) def get_numerical_variables_percentile(df): - pass + per = df.describe().T + return per def get_categorical_variables_modes(df): - pass - + return df[get_categorical_variables(df)].mode() def get_missing_values_count(df): - pass + return pd.DataFrame(df.isnull().sum()) def plot_histogram_with_numerical_values(df): - pass + + num_cols = get_numerical_variables(df) + plt.figure(figsize=(15,6)) + + plt.subplot(221) + plt.title(num_cols[0]) + sns.distplot(df[num_cols[0]], color='Blue', fit=norm, kde=False) + + plt.subplot(222) + plt.title(num_cols[1]) + sns.distplot(df[num_cols[1]], color='Blue', fit=norm, kde=False) + + plt.subplot(223) + plt.title(num_cols[2]) + sns.distplot(df[num_cols[2]], color='Blue', fit=norm, kde=False) + + plt.subplot(224) + plt.title(num_cols[3]) + sns.distplot(df[num_cols[3]], color='Blue', fit=norm, kde=False) + + plt.tight_layout() + plt.show() diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c527ec024ae058a26c518c59d1812280d8559e98 GIT binary patch literal 2545 zcmcgu-EQMV6dv12+oZ{+?eZ4^0wEB&Y4ZYTwF0!_Do9==L~bm{Gj%3A_88B2tD=P) z_L67fP55~S9ss^`oIh3F-i5S2IUbMCIp24FCec5`{_nqJKV)M43Gw|Dm-`Ju;y(~Z zhV;nfi39iptnqrIK2C zReEj9gx0Sz_cEhb2wNA|SGe4V5MGE^3{i#@VjhVx!u&Z9ugD=en6VqFD(`!^U0KaG z72o~XXipy0E+C_wpO7CW>pDr40^q;hu8cqL2f-U`-F zliu3KbF^-0Z}uh?y^m>(Ix}uU@3T0_(ex>-Q<%vT@!A|GMslP5a&uVlc&3`C!f7`-tSSV(zN6l&Ry+_QbBsbai{>eCl1x60@q(g{{k& z1`VHGMan!Hy}(h*gj3>MHGZFV5nZ)PWrCj-JOg<)a_(X}?c)m)Z-M5v=Rdl0I5hwO literal 0 HcmV?d00001 diff --git a/tests/__init__.pyc b/tests/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a0e28f11aca22543b01db7d5089211bba490353 GIT binary patch literal 175 zcmZ9Gu?hk)5JV$dh~R%*6ZZ#1`~n*bwF)6w@4{WOA-f{?-Tr`kVCBHPfhmSrfA+gE zg-0+ytu)VS-YB9m*KTPpox=Qq@7lpY8KI}Q3_eFHy>AI_)JCp8AOwmC7`=T`vzY5d nGoo!MSR_%g(Tisk5M*I+E|nzC9qKGw9Y1D@uL)1f1y-9cxs@yy literal 0 HcmV?d00001 diff --git a/tests/test_get_categorical_variables.pyc b/tests/test_get_categorical_variables.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17a7515f4afcfe982ec809d550ad5e048281c129 GIT binary patch literal 2826 zcmc&$%We}%6us?NVg^J8=EaDIgpkIY#4C1aG$Ny!9g7YU!a`~l-Pfj*eweN@$WmA_ zf6NEKIk$sjg?PjU?4)nkt-kd*=hUsL?uTnvPV)45Drdhg-hbio6POJDkrX0jB$G%Y z`W-1cGU-azl}S&uE607&p1eoIz}y|tfw{YqAs~`zBw1hn6L~N4d@^JgN239{&p+Zf zicku>Fi+KoL+AC0zYp(VoVnn}TF#4lshTtzvFqHe*T zZNiH&F`a4X@ul++hA14@xpAiEyCkKf+7Q_zI=*tGd+jtbQ9_xiYJ(8DtngtD6%)cd3rkI@7UcZhu-g`=4dq zU&*>pS-pmFz%uY=lZ`+R=F4ywLo#-A)DMX8L?h8&bUnI>zb_T6@%dXRc&78tMNzrh z*G2smKCxDdQZFpwRuUrlOj(j=CQ(|Kb5||tQf9AJj`bgwfvM$P;hLps9CHc9S81qQ z%)w36n9bS`Bx$>9-o^}8YF@&_4Se?majxZ>np~cTjCcg|RcazfkdIjZ$4E(}YP-7~ z8Ds0xa4Xs%O)}Re<(^ciF>ATnvd!6^vu$$aKCgUmRjQVy^H#OLvFQJ4T-l~stB5t; zz9N4B!~ZF=2LE@2dM>+GoK=#2J6Y~wtc|-vQevy|7d2s$oof=`r27-i&oFBVVR751 z{{d#TqW{vhdS2WU%+5tK9+a2G_fv0HO7>nE(I) literal 0 HcmV?d00001