From 8b5a61a1a14f75d1da707f158bb25db3c3ccc697 Mon Sep 17 00:00:00 2001 From: mudassirkhan19 Date: Tue, 4 Jul 2017 11:48:41 +0000 Subject: [PATCH] Done --- build.py | 30 ++++++++++++++++++----- build.pyc | Bin 0 -> 2478 bytes tests/__init__.pyc | Bin 0 -> 175 bytes tests/test_get_categorical_variables.pyc | Bin 0 -> 2826 bytes 4 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_get_categorical_variables.pyc diff --git a/build.py b/build.py index c7d540b..29424bb 100644 --- a/build.py +++ b/build.py @@ -1,22 +1,40 @@ +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +from scipy.stats import norm + def get_categorical_variables(df): - return [] + return df[['dept','join_date','quit_date']] def get_numerical_variables(df): - return [] + return df.drop(['dept','join_date','quit_date'], axis=1) def get_numerical_variables_percentile(df): - pass + return pd.concat([df.describe(), pd.DataFrame(df.median().rename('median')).T], axis=0) def get_categorical_variables_modes(df): - pass + cat_df = get_categorical_variables(df) + return cat_df.mode() def get_missing_values_count(df): - pass + return pd.DataFrame(pd.isnull(df).sum().rename('NA_count')) def plot_histogram_with_numerical_values(df): - pass + num_df = get_numerical_variables(df) + plt.subplot(221) + plt.title(num_df.columns[0]) + sns.distplot(num_df.iloc[:,0], color='yellow', fit=norm, kde=False) + plt.subplot(222) + plt.title(num_df.columns[1]) + sns.distplot(num_df.iloc[:,1], color='yellow', fit=norm, kde=False) + plt.subplot(223) + plt.title(num_df.columns[2]) + sns.distplot(num_df.iloc[:,2], color='yellow', fit=norm, kde=False) + plt.subplot(224) + plt.title(num_df.columns[3]) + sns.distplot(num_df.iloc[:,3], color='yellow', fit=norm, kde=False) diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ff9afb7d6fe0955385137213f0f290ee128256c GIT binary patch literal 2478 zcmcgu+ioL85bg0Lb{uDSSr8CF0wIv*C2O&7AcR1meKgV-2@#O2_H@T-r{}WMJuHzx zLJBg7psT%kH-JksN>-XQ~>glJ4>*pBZF@(V{ zQC_K|8s!H%&sy6Q6|a^McGqkN0g~DBT@F1 z8H;jngGI{Qb9u_xI!3zN zRIXea9j#c8TNlMjaZ7bUn6o+VCO8|uhM1jHg`E|x(IL3z^hB2*eLVZNYEDC~b34l` zV`tXr!{e$sv_bx`iVh(76c;L&q{5Csk4GIVg1domc zM5VCC=`y40FjP8%hy|~;@n6Yxl+GNF&*Q+L93TG1FX=w(Lm3W zzIefs5&3l0<@hF-WA#Fp^uOhDa+Ax+i(T$rcR9Vu<@5%ZhBZTa%-x@m(op~(yNItE z;mn!!zIq~@Id)O5r^dR!xSXBC{Y}A&bN^6~T&d8GCn)P{A0yjTXjgcvLrKdm%tHE$Hbm|L|p*Gggjzz^-9qlA#A;F)9TJD-myxKrtQ^esY50Re-z tPbwDEIM9q}3kqh5lx^_h83hDc7@UhaigSl?maUGT6NPEO%W{F$<_qCZEFu5^ literal 0 HcmV?d00001 diff --git a/tests/test_get_categorical_variables.pyc b/tests/test_get_categorical_variables.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d700fe8c7b61992dce80952a0e455a8f516fea3e GIT binary patch literal 2826 zcmc&$O>Y!O5UtrSdkGOF1d(|PKKfbd4-6PxjUjGb9W^}KqT`>vZ4GV@=D}rHf9%x zlM%Yl|HW?-p%ivuo~RE8&g*0UBfR}-=7O7QxhU$TYSL)XD&%#Quzo-FFT!`{jsDb- zq%P=j-Q=k&k{7PY-Lpdd3KhqAgvb91qe3vif+@{-sojd&O}bo`9RKka%#*(=T*b^t43>8nSiGq z4Dfi3sBhsPZ^DZ)F`aoB@TH3o#weWBxpAiE{oUq)4C;hIfvBFfd6C6z8WUHz2IIWf zCOm0comd@%y%uu|3#@9?Pw)~mzCvmGye@TGwiz1d&FOPj{qo!NY2BRqg-dms)|pN< zb9?i;*?TAJ-b&Uz%IY;t1D1g|n`{KSzgULvF(hL*M}39}Pc#zUjBZ4?@b|ugH9mhV z1lCrZ+gIdgVE8{p*5KbosOPfl#91ZT$CKp;jJ0uhNJ?xq{<0=avI|Y(TXeso zxec?H5Ei$6`hUW#R`g$ax^*zg#HZ!{8;nTq%N+ZBb&TV-Xw@fa-B#f;={&AJgmjDB zKIyzstVq8KA5~dn+fv>xFG*4%i<881J)tyh0XOE(9e+V{hlbq6OqYw@)=FDAKx1BJ zNVy%6qiDP{-W?Ce!wKsQQ(NVMY{aM#J63Su;B>dNu}j7rd!L0i>av7)GNwFFa?7et cd5*JvxHirEoh}bJC4+r4y51k*Zg8XjANO^!I{*Lx literal 0 HcmV?d00001