diff --git a/Download-licenses-Script/database-foss.py b/Download-licenses-Script/database-foss.py index f0d76f5b7..93365eb79 100644 --- a/Download-licenses-Script/database-foss.py +++ b/Download-licenses-Script/database-foss.py @@ -10,7 +10,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. @@ -20,6 +20,7 @@ import json import os + def main(): download = "..\\Original-DB-Foss-Dataset" os.makedirs(download, exist_ok=True) @@ -27,8 +28,9 @@ def main(): response = urlopen(url) data_json = json.loads(response.read()) for licenses in data_json: - with open(download+'\\'+licenses["rf_shortname"], 'w', encoding ='utf-8') as o1: - o1.write(licenses["rf_text"]) + with open(download+'\\'+licenses["rf_shortname"], 'w', encoding='utf-8') as o1: + o1.write(licenses["rf_text"]) + if __name__ == "__main__": main() diff --git a/Download-licenses-Script/exceptions.py b/Download-licenses-Script/exceptions.py index 4e3c7b94f..a30e144bf 100644 --- a/Download-licenses-Script/exceptions.py +++ b/Download-licenses-Script/exceptions.py @@ -10,7 +10,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
@@ -20,25 +20,29 @@ import json import os + def extract_exceptions(): - """ - There are 41 files of SPDX exception in licenseListVersion: 3.13 - url of latest SPDX Exception release = https://spdx.org/licenses/exceptions.json - """ - download = "..\\Original-SPDX-Dataset" - os.makedirs(download, exist_ok=True) - url = 'https://spdx.org/licenses/exceptions.json' - response = urlopen(url) - data_json = json.loads(response.read()) - - for license in data_json["exceptions"]: - license["reference"] = license["reference"].replace("./","https://spdx.org/licenses/",1) - url2 = license["reference"] - response2 = urlopen(url2) - data_json2 = json.loads(response2.read()) - - with open(download+'\\'+license["licenseExceptionId"], 'w') as o1: - o1.write(data_json2["licenseExceptionText"]) + """ + There are 41 files of SPDX exception in licenseListVersion: 3.13 + url of latest SPDX Exception release = https://spdx.org/licenses/exceptions.json + """ + + download = "..\\Original-SPDX-Dataset" + os.makedirs(download, exist_ok=True) + url = 'https://spdx.org/licenses/exceptions.json' + response = urlopen(url) + data_json = json.loads(response.read()) + + for license in data_json["exceptions"]: + license["reference"] = license["reference"].replace( + "./", "https://spdx.org/licenses/", 1) + url2 = license["reference"] + response2 = urlopen(url2) + data_json2 = json.loads(response2.read()) + + with open(download+'\\'+license["licenseExceptionId"], 'w') as o1: + o1.write(data_json2["licenseExceptionText"]) + if __name__ == "__main__": - extract_exceptions() \ No newline at end of file + extract_exceptions() diff --git a/Download-licenses-Script/spdx.py b/Download-licenses-Script/spdx.py index 0f94888ac..5e44baa10 100644 --- a/Download-licenses-Script/spdx.py +++ b/Download-licenses-Script/spdx.py @@ -10,7 +10,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. @@ -20,24 +20,27 @@ import json import os + def extract_spdx(): - """ - There are 460 files of SPDX licenses in licenseListVersion: 3.13 - url of latest SPDX Exception release = https://spdx.org/licenses/licenses.json - """ - download = "..\\Original-SPDX-Dataset" - os.makedirs(download, exist_ok=True) - url = 'https://spdx.org/licenses/licenses.json' - response = urlopen(url) - data_json = json.loads(response.read()) - - for license in data_json["licenses"]: - url2 = 'https://spdx.org/licenses/licenses.json' - response2 = urlopen(url2) - data_json2 = json.loads(response2.read()) - - with open(download+'\\'+license["licenseId"], 'w', encoding='utf-8') as o1: - o1.write(data_json2["licenseText"]) + """ + There are 460 files of SPDX licenses in licenseListVersion: 3.13 + url of latest SPDX Exception release = https://spdx.org/licenses/licenses.json + """ + + download = "..\\Original-SPDX-Dataset" + os.makedirs(download, exist_ok=True) + url = 'https://spdx.org/licenses/licenses.json' + response = urlopen(url) + data_json = json.loads(response.read()) + + for license in data_json["licenses"]: + url2 = 'https://spdx.org/licenses/licenses.json' + response2 = urlopen(url2) + data_json2 = json.loads(response2.read()) + + with open(download+'\\'+license["licenseId"], 'w', encoding='utf-8') as o1: + o1.write(data_json2["licenseText"]) + if __name__ == "__main__": - extract_spdx() \ No newline at end of file + extract_spdx() diff --git a/README.md b/README.md index d455cf2c5..46f69f619 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

Minerva Dataset Generation

+

Minerva Dataset Generation

Project Overview

@@ -84,4 +84,4 @@ Using Nomos to validate generated files. This is a base line regex-based And to use multiple cores to validate files (here I am using 3 cores) : ``` sudo nomos -J -d -n 3 -``` \ No newline at end of file +``` diff --git a/Script-Initial-Split/count.py b/Script-Initial-Split/count.py index 26dc1df3e..3065f4ded 100644 --- a/Script-Initial-Split/count.py +++ b/Script-Initial-Split/count.py @@ -10,7 +10,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. @@ -20,22 +20,34 @@ import pandas as pd import argparse + def main(path): file = [] count = [] for filename in os.listdir(path): file.append(filename) - lst = os.listdir(os.path.join(path,filename)) # dir is your directory path + # dir is your directory path + lst = os.listdir(os.path.join(path, filename)) number_files = len(lst) - count.append(number_files) + count.append(number_files) - data = pd.DataFrame({"files":file,"count":count}) + data = pd.DataFrame({"files": file, "count": count}) data.to_csv("Count.csv", index=False) + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('path', help='Pass a directory to find original licenses') + parser.add_argument( + 'path', help='Pass a directory to find original licenses') + args = parser.parse_args() - path = args.path + + try: + path = args.path + if not os.path.isdir(path): + raise TypeError + except TypeError: + print("Valid directory not provided") + main(path) diff --git a/Script-Initial-Split/initial_split.py b/Script-Initial-Split/initial_split.py index f1df64a08..ad5d41cef 100644 --- a/Script-Initial-Split/initial_split.py +++ b/Script-Initial-Split/initial_split.py @@ -10,86 +10,101 @@ 
but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """ + import re import os import argparse -def splitter(file,dirname): - + +def splitter(file, dirname): + history = [] - with open(file, 'r', encoding= 'unicode_escape') as f: + with open(file, 'r', encoding='unicode_escape') as f: content = f.readlines() - # you may also want to remove whitespace characters like `\n` at the end of each line + +# you may also want to remove whitespace characters +# like `\n` at the end of each line + text = " ".join(content) content = text.split(". ") content = [x.strip() for x in content] para = "" - for comb in range(1,len(content)): + for comb in range(1, len(content)): for i in range(0, len(content)-comb+1, comb): - if len(history)>1000: + if len(history) > 1000: history = list(set(history)) - if len(history)>1000: + if len(history) > 1000: break - para = para + " " + content[i] - para = re.sub("\s\s+" , " ", para) + para = para + " " + content[i] + para = re.sub("\s\s+", " ", para) para = para.strip() if para not in history: history.append(para) history = list(set(history)) - generate_files(file,history,dirname) + generate_files(file, history, dirname) - -def generate_files(file,history,dirname): + +def generate_files(file, history, dirname): counter = 0 os.makedirs(dirname, exist_ok=True) for texts in history: - counter+=1 - name = dirname + '-{}.txt'.format(counter) - with open(os.path.join(dirname,name), 'w', encoding= 'unicode_escape') as o1: - o1.write(texts) - naive_approach(file,dirname,counter) + counter += 1 + name = dirname + f"""-{counter}.txt""" + with open(os.path.join(dirname, name), + 'w', encoding='unicode_escape') as o1: + o1.write(texts) 
+ naive_approach(file, dirname, counter) -def naive_approach(file,dirname,counter): + +def naive_approach(file, dirname, counter): os.makedirs(dirname, exist_ok=True) - with open(file, 'r', encoding= 'unicode_escape') as f: + with open(file, 'r', encoding='unicode_escape') as f: para = sum(line.isspace() for line in f) + 1 - with open(file, 'r+', encoding= 'unicode_escape') as f: + with open(file, 'r+', encoding='unicode_escape') as f: contents = f.read() content = contents.split('\n\n') for i in range(para): counter += 1 - name = dirname + '-{}.txt'.format(counter) + name = dirname + f"""-{counter}.txt""" try: - with open(os.path.join(dirname,name), 'w', encoding= 'unicode_escape') as o1: + with open(os.path.join(dirname, name), + 'w', encoding='unicode_escape') as o1: o1.write(str(content[i])) - except: + + except EnvironmentError: break + def main(path): - for roots, dirs, files in os.walk(path,topdown=True): + for roots, dirs, files in os.walk(path, topdown=True): for name in files: dirname = os.path.splitext(name)[0] - file = os.path.join(path,name) - splitter(file,dirname) + file = os.path.join(path, name) + splitter(file, dirname) + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('path', help='Pass a directory to find original licenses') + parser.add_argument( + 'path', help='Pass a directory to find original licenses') + args = parser.parse_args() - path = args.path - - if path.isdir(): - main(path) - else: - print("Invalid directory") - + + try: + path = args.path + if not os.path.isdir(path): + raise TypeError + except TypeError: + print("Valid directory not provided") + + main(path) diff --git a/markov/__init__.py b/markov/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/markov/helper.py b/markov/helper.py index cb11ac369..9ae0be8f5 100644 --- a/markov/helper.py +++ b/markov/helper.py @@ -10,37 +10,53 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """ + import os -from os import walk -from os.path import splitext -from os.path import join import pandas as pd +import re +import string + + +def preprocessing_text(text): + text = re.sub(r'\w*\d\w*', '', text) + text = re.sub("[\n]+", "\n", text) + text = text.strip() + punctuationNoPeriod = "[" + "(" + ")" + "]" + text = re.sub(punctuationNoPeriod, "", text) + text = text.translate(str.maketrans('', '', string.punctuation)) + text = re.sub(r"\b[a-zA-Z]\b", "", text) + text = re.sub("[\s]+", " ", text) + text = text.replace('"', '') + return text + def read_directory(path): barlist = list() for root, dirs, files in os.walk(path): - for f in files: - if splitext(f)[1].lower() == ".txt": - barlist.append(os.path.join(root, f)) - #print(barlist) + for f in files: + if os.path.splitext(f)[1].lower() == ".txt": + barlist.append(os.path.join(root, f)) return barlist + def file_vocab(filename): - vfile = os.path.join("../Original-SPDX-Dataset",filename + '.txt') - with open(vfile, 'r', encoding = 'unicode_escape') as f: + vfile = os.path.join("../Original-SPDX-Dataset", filename + '.txt') + with open(vfile, 'r', encoding='unicode_escape') as f: vocab = f.read() return vocab + def file_regex(filepath, regexcsv): - licensename = os.path.sep.join(filepath.split(os.path.sep)[0:-1]).split(os.path.sep)[-1] + licensename = os.path.sep.join(filepath.split( + os.path.sep)[0:-1]).split(os.path.sep)[-1] df = pd.read_csv(regexcsv) - var = df.loc[df.Licenses==licensename,'Regex'] + var = df.loc[df.Licenses == licensename, 'Regex'] if var.shape[0] == 0: return "" else: - return var.values[0] \ No newline at end of file + return var.values[0] diff --git a/markov/markov.py b/markov/markov.py index 
28fcefdc5..c2ce750f0 100644 --- a/markov/markov.py +++ b/markov/markov.py @@ -10,74 +10,79 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """ + import string import re import intxeger import random from collections import defaultdict -#import sys -# sys.path.append('ngram/ngram') -def regex_expansion(prevsen,latersen,text): - result = [] - while(len(result)<100): - final_regex = "" - num = random.randint(1,4) - for i in range(num): - final_regex = final_regex + generate_sen(markov_chain(text),random.randint(1,32),text)+" " - fregex_ = prevsen + final_regex + latersen - ans = licensestatement_(fregex_) - for i in ans: - i = re.sub("[\s]+", " ",i) - if i not in result: - result.append(i) - return result + +def regex_expansion(prevsen, latersen, text): + result = [] + while(len(result) < 100): + final_regex = "" + num = random.randint(1, 4) + for i in range(num): + final_regex = final_regex + \ + generate_sen(markov_chain(text), + random.randint(1, 32), text)+" " + fregex_ = prevsen + final_regex + latersen + ans = licensestatement_(fregex_) + for i in ans: + i = re.sub("[\s]+", " ", i) + if i not in result: + result.append(i) + return result + def generate_sen(chain, count, text): - text = re.sub(r'\w*\d\w*', '', text) - words = text.split(' ') - words = [word.translate({ord(c): "" for c in "!@#$%^&*()[]{};:,./<>?\|`~-=_+"}) for word in words] - words = [word.lower() for word in words] - word1 = random.choice(words) - sentence = word1 - for i in range(count-2): - try: - word2 = random.choice(chain[word1]) - word1 = word2 - sentence += ' ' + word2 - except: - continue - return sentence + text = re.sub(r'\w*\d\w*', '', text) + words = 
text.split(' ') + words = [word.translate({ord(c): "" for c in "!@#$%^&*()[]{};:,./<>?\|`~-=_+"}) for word in words] + words = [word.lower() for word in words] + word1 = random.choice(words) + sentence = word1 + for i in range(count-2): + try: + word2 = random.choice(chain[word1]) + word1 = word2 + sentence += ' ' + word2 + except Exception: + continue + return sentence + def markov_chain(text): - words = text.split(' ') - words = [word.translate({ord(c): "" for c in "!@#$%^&*()[]{};:,./<>?\|`~-=_+"}) for word in words] - words = [word.lower() for word in words] - m_dict = defaultdict(list) - for current_word, next_word in zip(words[0:-1], words[1:]): - current_word = re.sub(r'\w*\d\w*', '', current_word) - m_dict[current_word].append(next_word) - m_dict = dict(m_dict) - return m_dict + words = text.split(' ') + words = [word.translate({ord(c): "" for c in "!@#$%^&*()[]{};:,./<>?\|`~-=_+"}) for word in words] + words = [word.lower() for word in words] + m_dict = defaultdict(list) + for current_word, next_word in zip(words[0:-1], words[1:]): + current_word = re.sub(r'\w*\d\w*', '', current_word) + m_dict[current_word].append(next_word) + m_dict = dict(m_dict) + return m_dict + def licensestatement_(regex_): - x = intxeger.build(regex_) - res=x.sample(N=1) - result = res - i=2 - while True: - try: - result = res - result = list(set(result)) - if len(result) >10: - return result - res = x.sample(N=i) - i+=1 - except: - break - return result + x = intxeger.build(regex_) + res = x.sample(N=1) + result = res + i = 2 + while True: + try: + result = res + result = list(set(result)) + if len(result) > 10: + return result + res = x.sample(N=i) + i += 1 + except Exception: + break + return result diff --git a/markov/markov_licenses.py b/markov/markov_licenses.py index fb36beb39..bb3c689b5 100644 --- a/markov/markov_licenses.py +++ b/markov/markov_licenses.py @@ -10,68 +10,62 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 
PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """ + import os -from os import walk -from os.path import splitext -from os.path import join -#import sys -#sys.path.append('regex/ngram') -from preprocess import * -from markov import * -from helper import read_directory, file_vocab, file_regex +from .preprocess import * +from .markov import * +from .helper import read_directory, file_vocab, file_regex import argparse -import pandas as pd -import random import pathlib import multiprocessing +import numpy as np -def chunkIt(seq, num): - avg = len(seq) / float(num) - out = [] - last = 0.0 - while last < len(seq): - out.append(seq[int(last):int(last + avg)]) - last += avg +def chunkIt(seq, num): + return np.array_split(seq, num) - return out def main(files, regexcsv): pathlib.Path("markovfiles").mkdir(parents=True, exist_ok=True) - # files = read_directory(path) for file in files: - filename = os.path.sep.join(file.split(os.path.sep)[0:-1]).split(os.path.sep)[-1] - with open(file, 'r', encoding = 'unicode_escape') as f: + filename = os.path.sep.join(file.split(os.path.sep)[ + 0:-1]).split(os.path.sep)[-1] + with open(file, 'r', encoding='unicode_escape') as f: content = f.read() - + vocabulary = file_vocab(filename) regex = file_regex(file, regexcsv) regex = regex.strip().replace('"', '') - - if len(regex)==0: + + if len(regex) == 0: continue - os.makedirs(os.path.join("markovfiles",filename), exist_ok=True) + os.makedirs(os.path.join("markovfiles", filename), exist_ok=True) preregex = regex.split("(.{1,32} (AND|OR)){1,4}")[0] secregex = regex.split("(.{1,32} (AND|OR)){1,4}")[-1] expansion = [] - expansion = regex_expansion(preregex,secregex,vocabulary) - lst = os.listdir(os.path.join("markovfiles",filename)) + expansion = 
regex_expansion(preregex, secregex, vocabulary) + lst = os.listdir(os.path.join("markovfiles", filename)) count = len(lst) - + for ind in range(len(expansion)): - count+=1 - with open(os.path.join(os.path.join(os.path.join("markovfiles",filename),'{}-{}.txt'.format(filename,count))), 'w', encoding = 'unicode_escape') as o1: + count += 1 + with open(os.path.join(os.path.join( + os.path.join("markovfiles", filename), + f"""{filename}-{count}.txt""")), 'w', + encoding='unicode_escape') as o1: + o1.write(content + '.' + expansion[ind]) + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -91,17 +85,33 @@ def main(files, regexcsv): ) args = parser.parse_args() - inputpath = args.inputpath - regexcsv = args.regexcsv - n = int(args.cores) + + try: + inputpath = args.inputpath + if not os.path.isdir(inputpath): + raise TypeError + except TypeError: + print("Valid License directory not provided") + + try: + regexcsv = args.regexcsv + if not os.path.isfile(regexcsv): + raise TypeError + if os.path.splitext(regexcsv)[1].lower() != ".csv": + raise TypeError + except TypeError: + print("Invalid File or path provided. 
Expected csv filepath") + + try: + n = int(args.cores) + if n <= 0: + raise ValueError + except ValueError: + print("Number of cores cannot be a string, zero or a negative number") samples = read_directory(inputpath) ls = chunkIt(samples, n) - list_data = [] - - for i in range(len(ls)): - list_data.append((ls[i], regexcsv)) + list_data = [(ls[i], regexcsv) for i in range(len(ls))] with multiprocessing.Pool(processes=n) as pool: pool.starmap(main, list_data) - diff --git a/markov/preprocess.py b/markov/preprocess.py deleted file mode 100644 index 95fb87372..000000000 --- a/markov/preprocess.py +++ /dev/null @@ -1,31 +0,0 @@ -""" - Copyright (C) 2021 Shreya Singh (shreya.out@gmail.com) - - SPDX-License-Identifier: GPL-2.0 - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - version 2 as published by the Free Software Foundation. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-""" -import re -import string - -def preprocessing_text(text): - text = re.sub(r'\w*\d\w*', '', text) - text = re.sub("[\n]+", "\n",text) - text = text.strip() - punctuationNoPeriod = "[" + "(" + ")" + "]" - text = re.sub(punctuationNoPeriod, "", text) - text = text.translate(str.maketrans('', '', string.punctuation)) - text = re.sub(r"\b[a-zA-Z]\b", "", text) - text = re.sub("[\s]+", " ",text) - text = text.replace('"', '') - return text diff --git a/ngram/licenses.py b/ngram/licenses.py index a53f1b701..00b919b19 100644 --- a/ngram/licenses.py +++ b/ngram/licenses.py @@ -10,99 +10,99 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """ + + import os -from os import walk -from os.path import splitext -from os.path import join -from preprocessing import * -from regex_handling import * -from ngram import * +from .preprocessing import * +from .regex_handling import * +from .ngram import * import argparse import pandas as pd import random import pathlib import multiprocessing +import numpy as np + def chunkIt(seq, num): - avg = len(seq) / float(num) - out = [] - last = 0.0 - - while last < len(seq): - out.append(seq[int(last):int(last + avg)]) - last += avg + return np.array_split(seq, num) - return out def read_directory(path): barlist = list() for root, dirs, files in os.walk(path): - for f in files: - if splitext(f)[1].lower() == ".txt": - barlist.append(os.path.join(root, f)) - #print(barlist) + for f in files: + if os.path.splitext(f)[1].lower() == ".txt": + barlist.append(os.path.join(root, f)) return barlist + def file_vocab(filename): - vfile = os.path.join("../Original-SPDX-Dataset",filename + '.txt') - # licensename = 
filepath.split('\\')[-1] - with open(vfile, 'r', encoding = 'unicode_escape') as f: + vfile = os.path.join("../Original-SPDX-Dataset", filename + '.txt') + with open(vfile, 'r', encoding='unicode_escape') as f: vocab = f.read() return vocab + def file_regex(filepath, regexcsv): - licensename = os.path.sep.join(filepath.split(os.path.sep)[0:-1]).split(os.path.sep)[-1] + licensename = os.path.sep.join(filepath.split( + os.path.sep)[0:-1]).split(os.path.sep)[-1] df = pd.read_csv(regexcsv) - var = df.loc[df.Licenses==licensename,'Regex'] + var = df.loc[df.Licenses == licensename, 'Regex'] if var.shape[0] == 0: return "" else: return var.values[0] + def main(files, regexcsv): pathlib.Path("ngramfiles").mkdir(parents=True, exist_ok=True) - # files = read_directory(path) - + for file in files: - filename = os.path.sep.join(file.split(os.path.sep)[0:-1]).split(os.path.sep)[-1] - with open(file, 'r', encoding = 'unicode_escape') as f: + filename = os.path.sep.join(file.split(os.path.sep)[ + 0:-1]).split(os.path.sep)[-1] + with open(file, 'r', encoding='unicode_escape') as f: content = f.read() vocabulary = file_vocab(filename) regex = file_regex(file, regexcsv) regex = regex.strip().replace('"', '') - if len(regex)==0: - # print('Regex not found for -> ', filename) + if len(regex) == 0: continue - os.makedirs(os.path.join("ngramfiles",filename), exist_ok=True) + os.makedirs(os.path.join("ngramfiles", filename), exist_ok=True) preregex = regex.split("(.{1,32} (AND|OR)){1,4}")[0] secregex = regex.split("(.{1,32} (AND|OR)){1,4}")[-1] expansion = [] - for ind in range(2,8): - m = create_ngram_model(ind,file) - for i in range(1,len(vocabulary)): + for ind in range(2, 8): + m = create_ngram_model(ind, file) + for i in range(1, len(vocabulary)): random.seed(i) - generated_text = m.generate_text(np.random.randint(6,31)) + generated_text = m.generate_text(np.random.randint(6, 31)) generated_text = preprocessing_text(generated_text).lower() expansion.append(generated_text) expansion 
= list(set(expansion)) - expansion_regex = regex_expansion(preregex,expansion,secregex) - lst = os.listdir(os.path.join("ngramfiles",filename)) + expansion_regex = regex_expansion(preregex, expansion, secregex) + lst = os.listdir(os.path.join("ngramfiles", filename)) count = len(lst) for ind in range(len(expansion_regex)): - count+=1 - with open(os.path.join(os.path.join("ngramfiles",filename),'{}-{}.txt'.format(filename,count)), 'w', encoding = 'unicode_escape') as o1: + count += 1 + with open(os.path.join( + os.path.join("ngramfiles", filename), + f"""{filename}-{count}.txt"""), + 'w', encoding='unicode_escape') as o1: + o1.write(content + '.' + expansion_regex[ind]) + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( @@ -123,19 +123,33 @@ def main(files, regexcsv): ) args = parser.parse_args() - inputpath = args.inputpath - regexcsv = args.regexcsv - n = int(args.cores) + + try: + inputpath = args.inputpath + if not os.path.isdir(inputpath): + raise TypeError + except TypeError: + print("Valid License directory not provided") + + try: + regexcsv = args.regexcsv + if not os.path.isfile(regexcsv): + raise TypeError + if os.path.splitext(regexcsv)[1].lower() != ".csv": + raise TypeError + except TypeError: + print("Invalid File or path provided. 
Expected csv filepath") + + try: + n = int(args.cores) + if n <= 0: + raise ValueError + except ValueError: + print("Number of cores cannot be a string, zero or a negative number") samples = read_directory(inputpath) ls = chunkIt(samples, n) - list_data = [] - - for i in range(len(ls)): - list_data.append((ls[i], regexcsv)) + list_data = [(ls[i], regexcsv) for i in range(len(ls))] with multiprocessing.Pool(processes=n) as pool: pool.starmap(main, list_data) - - # main(inputpath, regexcsv) - diff --git a/ngram/ngram.py b/ngram/ngram.py index 757c30f67..2ba574bbd 100644 --- a/ngram/ngram.py +++ b/ngram/ngram.py @@ -10,11 +10,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """ + + import string import re from collections import defaultdict @@ -22,10 +24,13 @@ from preprocessing import tokenize import random + def get_ngrams(n: int, tokens: list) -> list: tokens = (n-1)*['']+tokens - l = [(tuple([tokens[i-p-1] for p in reversed(range(n-1))]), tokens[i]) for i in range(n-1, len(tokens))] - return l + lr = [(tuple([tokens[i-p-1] for p in reversed(range(n-1))]), tokens[i]) + for i in range(n-1, len(tokens))] + return lr + class NgramModel(object): @@ -97,4 +102,4 @@ def create_ngram_model(n, path): # add back the fullstop sentence += '.' m.update(sentence) - return m \ No newline at end of file + return m diff --git a/ngram/preprocessing.py b/ngram/preprocessing.py index c4acee94d..d19c34a12 100644 --- a/ngram/preprocessing.py +++ b/ngram/preprocessing.py @@ -10,7 +10,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. @@ -19,20 +19,22 @@ import string from typing import List + def preprocessing_text(text): text = re.sub(r'\w*\d\w*', '', text) - text = re.sub("[\n]+", "\n",text) + text = re.sub("[\n]+", "\n", text) text = text.strip() punctuationNoPeriod = "[" + "(" + ")" + "]" text = re.sub(punctuationNoPeriod, "", text) text = text.translate(str.maketrans('', '', string.punctuation)) - text = re.sub(r"\b[a-zA-Z]\b", "", text) - text = re.sub("[\s]+", " ",text) - text = text.replace('"', '') + text = re.sub(r"\b[a-zA-Z]\b", "", text) + text = re.sub("[\s]+", " ", text) + text = text.replace('"', '') return text + def tokenize(text: str) -> List[str]: for punct in string.punctuation: text = text.replace(punct, ' '+punct+' ') t = text.split() - return t \ No newline at end of file + return t diff --git a/ngram/regex_handling.py b/ngram/regex_handling.py index 0df8637ef..b0c36e60d 100644 --- a/ngram/regex_handling.py +++ b/ngram/regex_handling.py @@ -10,11 +10,12 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
""" + import string import re import intxeger @@ -22,45 +23,49 @@ import numpy as np import random + def licensestatement_(regex_): - x = intxeger.build(regex_) - res=x.sample(N=1) - result = res - i=2 - while True: - try: - result = res - result = list(set(result)) - if len(result) >10: - return result - res = x.sample(N=i) - i+=1 - except: - break - return result + x = intxeger.build(regex_) + res = x.sample(N=1) + result = res + i = 2 + while True: + try: + result = res + result = list(set(result)) + if len(result) > 10: + return result + res = x.sample(N=i) + i += 1 + except Exception: + break + return result + + +def regex_expansion(prevsen, input_list, latersen): + res_ = [] + while(len(res_) < 200): + final_regex = "" + num = random.randint(1, 4) + for i in range(num): + final_regex = final_regex + \ + np.random.choice(input_list)+" (and|or) " + fregex_ = prevsen + " " + final_regex + " " + latersen + ans = licensestatement_(fregex_) + for i in ans: + i = re.sub("[\s]+", " ", i) + res_.append(i) + return res_ -def regex_expansion(prevsen,input_list,latersen): - res_=[] - while(len(res_)<200): - final_regex = "" - num = random.randint(1,4) - for i in range(num): - final_regex = final_regex + np.random.choice(input_list)+" (and|or) " - fregex_ = prevsen + " " + final_regex + " " + latersen - ans = licensestatement_(fregex_) - for i in ans: - i = re.sub("[\s]+", " ",i) - res_.append(i) - return res_ def generate_statements(): - for i in range(2,20): - m = create_ngram_model(i,key) - - for i in range(1,len(text)): - random.seed(i) - generated_text = m.generate_text(np.random.randint(6,31)) - generated_text = clean_license(generated_text) - generated_text = generated_text.lower() - expansion.append(generated_text) - expansion = list(set(expansion)) \ No newline at end of file + for i in range(2, 20): + m = create_ngram_model(i, key) + + for i in range(1, len(text)): + random.seed(i) + generated_text = m.generate_text(np.random.randint(6, 31)) + generated_text = 
clean_license(generated_text) + generated_text = generated_text.lower() + expansion.append(generated_text) + expansion = list(set(expansion)) diff --git a/requirements.txt b/requirements.txt index 38a6919db..8c1654492 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pandas==1.2.4 -numpy==1.18.2 +pandas>=1.2.4 +numpy>=1.18.2 intxeger==0.1.1