diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/KMeansClustering.py b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KMeansClustering.py
new file mode 100644
index 0000000..0edbcf2
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KMeansClustering.py
@@ -0,0 +1,130 @@
+# =============================================================================
+# KMeansClustering.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Randomly generate up to 10 centroids without issue. Each centroid will have a
+# classification. The nearest centroid to a point will determine the point's 
+# classification (decide what to do if the distances are equal yourself).
+# Create random test cases until centroids stop moving and determine whether
+# each case is likely to have CKD depending on the classification of the
+# nearest centroid.
+# Bonus: Create lines roughly separating each centroid group
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+import NearestNeighborClassifier as NNC
+from scipy.spatial import KDTree as kdt
+
+# =============================================================================
+# Functions
+# =============================================================================
+# randomCentroids function takes in k, the integer number of clusters to
+# generate, and outputs a k x 2 array of random values drawn uniformly from
+# [0, 1). The first column represents glucose and the second column
+# represents hemoglobin. Each row is one centroid, and the row index doubles
+# as that centroid's classification value (i.e.: row index = classification
+# value).
+def randomCentroids(k):
+    shape = (k, 2)
+    # random_sample(shape) is the call that rand(k, 2) wraps, so draws match
+    return np.random.random_sample(shape)
+
+# assignToCentroids function takes in the normalized data (either a plain
+# N x 2 array of [glucose, hemoglobin] rows or an object exposing that array
+# as .paras, as the NearestNeighborClassifier data class does) and the K x 2
+# array of centroids. A KD-tree built on the centroids finds the nearest
+# centroid of every point. Returns a 1D array holding, for each data row,
+# the row index (classification) of its nearest centroid.
+def assignToCentroids(normArr, centArr):
+    return kdt(centArr).query(getattr(normArr, 'paras', normArr))[1]
+# print(assignToCentroids(NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, np.array([[.5, .5],[.25,.25]])))
+
+# updateCentroids function inputs the K x 2 array of centroid locations, the
+# 1D array of each data row's centroid index, and the normalized data (an
+# object with .gluc and .hemo arrays). Each centroid moves to the mean glucose
+# (column 0) and mean hemoglobin (column 1) of its rows; a centroid with no
+# rows keeps its old position, since the mean of nothing is NaN and NaN never
+# compares equal in iterate. Returns a new float array; centArr is untouched.
+def updateCentroids(centArr, classArr, normArr):
+    upCentArr = np.array(centArr, dtype=float)
+    for i in range(len(upCentArr)):
+        members = np.asarray(classArr) == i
+        if members.any():
+            upCentArr[i] = normArr.gluc[members].mean(), normArr.hemo[members].mean()
+    return upCentArr
+# centArr = np.array([[0.5, 0.5], [.25, .25]])
+# print(updateCentroids(
+#     centArr, assignToCentroids(
+#         NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, centArr),
+#         NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
+#     ))
+# print(centArr)
+
+# iterate function takes in the normalized data object and starting centroids,
+# reassigns points and updates centroids until they stop moving or maxIter
+# passes have run (a bounded loop, so it always ends), and returns the final
+# centroids. The KD-tree query gets an N x 2 [gluc, hemo] array, not the object.
+def iterate(normArr, centArr, maxIter=100):
+    points = np.column_stack((normArr.gluc, normArr.hemo))
+    for _ in range(maxIter):
+        upCentArr = updateCentroids(centArr, assignToCentroids(points, centArr), normArr)
+        if np.array_equal(upCentArr, centArr):
+            break
+        centArr = upCentArr
+    return centArr
+if __name__ == "__main__":  # demo run only when executed as a script, not on import
+    print(iterate(NNC.normalizeData(NNC.openCSVFile('ckd.csv')),
+                  np.array([[.5, .5], [.25, .25]])))
+
+# graphClusters void function takes in a 1D and a 2D numpy array to graph. The
+# 1D array of centroid locations and classifications have distinct points on the
+# graph. The 2D array graphs points of normalized CSV data and colors them the
+# same color as their corresponding centroids. A legend is generated in a
+# reasonable position.
+# Bonus: Create lines roughly separating each centroid group
+def graphClusters():
+    # Stub: no plotting is implemented yet, so the call is a no-op.
+    return None
+
+# dataAnalysis void function takes in the original parsed CSV classifications 
+# and the final classifications of the data based on K-means clustering (use of
+# centroids) and compares the two to find false/true positives/negatives.
+# Note: This should only run when there are two centroids (i.e.: k = 2)
+# False positive: Percentage of non-CKD were incorrectly labelled by K-Means as
+# being in the CKD cluster
+# True positive (sensitivity): Percentage of CKD patients were correctly 
+# labeled by K-Means 
+# False negative: Percentage of CKD patients were incorrectly labelled by
+# K-Means as being in the non-CKD cluster
+# True negative (specificity): Percentage of non-CKD patients were correctly 
+# labelled by K-Means 
+# Note: True positive (~93 %) + False positive (~7%) = 100%
+# Note: True Negative (~100%) + False negative (~0%) = 100%
+def dataAnalysis():
+    return None  # stub: the comparison described above is not implemented yet
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in nothing, loads and normalizes ckd.csv with the
+# NearestNeighborClassifier helpers, and graphs the normalized data. This
+# function returns 0.
+def mainDriver():
+    # Open the CSV file using the parsing method from
+    # NearestNeighborClassifier. Takes the file name, outputs a data object.
+    dataArr = NNC.openCSVFile('ckd.csv')
+
+    # Normalize data using method from NearestNeighborClassifier. Input and
+    # output are data objects.
+    normArr = NNC.normalizeData(dataArr)
+
+    # Graph the normalized data using method from NearestNeighborClassifier,
+    # then display the figure (graphCSVFile itself never calls plt.show()).
+    NNC.graphCSVFile(normArr)
+    plt.show()
+    return 0
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/KNearestNeighborClassifier.py b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KNearestNeighborClassifier.py
new file mode 100644
index 0000000..382bd59
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KNearestNeighborClassifier.py
@@ -0,0 +1,67 @@
+# =============================================================================
+# KNearestNeighborClassifier.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Create a random test case and determine whether the case is
+# likely to have CKD depending on the mode of the classifications of the
+# k number of nearest points.
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+import NearestNeighborClassifier as NNC
+from statistics import mode
+
+# =============================================================================
+# Functions
+# =============================================================================
+# findDistanceArray inputs the normalized data object, a random test case
+# ([glucose, hemoglobin]), and an integer k. All distances are computed with one
+# vectorized call to findDistance from NearestNeighborClassifier and sorted
+# once, after the distances are known (the sort used to rerun on every row).
+# Outputs a 1D array with the indices of the k points nearest to the test
+# case, closest first; an empty data set yields an empty array.
+def findDistanceArray(normArr, testCase, k):
+    distArr = NNC.findDistance(normArr.hemo, normArr.gluc, testCase[1], testCase[0])
+    kindex = np.argsort(distArr)[:k]
+    return kindex
+
+# graphKNearestNeighbor void function takes in the random test case (a 1D
+# [glucose, hemoglobin] array), the normalized data object, and the integer k.
+# The CSV data is graphed with hemoglobin on the x-axis and glucose on the
+# y-axis, colored by classification (classes are assumed to be 0/1). The k
+# nearest points are highlighted and the test case is an "x" colored by their
+# majority class; a tied vote goes to the single nearest point, so even k never
+# raises the StatisticsError that statistics.mode throws before Python 3.8.
+def graphKNearestNeighbor(testCase, normArr, k):
+    kindex = findDistanceArray(normArr, testCase, k)
+    votes = normArr.disease[kindex]
+    noCkd = np.count_nonzero(votes == 0)
+    healthy = votes[0] == 0 if 2 * noCkd == len(votes) else 2 * noCkd > len(votes)
+    NNC.graphCSVFile(normArr)
+    plt.scatter(testCase[1], testCase[0], c=('b' if healthy else 'r'),
+                label='Test Case', marker="x")
+    plt.scatter(normArr.hemo[kindex], normArr.gluc[kindex],
+                c='y', label='Nearest neighbor(s)')
+    plt.legend(fontsize="small")
+    plt.show()
+    return
+
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in nothing, asks for k, and graphs the original CSV
+# file, the k nearest neighbors, and a random test case. This function returns 0.
+def mainDriver():
+    val = int(input("How many neighbors are you looking for: "))
+    test = NNC.createTestCase()
+    normal = NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
+    graphKNearestNeighbor(test, normal, val)
+    return 0
+if __name__ == "__main__":  # run only as a script, not when imported
+    mainDriver()
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/NearestNeighborClassifier.py b/Summer-2020-Data-Analysis-Project-Brandon-Branch/NearestNeighborClassifier.py
new file mode 100644
index 0000000..6d0e806
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/NearestNeighborClassifier.py
@@ -0,0 +1,112 @@
+# =============================================================================
+# NearestNeighborClassifier.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Create n number of random test cases and determine whether the case is
+# likely to have CKD depending on the classification of the nearest point.
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+
+# =============================================================================
+# Classes
+# =============================================================================
+class Butts:
+    # Wraps a 2D array whose columns are glucose, hemoglobin, class and
+    # exposes column views plus the per-column extrema used for normalization.
+    def __init__(self, data):
+        self.gluc, self.hemo, self.disease = data[:, 0], data[:, 1], data[:, 2]
+        self.paras = data[:, :2]  # the two feature columns only
+        self.all = data[:, :3]
+        self.shape = np.shape(data)
+        self.len = self.shape[0]
+        self.colmin = np.amin(data, axis=0)
+        self.colmax = np.amax(data, axis=0)
+
+# =============================================================================
+# Functions
+# =============================================================================
+# Reads fileName as comma-separated numbers (header row skipped) into a Butts
+def openCSVFile(fileName):
+    return Butts(np.genfromtxt(fileName, skip_header=1, delimiter=','))
+
+# normalizeData takes in a Butts object and min-max scales each of its three
+# columns (glucose, hemoglobin, class) to the 0-1 range in one vectorized
+# step instead of a row-by-row loop. A constant column (max == min) is mapped
+# to 0 rather than dividing by zero. Returns a new Butts object.
+def normalizeData(dataArr):
+    colmin, span = dataArr.colmin[:3], (dataArr.colmax - dataArr.colmin)[:3]
+    span = np.where(span == 0, 1, span)  # avoid 0/0 -> NaN on flat columns
+    return Butts((dataArr.all - colmin) / span)
+
+# graphCSVFile void function takes in the normalized data object and draws a
+# scatter plot with hemoglobin on the x-axis and glucose on the y-axis.
+# Points are colored by classification: class 0 in blue ('No CKD') first,
+# then class 1 in red ('CKD'). The figure is titled and labeled but not
+# shown; callers decide when to call plt.show().
+def graphCSVFile(normArr):
+    for value, color, name in ((0, 'b', 'No CKD'), (1, 'r', 'CKD')):
+        picked = normArr.disease == value
+        plt.scatter(normArr.hemo[picked], normArr.gluc[picked], c=color, label=name)
+    plt.title('Hemoglobin and Glucose levels')
+    plt.xlabel('Hemoglobin')
+    plt.ylabel('Glucose')
+    return
+# findDistance function takes in the cartesian coordinates of two points,
+# (x1, y1) and (x2, y2), and applies the distance formula to return the
+# straight-line distance between them. Scalars and numpy arrays both work
+# because every step is elementwise.
+def findDistance(x1, y1, x2, y2):
+    dx, dy = x1 - x2, y1 - y2
+    return np.sqrt(dx ** 2 + dy ** 2)
+
+# createTestCase function creates one random test case: a 1D array of two
+# values drawn uniformly from [0, 1). nearestNeighborIndex reads index 0 as
+# glucose and index 1 as hemoglobin.
+def createTestCase():
+    # random_sample((2,)) is the call that np.random.rand(2) wraps
+    return np.random.random_sample((2,))
+
+# nearestNeighborIndex takes in the test case point ([glucose, hemoglobin])
+# and the normalized data object, measures the distance from every data row
+# to the test case in one elementwise findDistance call, and returns the
+# index of the row with the smallest distance (first one on ties).
+def nearestNeighborIndex(testCase, normArr):
+    gluc, hemo = testCase[0], testCase[1]
+    distances = findDistance(normArr.hemo, normArr.gluc, hemo, gluc)
+    return distances.argmin()
+
+# graphNearestNeighbor void function takes in a test case ([glucose,
+# hemoglobin]) and the normalized data object. The CSV data is graphed with
+# hemoglobin on the x-axis and glucose on the y-axis, colored by
+# classification. The test case is drawn as an "x" that takes on the color of
+# its nearest neighbor (blue for class 0, red otherwise), and a black line
+# connects the two. A legend is generated and the figure is shown.
+def graphNearestNeighbor(testCase, normArr):
+    nni = nearestNeighborIndex(testCase, normArr)
+    graphCSVFile(normArr)
+    caseGluc, caseHemo = testCase[0], testCase[1]
+    caseColor = 'r' if normArr.disease[nni] != 0 else 'b'
+    plt.scatter(caseHemo, caseGluc, c=caseColor, label='Test Case', marker="x")
+    xs = [caseHemo, normArr.hemo[nni]]
+    ys = [caseGluc, normArr.gluc[nni]]
+    plt.plot(xs, ys, 'k-')
+    plt.legend()
+    plt.show()
+    return
+
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver: graph the normalized CSV data plus one random test case; returns 0.
+def mainDriver():
+    testCase, normArr = createTestCase(), normalizeData(openCSVFile('ckd.csv'))
+    graphNearestNeighbor(testCase, normArr)
+    return 0
+# mainDriver()
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/README.md b/Summer-2020-Data-Analysis-Project-Brandon-Branch/README.md
new file mode 100644
index 0000000..d1d53d0
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/README.md
@@ -0,0 +1,97 @@
+# Summer-2020-ML-Project
+
+# Nearest Neighbor Classifier Script Description:
+Process and graph a CSV file containing biomedical data that relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+Create n number of random test cases and determine whether the case is likely to have CKD depending on the classification of the nearest point.
+
+# Nearest Neighbor Classifier Function Descriptions:
+openCSVFile function takes in the CSV file name and parses/organizes the data into a data object wrapping a 2-D numpy array with the columns being:
+glucose, hemoglobin, classification and each row being a case.
+
+normalizeData function takes in a 2D numpy array and 
+scales down the first and second columns to range from 0-1 and 
+outputs a 2D array with the normalized data.
+
+graphCSVFile void function takes in a 2D numpy array and graphs with:
+the first column (hemoglobin) as the x-axis and second column (glucose) as the y-axis. 
+The third column (classification) is used to determine the color of the points on the graph.
+
+findDistance function takes in the cartesian coordinates of two points and
+uses a simple use of the distance formula
+to return the distance between the two points.
+
+createTestCase function creates two random test cases (hemoglobin and glucose) from 0-1 and
+creates/returns a new 1D array with the two points.
+
+graphNearestNeighbor void function takes in a 2D numpy array (and a cartesian 
+coordinate depending on createTestCase) and 
+graphs the first column (hemoglobin) as the x-axis and the second column (glucose) as the y-axis.
+The third column (classification) determines the color of the points. 
+A randomly generated test case is graphed as a distinct point with a line connecting it to the nearest neighbor whose classification it takes on.
+A legend is generated in a reasonable position.
+
+mainDriver function takes in no inputs and graphs both the original CSV file and the test case.
+This function returns 0.
+
+# K Nearest Neighbor Classifier Script Description:
+
+Process and graph a CSV file containing biomedical data that relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+Create n number of random test cases and determine whether the case is likely to have CKD depending on the mode of the classifications of the k number of nearest points.
+
+# K Nearest Neighbor Classifier Functions Descriptions:
+
+findDistanceArray inputs a numpy array, a random point, and an integer k and
+uses the findDistance function from NearestNeighborClassifier. 
+The function outputs a 1D array containing the indices of the k nearest points to the random test case.
+
+graphKNearestNeighbor void function takes in two 1D and one 2D numpy arrays to graph.
+One of the 1D arrays is a random testCase with its own distinct points.
+The other 1D array is used to circle the k number of points closest to the test case.
+The 2D array contains information parsed from the CSV column.
+The first column (hemoglobin) is graphed as the x-axis and the second column (glucose) as the y-axis.
+The third column  (classification) determines the color of the points. 
+A legend is generated in a reasonable position.
+
+mainDriver function takes in nothing and graphs the original CSV file, the k number of nearest neighbors, and the test case.
+This function returns 0.
+
+# K Means Clustering Script Description:
+
+Process and graph a CSV file containing biomedical data that relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+Randomly generate up to 10 centroids without issue. 
+Each centroid will have a classification. 
+The nearest centroid to a point will determine the point's classification (decide what to do if the distances are equal yourself).
+Create random test cases until centroids stop moving and determine whether each case is likely to have CKD depending on the classification of the nearest centroid.
+
+# K Means Clustering Functions Descriptions:
+
+randomCentroids function takes in an integer number of clusters to be generated. 
+OR asks for k number of integer clusters
+Outputs a 2D array filled with random values between 0-1. 
+The first column represents glucose and the second column represents hemoglobin.
+There are k number of rows representing the number of centroids and the classification of each centroid (i.e.: row index = classification value).
+OR you can have a third column with the classification value.
+
+assignToCentroids function takes in an array of normalized glucose and hemoglobin values from the CSV file and the randomly generated array of centroids from randomCentroids.
+Using a KD-tree (scipy.spatial.KDTree) built on the centroids, points are assigned the same classification as the nearest centroid.
+A 1D array holding the index (classification) of the nearest centroid for each data point is returned.
+
+updateCentroids function inputs the 2D array of centroid locations and of classified and normalized CSV data.
+The average x (hemo) and y (gluc) positions of all data points for each classifications are found and
+an updated 2D array with these average cartesian points as the location for the new centroids is returned along with the original cartesian points. 
+
+iterate void function can either
+a) input information and iterate the original information until centArr ~ upCentArr
+b) don't input any information and run by itself. Similar to a main script
+The function causes for the centroids to reassign points and update the centroid until the centroids do not move.
+
+graphClusters void function takes in a 1D and a 2D numpy array to graph. 
+The 1D array of centroid locations and classifications have distinct points on the graph.
+The 2D array graphs points of normalized CSV data and colors them the same color as their corresponding centroids.
+A legend is generated in a reasonable position.
+
+dataAnalysis void function takes in the original parsed CSV classifications and the final classifications of the data based on K-means clustering (use of centroids) and
+compares the two to find false/true positives/negatives.
+
+mainDriver function takes in nothing and graphs the original CSV file, the k number of nearest neighbors, and the test case.
+This function returns 0.
diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/KNearestNeighborClassifier.cpython-37.pyc b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/KNearestNeighborClassifier.cpython-37.pyc
new file mode 100644
index 0000000..14a61c6
Binary files /dev/null and b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/KNearestNeighborClassifier.cpython-37.pyc differ
diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-37.pyc b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-37.pyc
new file mode 100644
index 0000000..e3fd485
Binary files /dev/null and b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-37.pyc differ
diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-38.pyc b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-38.pyc
new file mode 100644
index 0000000..15a1e11
Binary files /dev/null and b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-38.pyc differ
diff --git a/Summer-2020-Data-Analysis-Project-Brandon-Branch/ckd.csv b/Summer-2020-Data-Analysis-Project-Brandon-Branch/ckd.csv
new file mode 100644
index 0000000..d071373
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/ckd.csv
@@ -0,0 +1,159 @@
+Glucose,Hemoglobin,Class
+117,11.2,1
+70,9.5,1
+380,10.8,1
+157,5.6,1
+173,7.7,1
+95,9.8,1
+264,12.5,1
+70,10,1
+253,10.5,1
+163,9.8,1
+129,9.1,1
+133,10.3,1
+76,7.1,1
+280,13,1
+210,16.1,1
+219,10.4,1
+295,9.2,1
+118,11.4,1
+224,8.1,1
+128,8.2,1
+118,12,1
+105,11.1,1
+288,7.9,1
+273,8.3,1
+122,12.6,1
+303,10.4,1
+102,8.7,1
+107,8.3,1
+117,10,1
+239,9.5,1
+94,9.9,1
+129,8.1,1
+252,11.2,1
+255,7.3,1
+253,10.9,1
+214,10.9,1
+490,11.5,1
+163,7.9,1
+241,9.6,1
+214,9.4,1
+106,8.6,1
+424,12.6,1
+176,3.1,1
+140,15,0
+70,17,0
+82,15.9,0
+119,15.4,0
+99,13,0
+121,13.6,0
+131,14.5,0
+91,14,0
+98,13.9,0
+104,16.1,0
+131,14.1,0
+122,17,0
+118,15.5,0
+117,16.2,0
+132,14.4,0
+97,14.2,0
+133,13.2,0
+122,13.9,0
+121,15,0
+111,14.3,0
+96,13.8,0
+139,14.8,0
+125,16.5,0
+123,15.7,0
+112,14.5,0
+140,16.3,0
+130,15.5,0
+123,14.6,0
+100,16.9,0
+94,16,0
+81,14.7,0
+93,16.6,0
+124,14.9,0
+89,16.7,0
+125,16.8,0
+91,13.5,0
+127,15.1,0
+96,16.9,0
+128,13.1,0
+122,17.1,0
+128,15.2,0
+137,13.6,0
+81,13.9,0
+102,13.2,0
+132,13.7,0
+104,17.3,0
+131,15.6,0
+102,15,0
+120,17.4,0
+105,15.7,0
+109,13.9,0
+130,15.9,0
+100,14,0
+109,15.8,0
+120,13.4,0
+80,14.1,0
+130,13.5,0
+99,17.7,0
+134,14.2,0
+92,14,0
+132,17.8,0
+88,13.3,0
+100,14.3,0
+130,13.4,0
+95,15,0
+111,16.2,0
+106,14.4,0
+97,13.5,0
+108,17.8,0
+99,13.6,0
+83,17.5,0
+109,15,0
+86,13.6,0
+102,14.6,0
+95,15,0
+87,17.1,0
+107,13.6,0
+117,13,0
+88,17.2,0
+105,14.7,0
+70,13.7,0
+89,15,0
+118,14.8,0
+81,15,0
+125,17.4,0
+82,14.9,0
+107,13.6,0
+83,16.2,0
+79,17.6,0
+109,15,0
+133,13.7,0
+111,16.3,0
+74,15.1,0
+88,16.4,0
+97,13.8,0
+78,16.1,0
+113,15.3,0
+75,16.8,0
+119,13.9,0
+132,15.4,0
+113,16.5,0
+100,16.4,0
+93,16.7,0
+94,15.5,0
+112,17,0
+99,15,0
+85,15.6,0
+133,14.8,0
+117,13,0
+137,14.1,0
+140,15.7,0
+75,16.5,0
+100,15.8,0
+114,14.2,0
+131,15.8,0