update plot_roc_curve to plot_roc (#86)

reiinakano · web-flow · commit 4d72ea8b77a2 · 2018-05-12T19:10:21.000+09:00
diff --git a/README.md b/README.md
@@ -41,7 +41,7 @@ predicted_probas = nb.predict_proba(X_test)
 # The magic happens here
 import matplotlib.pyplot as plt
 import scikitplot as skplt
-skplt.metrics.plot_roc_curve(y_test, predicted_probas)
+skplt.metrics.plot_roc(y_test, predicted_probas)
 plt.show()
 ```
 ![roc_curves](examples/roc_curves.png)
diff --git a/docs/metrics.rst b/docs/metrics.rst
@@ -5,4 +5,4 @@ Metrics Module (API Reference)
 ==============================
 
 .. automodule:: scikitplot.metrics
-   :members: plot_confusion_matrix, plot_roc_curve, plot_ks_statistic, plot_precision_recall, plot_silhouette, plot_calibration_curve, plot_cumulative_gain, plot_lift_curve
+   :members: plot_confusion_matrix, plot_roc, plot_ks_statistic, plot_precision_recall, plot_silhouette, plot_calibration_curve, plot_cumulative_gain, plot_lift_curve
diff --git a/examples/plot_roc.py b/examples/plot_roc.py
@@ -13,5 +13,5 @@
 nb = GaussianNB()
 nb.fit(X, y)
 probas = nb.predict_proba(X)
-skplt.metrics.plot_roc_curve(y_true=y, y_probas=probas)
+skplt.metrics.plot_roc(y_true=y, y_probas=probas)
 plt.show()
diff --git a/scikitplot/metrics.py b/scikitplot/metrics.py
@@ -175,6 +175,8 @@ def plot_confusion_matrix(y_true, y_pred, labels=None, true_labels=None,
     return ax
 
 
+@deprecated('This will be removed in v0.5.0. Please use '
+            'scikitplot.metrics.plot_roc instead.')
 def plot_roc_curve(y_true, y_probas, title='ROC Curves',
                    curves=('micro', 'macro', 'each_class'),
                    ax=None, figsize=None, cmap='nipy_spectral',
@@ -322,6 +324,138 @@ def plot_roc_curve(y_true, y_probas, title='ROC Curves',
     return ax
 
 
+def plot_roc(y_true, y_probas, title='ROC Curves',
+                   plot_micro=True, plot_macro=True, classes_to_plot=None,
+                   ax=None, figsize=None, cmap='nipy_spectral',
+                   title_fontsize="large", text_fontsize="medium"):
+    """Generates the ROC curves from labels and predicted scores/probabilities
+
+    Args:
+        y_true (array-like, shape (n_samples)):
+            Ground truth (correct) target values.
+
+        y_probas (array-like, shape (n_samples, n_classes)):
+            Prediction probabilities for each class returned by a classifier.
+
+        title (string, optional): Title of the generated plot. Defaults to
+            "ROC Curves".
+
+        plot_micro (boolean, optional): Plot the micro average ROC curve.
+            Defaults to ``True``.
+
+        plot_macro (boolean, optional): Plot the macro average ROC curve.
+            Defaults to ``True``.
+
+        classes_to_plot (list-like, optional): Classes for which the ROC
+            curve should be plotted, e.g. [0, 'cold']. If a given class does not
+            exist, it will be ignored. If ``None``, all classes will be plotted.
+            Defaults to ``None``.
+
+        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
+            plot the curve. If None, the plot is drawn on a new set of axes.
+
+        figsize (2-tuple, optional): Tuple denoting figure size of the plot
+            e.g. (6, 6). Defaults to ``None``.
+
+        cmap (string or :class:`matplotlib.colors.Colormap` instance, optional):
+            Colormap used to pick the color of each per-class ROC curve. View
+            Matplotlib Colormap documentation for available options.
+            https://matplotlib.org/users/colormaps.html
+
+        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
+            Use e.g. "small", "medium", "large" or integer-values. Defaults to
+            "large".
+
+        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
+            Use e.g. "small", "medium", "large" or integer-values. Defaults to
+            "medium".
+
+    Returns:
+        ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was
+            drawn.
+
+    Example:
+        >>> import scikitplot as skplt
+        >>> nb = GaussianNB()
+        >>> nb = nb.fit(X_train, y_train)
+        >>> y_probas = nb.predict_proba(X_test)
+        >>> skplt.metrics.plot_roc(y_test, y_probas)
+        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
+        >>> plt.show()
+
+        .. image:: _static/examples/plot_roc_curve.png
+           :align: center
+           :alt: ROC Curves
+    """
+    y_true = np.array(y_true)
+    y_probas = np.array(y_probas)
+
+    classes = np.unique(y_true)
+    probas = y_probas
+
+    if classes_to_plot is None:
+        classes_to_plot = classes
+
+    if ax is None:
+        fig, ax = plt.subplots(1, 1, figsize=figsize)
+
+    ax.set_title(title, fontsize=title_fontsize)
+
+    fpr_dict = dict()
+    tpr_dict = dict()
+
+    indices_to_plot = np.in1d(classes, classes_to_plot)
+    for i, to_plot in enumerate(indices_to_plot):
+        fpr_dict[i], tpr_dict[i], _ = roc_curve(y_true, probas[:, i],
+                                                pos_label=classes[i])
+        if to_plot:
+            roc_auc = auc(fpr_dict[i], tpr_dict[i])
+            color = plt.cm.get_cmap(cmap)(float(i) / len(classes))
+            ax.plot(fpr_dict[i], tpr_dict[i], lw=2, color=color,
+                    label='ROC curve of class {0} (area = {1:0.2f})'
+                          ''.format(classes[i], roc_auc))
+
+    if plot_micro:
+        binarized_y_true = label_binarize(y_true, classes=classes)
+        if len(classes) == 2:
+            binarized_y_true = np.hstack(
+                (1 - binarized_y_true, binarized_y_true))
+        fpr, tpr, _ = roc_curve(binarized_y_true.ravel(), probas.ravel())
+        roc_auc = auc(fpr, tpr)
+        ax.plot(fpr, tpr,
+                label='micro-average ROC curve '
+                      '(area = {0:0.2f})'.format(roc_auc),
+                color='deeppink', linestyle=':', linewidth=4)
+
+    if plot_macro:
+        # Compute macro-average ROC curve and ROC area
+        # First aggregate all false positive rates
+        all_fpr = np.unique(np.concatenate([fpr_dict[x] for x in range(len(classes))]))
+
+        # Then interpolate all ROC curves at these points
+        mean_tpr = np.zeros_like(all_fpr)
+        for i in range(len(classes)):
+            mean_tpr += interp(all_fpr, fpr_dict[i], tpr_dict[i])
+
+        # Finally average it and compute AUC
+        mean_tpr /= len(classes)
+        roc_auc = auc(all_fpr, mean_tpr)
+
+        ax.plot(all_fpr, mean_tpr,
+                label='macro-average ROC curve '
+                      '(area = {0:0.2f})'.format(roc_auc),
+                color='navy', linestyle=':', linewidth=4)
+
+    ax.plot([0, 1], [0, 1], 'k--', lw=2)
+    ax.set_xlim([0.0, 1.0])
+    ax.set_ylim([0.0, 1.05])
+    ax.set_xlabel('False Positive Rate', fontsize=text_fontsize)
+    ax.set_ylabel('True Positive Rate', fontsize=text_fontsize)
+    ax.tick_params(labelsize=text_fontsize)
+    ax.legend(loc='lower right', fontsize=text_fontsize)
+    return ax
+
+
 def plot_ks_statistic(y_true, y_probas, title='KS Statistic Plot',
                       ax=None, figsize=None, title_fontsize="large",
                       text_fontsize="medium"):
@@ -554,7 +688,7 @@ def plot_precision_recall(y_true, y_probas,
             "Precision-Recall curve".
 
         plot_micro (boolean, optional): Plot the micro average ROC curve.
-            Defaults to `True`.
+            Defaults to ``True``.
 
         classes_to_plot (list-like, optional): Classes for which the precision-recall
             curve should be plotted. e.g. [0, 'cold']. If given class does not exist,
diff --git a/scikitplot/tests/test_metrics.py b/scikitplot/tests/test_metrics.py
@@ -13,6 +13,7 @@
 
 from scikitplot.metrics import plot_confusion_matrix
 from scikitplot.metrics import plot_roc_curve
+from scikitplot.metrics import plot_roc
 from scikitplot.metrics import plot_ks_statistic
 from scikitplot.metrics import plot_precision_recall_curve
 from scikitplot.metrics import plot_precision_recall
@@ -156,6 +157,72 @@ def test_array_like(self):
         plot_roc_curve(['b', 'a'], [[0.8, 0.2], [0.2, 0.8]])
 
 
+class TestPlotROC(unittest.TestCase):
+    def setUp(self):
+        np.random.seed(0)
+        self.X, self.y = load_data(return_X_y=True)
+        p = np.random.permutation(len(self.X))
+        self.X, self.y = self.X[p], self.y[p]
+
+    def tearDown(self):
+        plt.close("all")
+
+    def test_string_classes(self):
+        np.random.seed(0)
+        clf = LogisticRegression()
+        clf.fit(self.X, convert_labels_into_string(self.y))
+        probas = clf.predict_proba(self.X)
+        plot_roc(convert_labels_into_string(self.y), probas)
+
+    def test_ax(self):
+        np.random.seed(0)
+        clf = LogisticRegression()
+        clf.fit(self.X, self.y)
+        probas = clf.predict_proba(self.X)
+        fig, ax = plt.subplots(1, 1)
+        out_ax = plot_roc(self.y, probas)
+        assert ax is not out_ax
+        out_ax = plot_roc(self.y, probas, ax=ax)
+        assert ax is out_ax
+
+    def test_cmap(self):
+        np.random.seed(0)
+        clf = LogisticRegression()
+        clf.fit(self.X, self.y)
+        probas = clf.predict_proba(self.X)
+        plot_roc(self.y, probas, cmap='nipy_spectral')
+        plot_roc(self.y, probas, cmap=plt.cm.nipy_spectral)
+
+    def test_plot_micro(self):
+        np.random.seed(0)
+        clf = LogisticRegression()
+        clf.fit(self.X, self.y)
+        probas = clf.predict_proba(self.X)
+        plot_roc(self.y, probas, plot_micro=False)
+        plot_roc(self.y, probas, plot_micro=True)
+
+    def test_plot_macro(self):
+        np.random.seed(0)
+        clf = LogisticRegression()
+        clf.fit(self.X, self.y)
+        probas = clf.predict_proba(self.X)
+        plot_roc(self.y, probas, plot_macro=False)
+        plot_roc(self.y, probas, plot_macro=True)
+
+    def test_classes_to_plot(self):
+        np.random.seed(0)
+        clf = LogisticRegression()
+        clf.fit(self.X, self.y)
+        probas = clf.predict_proba(self.X)
+        plot_roc(self.y, probas, classes_to_plot=[0, 1])
+        plot_roc(self.y, probas, classes_to_plot=np.array([0, 1]))
+
+    def test_array_like(self):
+        plot_roc([0, 'a'], [[0.8, 0.2], [0.2, 0.8]])
+        plot_roc([0, 1], [[0.8, 0.2], [0.2, 0.8]])
+        plot_roc(['b', 'a'], [[0.8, 0.2], [0.2, 0.8]])
+
+
 class TestPlotKSStatistic(unittest.TestCase):
     def setUp(self):
         np.random.seed(0)
@@ -292,9 +359,8 @@ def test_plot_micro(self):
         clf = LogisticRegression()
         clf.fit(self.X, self.y)
         probas = clf.predict_proba(self.X)
-        ax_micro = plot_precision_recall(self.y, probas, plot_micro=True)
-        ax_class = plot_precision_recall(self.y, probas, plot_micro=False)
-        self.assertNotEqual(ax_micro, ax_class)
+        plot_precision_recall(self.y, probas, plot_micro=True)
+        plot_precision_recall(self.y, probas, plot_micro=False)
 
     def test_cmap(self):
         np.random.seed(0)