From 03bca5bb8255b5468d48df93f9c91f48c3409915 Mon Sep 17 00:00:00 2001
From: Neal McBurnett <neal@mcburnett.org>
Date: Sun, 28 Feb 2021 12:02:56 -0700
Subject: [PATCH] Customize for screenshots, fix install, usage

Fix bugs:
 Save model with correct name.
 Tensorflow should be < 2.0
 matplotlib is needed

Enhancements:

New screenshot-customized train and predict files.
Accept image filenames for predictions.
Assume 1080p screenshots when scaling images
Print predictions
---
 README.md             | 14 ++++++++++
 requirements.txt      |  3 +-
 screenshot_predict.py | 37 +++++++++++++++++++++++++
 screenshot_train.py   | 64 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 screenshot_predict.py
 create mode 100755 screenshot_train.py

diff --git a/README.md b/README.md
index 69e2e55..d4b0296 100644
--- a/README.md
+++ b/README.md
@@ -10,3 +10,17 @@ https://screen-shot-classifier.herokuapp.com/
 # Model :
 Download the model from : https://drive.google.com/open?id=1k99ndVPuxI3kDGs6or2rSUWPC5LSjF-9
 
+# Try standalone prediction code.
+python screenshot_predict.py [files]
+
+# To train a new model, first populate two class subdirectories (e.g. chats and others) in each of the directories training_set and test_set.
+# You can pick random files for testing and move them to the test directory. Specify the number to move as 10-20% of the samples.
+cd training_set/chats
+ls | shuf -n 20 | xargs -i mv {} ../../test_set/chats
+cd ../others
+ls | shuf -n 20 | xargs -i mv {} ../../test_set/others
+cd ../..
+
+# Then run the training code.
+# It may be necessary to adjust `steps_per_epoch` in the code to match #images / batch_size
+python screenshot_train.py
diff --git a/requirements.txt b/requirements.txt
index bc4b6fc..1c44e8a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ numpy>=1.9.2
 scipy>=0.15.1
 scikit-learn>=0.18
 keras==2.2.4
-tensorflow>=1.15.2
+tensorflow>=1.15.2,<2.0
 h5py==2.7.1
 Pillow>=2.2.2
+matplotlib
diff --git a/screenshot_predict.py b/screenshot_predict.py
new file mode 100644
index 0000000..99c0f22
--- /dev/null
+++ b/screenshot_predict.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+"""
+Generate predictions and show scores for screenshot filenames listed on command line.
+
+TODO:
+ Use labels from training directories, don't assume a classification name
+ Provide option to show images (show=True).
+"""
+
+import os
+os.environ['KERAS_BACKEND'] = 'tensorflow'
+
+import sys
+import matplotlib.pyplot as plt
+from keras.models import load_model
+from keras.preprocessing.image import image
+import numpy as np
+import glob
+
+classifier=load_model("model.h5")  # loaded once at start-up; screenshot_train.py saves a model under this file name
+
+def load_image(img_path, show=True):
+    """Return img_path resized with target_size=(48, 54) as a batch-of-one array scaled by 1/255; if show, also display the original."""
+    img = image.load_img(img_path, target_size=(48, 54))
+    img_tensor = image.img_to_array(img)                    # (height, width, channels)
+    img_tensor = np.expand_dims(img_tensor, axis=0)         # (1, height, width, channels): prepend the batch axis the model expects
+    img_tensor /= 255.                                      # same 1/255 rescale that screenshot_train.py applies to its inputs
+    if show:
+        plt.imshow(image.load_img(img_path))                # decode the full-size original only when it is actually displayed
+        plt.axis('off')
+        plt.show()
+    return img_tensor
+
+for img_file in sys.argv[1:]:
+    # Score each file named on the command line; below 0.5 prints " map ", anything else "other"
+    pred = classifier.predict(load_image(img_file, show=False))[0][0]
+    print(f'score={pred:.6f} {" map " if pred < 0.5 else "other"} {img_file}')
diff --git a/screenshot_train.py b/screenshot_train.py
new file mode 100755
index 0000000..d5db2cb
--- /dev/null
+++ b/screenshot_train.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+"""
+Generate tensorflow binary classification model for images, as model.h5.
+Assume images are screenshots, and don't bother with augmentation.
+Train on the images found in two subdirectories each of training_set and test_set.
+"""
+
+import os
+os.environ['KERAS_BACKEND'] = 'tensorflow'
+
+import math
+from keras.models import Sequential
+from keras.layers import Convolution2D
+from keras.layers import MaxPooling2D
+from keras.layers import Flatten
+from keras.layers import Dense
+
+# batch_size feeds both directory iterators; n_filters sizes the conv layers separately so tuning one cannot change the other
+batch_size = 16
+n_filters = 16
+
+classifier = Sequential()
+classifier.add(Convolution2D(n_filters, (3, 3), input_shape=(48, 54, 3), activation='relu'))
+classifier.add(MaxPooling2D(pool_size=(2, 2)))
+classifier.add(Convolution2D(n_filters, (3, 3), activation='relu'))
+classifier.add(MaxPooling2D(pool_size=(2, 2)))
+classifier.add(Flatten())
+classifier.add(Dense(units=128, activation='relu'))
+classifier.add(Dense(units=1, activation='sigmoid'))
+
+classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+from keras.preprocessing.image import ImageDataGenerator
+
+# Pixel values are only rescaled by 1/255; no augmentation is configured
+train_datagen = ImageDataGenerator(rescale=1./255)
+test_datagen = ImageDataGenerator(rescale=1./255)
+
+# save_to_dir has to exist before the iterator writes resized images into it
+os.makedirs("tmp_resized_images", exist_ok=True)
+
+# Originals are assumed to be 1080p (1920x1080). NOTE(review): target_size is (height, width), so (48, 54) yields 48 rows x 54 columns; 1920/40=48 wide by 1080/20=54 high would be (54, 48) - confirm intent
+training_set = train_datagen.flow_from_directory(
+        'training_set',
+        target_size=(48, 54),
+        save_to_dir="tmp_resized_images",
+        batch_size=batch_size,
+        class_mode='binary')
+
+test_set = test_datagen.flow_from_directory(
+        'test_set',
+        target_size=(48, 54),
+        batch_size=batch_size,
+        class_mode='binary')
+
+# Step counts come from the number of images each iterator found, so they follow the dataset size
+classifier.fit_generator(
+        training_set,
+        steps_per_epoch=math.ceil(training_set.samples / batch_size),
+        epochs=10,
+        validation_data=test_set,
+        validation_steps=math.ceil(test_set.samples / batch_size))
+
+classifier.save("model.h5")