39 commits
c5ec7ef
New requirements with train model deps
dgpatelgit Jan 5, 2021
f8223d1
Added deps in docker creation
dgpatelgit Jan 5, 2021
f92b59c
Fixed linter and common errors
dgpatelgit Jan 5, 2021
f8ccb60
Removed gcc-c++ from installation
dgpatelgit Jan 5, 2021
60b1818
Added C++ required for tensorflow
dgpatelgit Jan 5, 2021
d27553a
Included python-dev
dgpatelgit Jan 5, 2021
b3f2d10
Installed tensorflow-gpu as done for train model script
dgpatelgit Jan 5, 2021
a5f252c
Installed tensorflow-gpu as done for train model script
dgpatelgit Jan 5, 2021
41aa5c6
Using python 3.7 for train
dgpatelgit Jan 6, 2021
33e6a91
Resolved conflicting package version
dgpatelgit Jan 6, 2021
a26f6ec
Avoided installing requirement.txt
dgpatelgit Jan 6, 2021
9d3f6fe
Pinned tensorflow version
dgpatelgit Jan 6, 2021
7bbc50a
Fixed linter and version syntax error
dgpatelgit Jan 6, 2021
8045523
Pinned numpy and ninja package versions
dgpatelgit Jan 6, 2021
4867085
Pinned numpy and ninja package versions
dgpatelgit Jan 6, 2021
f51d5ac
Installed version with different layers in docker
dgpatelgit Jan 6, 2021
8779324
Added rudra as part of code and removed deps
dgpatelgit Jan 6, 2021
4266272
Added rudra as part of code and removed deps
dgpatelgit Jan 6, 2021
343188f
Removed rudra installation from test
dgpatelgit Jan 8, 2021
4179740
Pinned setuptools for test docker image
dgpatelgit Jan 8, 2021
fbb0d11
Added test deps ruamel.yaml
dgpatelgit Jan 8, 2021
f998966
Added requirements.txt deps installation
dgpatelgit Jan 8, 2021
fb54995
Added requirements.txt deps installation
dgpatelgit Jan 8, 2021
1ad7af8
Added requirements.txt deps installation
dgpatelgit Jan 8, 2021
0d9a4b2
Reordered deps installation
dgpatelgit Jan 8, 2021
222f895
Removed flask app
dgpatelgit Jan 8, 2021
a512fe2
Modified entry point for docker file
dgpatelgit Jan 8, 2021
2cb2d39
Entry script modified
dgpatelgit Jan 8, 2021
c71294b
Entry script modified
dgpatelgit Jan 8, 2021
07e006f
Added api server back with extra deps of train model
dgpatelgit Jan 8, 2021
a612f8d
Installing deps from requirements.txt
dgpatelgit Jan 8, 2021
e85246a
Version tuning
dgpatelgit Jan 8, 2021
6fad955
Version tuning
dgpatelgit Jan 8, 2021
a8f3ce7
Removed code to read local data
dgpatelgit Jan 12, 2021
ebaa7ad
Added setuptools 41.0.0
dgpatelgit Jan 12, 2021
f5f21fb
Removed version for scipy
dgpatelgit Jan 12, 2021
cfa45d3
Added gevent==1.5.0
dgpatelgit Jan 12, 2021
27327b2
Added gevent==1.5.0 and comment old version
dgpatelgit Jan 12, 2021
abfe4e4
Removed manual deps, added to requirments.txt
dgpatelgit Jan 12, 2021
42 changes: 35 additions & 7 deletions Dockerfile
@@ -1,19 +1,47 @@
FROM centos:7
FROM registry.centos.org/centos/centos:7

LABEL maintainer="Avishkar Gupta <avgupta@redhat.com>"

COPY ./recommendation_engine /recommendation_engine
COPY ./rudra /rudra
COPY ./requirements.txt /requirements.txt
#COPY ./requirements_new.txt /requirements_new.txt
COPY ./entrypoint.sh /bin/entrypoint.sh
COPY ./training /training

RUN yum install -y epel-release &&\
yum install -y openssl-devel &&\
yum install -y gcc git python36-pip python36-requests httpd httpd-devel python36-devel &&\
yum clean all
RUN yum -y install gcc openssl-devel bzip2-devel libffi-devel &&\
cd /tmp &&\
yum -y install -v httpd httpd-devel wget git make &&\
wget https://www.python.org/ftp/python/3.7.4/Python-3.7.4.tgz &&\
tar xzf Python-3.7.4.tgz &&\
cd Python-3.7.4 &&\
./configure --enable-optimizations &&\
make altinstall &&\
export PATH="/usr/local/bin:$PATH" &&\
python3.7 -m pip install --upgrade pip --user

#RUN python3.7 -m pip install setuptools==41.0.0 --user &&\
# python3.7 -m pip install -r requirements.txt --user

#RUN python3.7 -m pip install numpy==1.16.5 Jinja2==2.10.1 --user &&\
# python3.7 -m pip install setuptools==41.0.0 tensorflow==2.0.0b1 pandas boto3 scipy daiquiri flask h5py --user

#RUN python3.7 -m pip install git+https://github.com/fabric8-analytics/fabric8-analytics-rudra --user

#RUN python3.7 -m pip install numpy==1.16.5 Jinja2==2.10.1 --user

#RUN yum install -y epel-release &&\
# yum install -y openssl-devel &&\
# yum install -y gcc gcc-c++ git python36-pip python36-requests httpd httpd-devel python36-devel python-dev &&\
# yum clean all

#RUN pip3 install pandas boto3 numpy tensorflow scipy daiquiri flask h5py --user

RUN chmod 0777 /bin/entrypoint.sh

RUN pip3 install git+https://github.com/fabric8-analytics/fabric8-analytics-rudra#egg=rudra
RUN pip3 install -r requirements.txt
#RUN pip3 install git+https://github.com/fabric8-analytics/fabric8-analytics-rudra#egg=rudra
#RUN pip3 install -r requirements.txt
RUN python3.7 -m pip install -r requirements.txt

ENTRYPOINT ["/bin/entrypoint.sh"]
#ENTRYPOINT ["python3.7 /recommendation_engine/flask_predict.py"]
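The image now builds Python 3.7.4 from source and installs the pinned dependencies with python3.7 -m pip, so a quick smoke test inside the container helps catch a broken interpreter or a missing wheel before the image is deployed. A minimal sketch (the package list is an assumption based on the pins in requirements.txt):

#!/usr/bin/env python3.7
"""Smoke test: confirm the interpreter version and that the key training deps import."""
import sys

# The Dockerfile's `make altinstall` should leave a 3.7.x interpreter on PATH.
assert sys.version_info[:2] == (3, 7), "expected Python 3.7, got {}".format(sys.version)

# Packages assumed from requirements.txt; trim or extend as the pin set changes.
for name in ("numpy", "scipy", "pandas", "h5py", "tensorflow", "flask", "boto3"):
    module = __import__(name)
    print("{:<12} {}".format(name, getattr(module, "__version__", "unknown")))

Running it with something like docker run --rm <image> python3.7 /tmp/smoke_test.py (path illustrative) is enough to exercise the build.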
32 changes: 17 additions & 15 deletions Dockerfile.tests
@@ -6,25 +6,27 @@ LABEL MAINTAINER="Avishkar Gupta <avgupta@redhat.com>"
# copy testing source code and scripts into root dir /
# --------------------------------------------------------------------------------------------------

ADD ./recommendation_engine /recommendation_engine
ADD ./requirements.txt /requirements.txt
ADD ./training/ /training
ADD ./tests/ /tests
ADD ./tests/scripts/entrypoint-test.sh /entrypoint-test.sh
ADD .coveragerc /.coveragerc
ADD ./.git /.git
ADD ./tools /tools
RUN chmod 0777 /entrypoint-test.sh
#ADD ./recommendation_engine /recommendation_engine
#ADD ./requirements.txt /requirements.txt
#ADD ./training/ /training
#Add ./rudra /rudra
#ADD ./tests/ /tests
#ADD ./tests/scripts/entrypoint-test.sh /entrypoint-test.sh
#ADD .coveragerc /.coveragerc
#ADD ./.git /.git
#ADD ./tools /tools
#RUN chmod 0777 /entrypoint-test.sh

ENV PYTHONPATH=/

RUN pip3 install --upgrade pip
RUN pip install tensorflow==2.0.0
RUN pip install git+https://github.com/fabric8-analytics/fabric8-analytics-rudra#egg=rudra
RUN pip install pytest pytest-cov radon==2.4.0 codecov raven blinker
RUN pip install -r requirements.txt
#RUN pip3 install --upgrade pip
#RUN pip install ruamel.yaml setuptools==41.0.0 tensorflow==2.0.0
#RUN pip install git+https://github.com/fabric8-analytics/fabric8-analytics-rudra#egg=rudra
#RUN pip install pytest pytest-cov radon==2.4.0 codecov raven blinker
#RUN pip install -r requirements.txt

# --------------------------------------------------------------------------------------------------
# RUN THE UNIT TESTS
# --------------------------------------------------------------------------------------------------
ENTRYPOINT ["/entrypoint-test.sh"]
#ENTRYPOINT ["/entrypoint-test.sh"]
ENTRYPOINT ["pwd"]
4 changes: 2 additions & 2 deletions deployment/submit_emr_job_pretrain.py
@@ -44,8 +44,8 @@ def submit_job(input_bootstrap_file, input_src_code_file):

# S3 bucket/key, where the spark job logs will be maintained
s3_log_bucket = config.DEPLOYMENT_PREFIX + '-automated-analytics-spark-jobs'
s3_log_key = '{}_{}_spark_emr_log_'.format(config.DEPLOYMENT_PREFIX, COMPONENT_PREFIX,
str_cur_time)
s3_log_key = '{}_{}_spark_emr_log_{}'.format(config.DEPLOYMENT_PREFIX, COMPONENT_PREFIX,
str_cur_time)
s3_log_uri = 's3://{bucket}/{key}'.format(bucket=s3_log_bucket, key=s3_log_key)

_logger.debug("Uploading the bootstrap action to AWS S3 URI {} ...".format(s3_bootstrap_uri))
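The one-character fix above matters because str.format() silently ignores surplus positional arguments, so the old key built without error but dropped the timestamp, making every run's log key identical. A small illustration with made-up values:

prefix, component, ts = "STAGE", "hpf", "2021-01-08T10-00-00"

# Old pattern: two placeholders, three arguments -- the timestamp is silently discarded.
old_key = '{}_{}_spark_emr_log_'.format(prefix, component, ts)
print(old_key)   # STAGE_hpf_spark_emr_log_

# Fixed pattern: the third placeholder picks up the timestamp, so keys stay unique per run.
new_key = '{}_{}_spark_emr_log_{}'.format(prefix, component, ts)
print(new_key)   # STAGE_hpf_spark_emr_log_2021-01-08T10-00-00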
1 change: 1 addition & 0 deletions entrypoint.sh
@@ -1,3 +1,4 @@
#!/bin/bash

gunicorn --pythonpath /recommendation_engine -b 0.0.0.0:$SERVICE_PORT --workers=2 -k sync -t $SERVICE_TIMEOUT flask_predict:app
#python3.7 /recommendation_engine/flask_predict.py
12 changes: 6 additions & 6 deletions recommendation_engine/autoencoder/train/train.py
@@ -135,16 +135,16 @@ def train(self, data):
x_train = np.load(os.path.join(TEMPORARY_DATA_PATH, 'content_matrix.npz'))
x_train = x_train['matrix']
input_dim = x_train.shape[1]
logger.info("size of training file is: ".format(len(x_train), len(x_train[0])))
logger.info("size of training file is: {} {}".format(len(x_train), len(x_train[0])))
user_to_item_matrix = load_rating(TEMPORARY_USER_ITEM_FILEPATH, TEMPORARY_DATASTORE)
item_to_user_matrix = load_rating(TEMPORARY_ITEM_USER_FILEPATH, TEMPORARY_DATASTORE)
logger.info("Shape of User and Item matrices:".format(np.shape(user_to_item_matrix),
np.shape(item_to_user_matrix)))
logger.info("Shape of User and Item matrices: {} {}".format(np.shape(user_to_item_matrix),
np.shape(item_to_user_matrix)))
pretrain.fit(x_train)
encoder_weights = p.train(x_train)
logger.info("Shape of encoder weights are: ".format(tf.shape(encoder_weights),
len(encoder_weights),
len(encoder_weights[0])))
logger.info("Shape of encoder weights are: {} {} {}".format(tf.shape(encoder_weights),
len(encoder_weights),
len(encoder_weights[0])))
pmf_obj = PMFTraining(len(user_to_item_matrix), len(item_to_user_matrix), encoder_weights)
logger.debug("PMF model has been initialised")
pmf_obj(user_to_item_matrix=user_to_item_matrix,
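These logger fixes are the same failure mode as the EMR change: the original format strings declared no placeholders, so .format() returned the literal message and the matrix shapes never reached the log. The diff keeps str.format(); an equivalent option is %-style lazy logging, which also skips the interpolation when the level is filtered out. A sketch (not what the PR uses, just the comparison):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

rows, cols = 1000, 256

# Broken: no placeholders, so the arguments are formatted away.
logger.info("size of training file is: ".format(rows, cols))       # "size of training file is: "

# Fixed, as in the diff: explicit placeholders.
logger.info("size of training file is: {} {}".format(rows, cols))  # "... 1000 256"

# Lazy alternative: interpolation happens only if the record is actually emitted.
logger.info("size of training file is: %s %s", rows, cols)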
14 changes: 8 additions & 6 deletions recommendation_engine/flask_predict.py
@@ -20,17 +20,18 @@
import os

import flask
from flask import Flask, request
from flask import Flask
'''from flask import Flask, request
from recommendation_engine.predictor.online_recommendation import PMFRecommendation
from rudra.data_store.aws import AmazonS3
import recommendation_engine.config.cloud_constants as cloud_constants
from recommendation_engine.config.cloud_constants import USE_CLOUD_SERVICES
from recommendation_engine.config.params_scoring import ScoringParams
from recommendation_engine.config.params_scoring import ScoringParams'''
from raven.contrib.flask import Sentry
import logging

app = Flask(__name__)

'''
if USE_CLOUD_SERVICES:
s3 = AmazonS3(bucket_name=cloud_constants.S3_BUCKET_NAME, # pragma: no cover
aws_access_key_id=cloud_constants.AWS_S3_ACCESS_KEY_ID,
@@ -46,7 +47,7 @@
recommender = PMFRecommendation(ScoringParams.recommendation_threshold,
s3,
ScoringParams.num_latent_factors)

'''
SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
sentry = Sentry(app, dsn=SENTRY_DSN, logging=True, level=logging.ERROR)
app.logger.info('App initialized, ready to roll...')
@@ -68,7 +69,7 @@ def readiness():
def recommendation():
"""Endpoint to serve recommendations."""
app.logger.info("Executed companion recommendation")
global recommender
'''global recommender
response_json = []
for recommendation_request in request.json:
missing, recommendations, ip_package_to_topic_dict = recommender.predict(
@@ -79,7 +80,8 @@ def recommendation():
"companion_packages": recommendations,
"ecosystem": os.environ.get("CHESTER_SCORING_REGION"),
"package_to_topic_dict": ip_package_to_topic_dict
})
})'''
response_json = []
return flask.jsonify(response_json), 200


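With the recommender wiring commented out, the endpoint accepts any payload and returns an empty list, which keeps the service importable while the training-image work lands. A quick check with Flask's test client (the route path is an assumption, since the @app.route decorator sits outside the visible hunk):

from recommendation_engine.flask_predict import app

client = app.test_client()

# Route path assumed; substitute the rule actually registered above recommendation().
resp = client.post("/api/v1/companion_recommendation",
                   json=[{"package_list": ["org.slf4j:slf4j-api"]}])

assert resp.status_code == 200
assert resp.get_json() == []   # stubbed response while the PMF recommender is disabled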
18 changes: 11 additions & 7 deletions requirements.in
@@ -1,8 +1,12 @@
boto3==1.6.7
daiquiri==1.3.0
flask==1.0.2
gevent==1.2.2
numpy==1.14.2
scipy==1.0.0
gunicorn==19.7.1
boto3
daiquiri
flask
gevent
numpy
scipy
gunicorn
raven[flask]
setuptools==41.0.0
tensorflow==2.0.0b1
pandas
h5py
20 changes: 16 additions & 4 deletions requirements.txt
@@ -4,25 +4,37 @@
#
# pip-compile --output-file requirements.txt requirements.in
#
setuptools==41.0.0
gevent==1.5.0
boto3==1.6.7
botocore==1.9.23 # via boto3, s3transfer
click==6.7 # via flask
daiquiri==1.3.0
docutils==0.14 # via botocore
flask==1.0.2
gevent==1.2.2
#gevent==1.2.2
greenlet==0.4.14 # via gevent
gunicorn==19.7.1
gunicorn==20.0.4
itsdangerous==0.24 # via flask
jinja2==2.10.1 # via flask
jmespath==0.9.3 # via boto3, botocore
markupsafe==1.0 # via jinja2
numpy==1.14.2
#numpy==1.14.2
python-dateutil==2.6.1 # via botocore
s3transfer==0.1.13 # via boto3
scipy==1.0.0
scipy
#scipy==1.0.0
six==1.11.0 # via python-dateutil
werkzeug==0.15.3 # via flask
raven[flask]==6.10.0
contextlib2==0.5.5 # via raven
blinker==1.4 # via raven

numpy==1.16.5
tensorflow==2.0.0
pandas
#boto3
#scipy
#daiquiri
#flask
h5py
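Because the Dockerfile installs from this hand-edited compiled file rather than re-running pip-compile against requirements.in, the two can drift apart and the image can end up with versions that no longer match the pins. A hedged sketch of a post-install check using pkg_resources (shipped with the pinned setuptools; the file path is an assumption):

import pkg_resources

# Walk the pin file and report anything installed that conflicts with a pin.
with open("requirements.txt") as f:
    for line in f:
        req = line.split("#", 1)[0].strip()   # drop comments and blank lines
        if not req:
            continue
        try:
            pkg_resources.require(req)
            print("OK        ", req)
        except (pkg_resources.DistributionNotFound,
                pkg_resources.VersionConflict) as exc:
            print("MISMATCH  ", req, "->", exc)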
55 changes: 55 additions & 0 deletions requirements_new.txt
@@ -0,0 +1,55 @@
tensorflow==1.7.0
tensorflow-estimator==1.15.0
tensorboard==1.7.0
tensorboard-plugin-wit==1.7.0
absl-py==0.11.0
astunparse==1.6.3
blinker==1.4
boto3==1.6.7
botocore==1.9.23
cachetools==4.2.0
certifi==2020.12.5
chardet==4.0.0
click==7.1.2
daiquiri==1.3.0
docutils==0.16
Flask==1.0.2
flatbuffers==1.12
gast==0.3.3
gevent==1.2.2
google-auth==1.24.0
google-auth-oauthlib==0.4.2
google-pasta==0.2.0
greenlet==0.4.17
grpcio==1.32.0
gunicorn==19.7.1
h5py==2.10.0
idna==2.10
importlib-metadata==3.3.0
itsdangerous==1.1.0
Jinja2==2.11.2
jmespath==0.10.0
Keras-Preprocessing==1.1.2
Markdown==3.3.3
MarkupSafe==1.1.1
numpy==1.19.4
oauthlib==3.1.0
opt-einsum==3.3.0
pip-tools==5.5.0
protobuf==3.14.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
python-dateutil==2.6.1
raven==6.10.0
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.6
s3transfer==0.1.13
scipy==1.0.0
six==1.15.0
termcolor==1.1.0
typing-extensions==3.7.4.3
urllib3==1.26.2
Werkzeug==1.0.1
wrapt==1.12.1
zipp==3.4.0
24 changes: 24 additions & 0 deletions rudra/__init__.py
@@ -0,0 +1,24 @@
"""Initialize the ml_utils package."""

import datetime
import os
import logging
import daiquiri

DEBUG = os.getenv('DEBUG', False) == 'true'

formatter = daiquiri.formatter.ColorExtrasFormatter(
fmt=(daiquiri.formatter.DEFAULT_EXTRAS_FORMAT +
" [%(filename)s:%(lineno)s F:%(funcName)s()]"))

daiquiri.setup(
level=logging.DEBUG if DEBUG else logging.ERROR,
outputs=(
daiquiri.output.TimedRotatingFile('/tmp/rudra.errors.log',
level=logging.WARNING,
interval=datetime.timedelta(hours=48)),
daiquiri.output.Stream(formatter=formatter)
)
)

logger = daiquiri.getLogger(__name__)
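Vendoring rudra means its daiquiri setup runs at import time, so callers simply reuse the module-level logger or ask daiquiri for a named one; both inherit the rotating error file and the stream output configured above. A minimal usage sketch (message text is illustrative):

import daiquiri

from rudra import logger

# Root level is ERROR unless DEBUG=true is exported, so only errors and above are emitted by default.
logger.error("S3 download failed for %s", "training/content_matrix.npz")

# Named loggers share the same outputs configured in rudra/__init__.py.
train_logger = daiquiri.getLogger("recommendation_engine.train")
train_logger.error("training aborted")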
1 change: 1 addition & 0 deletions rudra/data_store/__init__.py
@@ -0,0 +1 @@
"""Data Store and Retrieval from various Storage."""
45 changes: 45 additions & 0 deletions rudra/data_store/abstract_data_store.py
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Abstract class for data store interactions."""

import abc


class AbstractDataStore(metaclass=abc.ABCMeta):
"""Abstract class to dictate the behaviour of a data store."""

@abc.abstractmethod
def get_name(self):
"""Get name of bucket or root fs directory."""
pass

@abc.abstractmethod
def read_json_file(self):
"""Read JSON file from the data source."""
pass

@abc.abstractmethod
def read_generic_file(self):
"""Read a file and return its contents."""
pass

@abc.abstractmethod
def read_pickle_file(self, _filename):
"""Read Pickle file from data store."""
pass

@abc.abstractmethod
def read_yaml_file(self, _filename):
"""Read Pickle file from data store."""
pass

@abc.abstractmethod
def upload_file(self, _src, _target):
"""Upload file into data store."""
pass

@abc.abstractmethod
def write_json_file(self, _filename, _contents):
"""Write JSON file into data store."""
pass
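The PR only brings in the abstract contract (the S3-backed implementation lives elsewhere in rudra), so a concrete subclass makes the expected behaviour easier to see. A hedged sketch of a local-filesystem store (the class name and the ruamel.yaml usage are illustrative, not part of this PR; Python ABCs do not enforce argument names, so filenames are taken as plain parameters here):

import json
import os
import pickle
import shutil

from ruamel.yaml import YAML

from rudra.data_store.abstract_data_store import AbstractDataStore


class LocalDataStore(AbstractDataStore):
    """Store and retrieve artifacts from a local directory, mirroring the S3 store's contract."""

    def __init__(self, root_dir):
        self.root_dir = root_dir

    def get_name(self):
        """Get name of the root fs directory."""
        return self.root_dir

    def read_json_file(self, filename):
        """Read JSON file from the data source."""
        with open(os.path.join(self.root_dir, filename)) as f:
            return json.load(f)

    def read_generic_file(self, filename):
        """Read a file and return its raw bytes."""
        with open(os.path.join(self.root_dir, filename), 'rb') as f:
            return f.read()

    def read_pickle_file(self, filename):
        """Read Pickle file from data store."""
        with open(os.path.join(self.root_dir, filename), 'rb') as f:
            return pickle.load(f)

    def read_yaml_file(self, filename):
        """Read YAML file from data store."""
        with open(os.path.join(self.root_dir, filename)) as f:
            return YAML(typ='safe').load(f)

    def upload_file(self, src, target):
        """Copy a local file into the store directory."""
        shutil.copyfile(src, os.path.join(self.root_dir, target))

    def write_json_file(self, filename, contents):
        """Write JSON file into data store."""
        with open(os.path.join(self.root_dir, filename), 'w') as f:
            json.dump(contents, f)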