From 6364702ccbbe1b71f2bc8ce8e429a0f15b581c1a Mon Sep 17 00:00:00 2001
From: Szymon Datko
Date: Tue, 6 Dec 2022 17:20:44 +0100
Subject: [PATCH 1/5] Ignore old results

If the last build in the weekly pipeline is older than 14 days, then
such a job is no longer interesting for us (it was probably deleted and
we do not monitor it anymore).
---
 EoD-stuff/find-zuul-jobs-failures.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/EoD-stuff/find-zuul-jobs-failures.py b/EoD-stuff/find-zuul-jobs-failures.py
index 47a841c..c5a40fb 100644
--- a/EoD-stuff/find-zuul-jobs-failures.py
+++ b/EoD-stuff/find-zuul-jobs-failures.py
@@ -102,6 +102,12 @@ def get_builds() -> list:
     for pipeline in PIPELINES:
         for job in JOBS:
             build = get_last_build(project, branch, pipeline, job)
+            date = build.get('start_time', '')
+
+            if date and (datetime.now()
+                         - datetime.fromisoformat(date)).days > 14:
+                build['result'] = '---'
+                build['log_url'] = ''
 
             builds.append(Build(
                 project=project,
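Note on the check above: it can be exercised in isolation. A minimal sketch,
assuming Zuul reports start_time as a naive ISO-8601 string; the
mask_stale_build helper and the sample build are illustrative, not part of
the patch:

    from datetime import datetime

    MAX_AGE_DAYS = 14  # same cutoff as the patch

    def mask_stale_build(build: dict) -> dict:
        # Blank out result and log_url of a build whose start_time is
        # older than the cutoff, so later reporting skips it.
        date = build.get('start_time', '')
        if date and (datetime.now()
                     - datetime.fromisoformat(date)).days > MAX_AGE_DAYS:
            build['result'] = '---'
            build['log_url'] = ''
        return build

    # A build started months ago gets masked:
    print(mask_stale_build({'start_time': '2022-09-01T10:00:00',
                            'result': 'FAILURE',
                            'log_url': 'https://logs.example.com/42'}))

One caveat: if start_time ever carries a timezone offset,
datetime.fromisoformat returns an aware datetime and subtracting it from the
naive datetime.now() raises TypeError, so the check implicitly assumes naive
timestamps.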
From 5531d5eb8632d6ae0e1030524f45e973f5e9203f Mon Sep 17 00:00:00 2001
From: Szymon Datko
Date: Tue, 6 Dec 2022 17:22:48 +0100
Subject: [PATCH 2/5] Display the number of all meaningful results

Right now only the number of failures is displayed in the summary, but
for better context we also want to know how many builds were triggered
recently.
---
 EoD-stuff/find-zuul-jobs-failures.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/EoD-stuff/find-zuul-jobs-failures.py b/EoD-stuff/find-zuul-jobs-failures.py
index c5a40fb..3433019 100644
--- a/EoD-stuff/find-zuul-jobs-failures.py
+++ b/EoD-stuff/find-zuul-jobs-failures.py
@@ -197,6 +197,8 @@ def find_failure_reason(url: str):
 
 
 def get_bad_results(builds: list[Build]) -> dict:
+    successes = sum([build.result == 'SUCCESS' for build in builds])
+
     builds = [build for build in builds
               if (build.result not in ('SUCCESS', '---')
                   and build.log_url != '')]
@@ -222,7 +224,8 @@ def get_bad_results(builds: list[Build]) -> dict:
         i += 1
         progress(i, end)
 
-    print('Number of failed builds:', len(results))
+    failures = len(results)
+    print('Number of failed builds:', failures, '/', failures + successes)
 
     return results

From 891b22e51ee50084cd53c71d31bd275a3eb9de73 Mon Sep 17 00:00:00 2001
From: David Sariel
Date: Mon, 5 Dec 2022 16:58:30 +0200
Subject: [PATCH 3/5] Compare unified job deployments to DFG job deployments

- Compare per rhos XY.Z
- Create a spreadsheet (tab per unified job)
- For each tab list DFG jobs (according to the similarity measure)
---
 README.md                              |   2 +-
 similarity-comparison/.gitignore       |   3 +
 similarity-comparison/README.md        |   9 +
 similarity-comparison/requirements.txt |   3 +
 .../similarity_comparison.py           | 260 ++++++++++++++++++
 5 files changed, 276 insertions(+), 1 deletion(-)
 create mode 100644 similarity-comparison/.gitignore
 create mode 100644 similarity-comparison/README.md
 create mode 100644 similarity-comparison/requirements.txt
 create mode 100644 similarity-comparison/similarity_comparison.py

diff --git a/README.md b/README.md
index 0eac12e..8cdf0af 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 ### [gerrit-rechecks](gerrit-rechecks/README.md): Scripts to get the last comment in Gerrit by Data, Submit Changes to Gerrit Projects with "Depends-On" and Reverify Gerrit Changes.
 ### [EoD-stuff](EoD-stuff/README.md): Scripts for making life easier to the Engineer on Duty :)
 ### [provision](provision/README.md): Different implementation of ansible roles and playbooks to automate things.
-
+### [similarity-comparison](similarity-comparison/README.md): Scripts for comparison of infrared-based Jenkins jobs
 
 ![](https://github.com/RedHatCRE/toolbox/workflows/tests/badge.svg)
 
diff --git a/similarity-comparison/.gitignore b/similarity-comparison/.gitignore
new file mode 100644
index 0000000..513aea0
--- /dev/null
+++ b/similarity-comparison/.gitignore
@@ -0,0 +1,3 @@
+jjs.db
+jjs.xlsx
+venv/**
diff --git a/similarity-comparison/README.md b/similarity-comparison/README.md
new file mode 100644
index 0000000..fc854cf
--- /dev/null
+++ b/similarity-comparison/README.md
@@ -0,0 +1,9 @@
+HOWTO
+-----
+virtualenv venv
+. ./venv/bin/activate
+pip install -r requirements.txt
+python similarity_comparison.py
+
+
+
diff --git a/similarity-comparison/requirements.txt b/similarity-comparison/requirements.txt
new file mode 100644
index 0000000..a4ae7cc
--- /dev/null
+++ b/similarity-comparison/requirements.txt
@@ -0,0 +1,3 @@
+requests
+xlsxwriter
+scikit-learn
diff --git a/similarity-comparison/similarity_comparison.py b/similarity-comparison/similarity_comparison.py
new file mode 100644
index 0000000..d152d37
--- /dev/null
+++ b/similarity-comparison/similarity_comparison.py
@@ -0,0 +1,260 @@
+import configparser
+import json
+import re
+import requests
+import sqlite3
+import xlsxwriter
+
+from os.path import expanduser
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+
+httpRequest = {
+    'requestJobsAndBuildInfo':
+        "/api/json/?tree=jobs[name,lastBuild[result,number,timestamp]]",
+    'requestJobs':
+        "/api/json?tree=jobs[name]",
+    'requestStableBuildArtifact':
+        "/job/{jobName}/lastStableBuild/artifact/{artifactPath}",
+    'requestArtifact':
+        "/job/{jobName}/lastSuccessfulBuild/artifact/{artifactPath}"
+}
+
+
+# JJSC - Jenkins Jobs Similarity Computation
+class JJSC(object):
+    def __init__(self, credentialsPath, artifactPath):
+        configParser = configparser.RawConfigParser()
+        print(configParser.read(credentialsPath))
+        sectionName = "jenkins"
+        dictionary = dict(configParser.items(sectionName))
+
+        self.url = dictionary['url']
+        self.artifactPath = artifactPath
+        self.credentials = (dictionary['user'], dictionary['password'])
+
+        # create (if !exists) a db to store
+        self.dbcon = sqlite3.connect('jjs.db')
+        print("Connected to SQLite jjs.db")
+        cursor = self.dbcon.cursor()
+        cursor.execute('''CREATE TABLE IF NOT EXISTS jjs
+                          ( jobName text,
+                            artifactContent text,
+                            artifactCtntNrmlzd text )''')
+        self.dbcon.commit()
+        cursor.close()
+        print("jjs table exists in jjs.db")
+
+        self.workbook = xlsxwriter.Workbook('jjs.xlsx')
+
+    def __del__(self):
+        if self.dbcon:
+            self.dbcon.close()
+            print("The SQLite connection is closed")
+        self.workbook.close()
+
+    def _insertDataIntoTable(self, jobName, artifactContent):
+        try:
+            cursor = self.dbcon.cursor()
+            sqlite_insert_with_param = """INSERT INTO jjs
+                                          (jobName, artifactContent)
+                                          VALUES (?, ?);"""
+            data_tuple = (jobName, artifactContent)
+            cursor.execute(sqlite_insert_with_param, data_tuple)
+            self.dbcon.commit()
+            cursor.close()
+            return 0
+
+        except sqlite3.Error as error:
+            print("Failed to insert into sqlite table", error)
+            return -1
+
+    def populateDB(self):
+        # get all Jobs
+        request = requests.get(self.url + httpRequest['requestJobs'],
+                               verify=False,
+                               auth=self.credentials)
+        jobsInJSON = json.loads(request.text)
+        print(json.dumps(jobsInJSON, indent=4, sort_keys=True))
+
+        skipList = ["util"]
+
+        # get and store an artifact (if found)
+        okCounter = 0
+        insertCounter = 0
+        for element in jobsInJSON['jobs']:
+            print(element['name'])
+            jobName = element['name']
+            if jobName in skipList:
+                continue
+            requestStr = self.url + httpRequest['requestArtifact'].format(
+                jobName=jobName,
+                artifactPath=self.artifactPath)
+            request = requests.get(requestStr, verify=False,
+                                   auth=self.credentials)
+            print(requestStr)
+            if request.ok:
+                okCounter = okCounter + 1
+                if self._insertDataIntoTable(jobName, request.text) >= 0:
+                    insertCounter = insertCounter + 1
+
+        print("From populateDB")
+        print("okCounter: " + str(okCounter))
+        print("insertCounter: " + str(insertCounter))
+        print("number of jobs: " + str(len(jobsInJSON['jobs'])))
+        assert (okCounter == insertCounter)
+
+    def _normalizeArtifact(self, artifact):
+        plugin_names = "(tripleo-undercloud|tripleo-overcloud)"
+        regex = r".*infrared " + plugin_names + " .*(([\r\n]*).*){4}"
+        matches = re.finditer(regex, artifact, re.MULTILINE)
+        normalizedArtifact = ""
+        for matchNum, match in enumerate(matches, start=1):
+            print(
+                "Match {matchNum} was found at {start}-{end}: {match}".format(
+                    matchNum=matchNum,
+                    start=match.start(),
+                    end=match.end(),
+                    match=match.group()))
+            normalizedArtifact = normalizedArtifact + "\n" + match.group()
+
+        # TODO: filter out tempest invocation - DONE
+        return (normalizedArtifact)
+
+    def _extractVersionFromJobName(self, jobName):
+        # matches XY.Z XY XY_Z in job names
+        REGEXP = r'\s*([\d(.|_)]+)(_compact|-compact|_director|-director)\s*'
+
+        version = re.search(REGEXP, jobName).group(1)
+        version = version.replace("_", ".")  # for jobs with XY_Z
+
+        return version
+
+    def _extractIPVersionFromJobName(self, jobName):
+        # matches ipvX in job names
+        REGEXP = r".*ipv([\d]+).*"
+
+        try:
+            version = re.search(REGEXP, jobName).group(1)
+        except AttributeError:
+            version = "NA"
+
+        return version
+
+    # return true if artifact contains any of filter out criteria
+    def _isFilteredOut(self, artifact):
+        filter = ["infrared tripleo-inventory",
+                  "infrared workspace import",
+                  "sshpass -p stack ssh -o UserKnownHostsFile=/dev/null",
+                  "infrared tripleo-upgrade"]
+
+        artifactString = str(artifact)
+
+        intersection = [value for value in filter if value in artifactString]
+
+        return (len(intersection) > 0)
+
+    def analyseJJSTable(self):
+        cursor = self.dbcon.cursor()
+
+        # fetch unified jobs
+        sql_command = \
+            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+            '\'%unified%\' AND jobName LIKE \'%director%\' ORDER BY jobName'
+        cursor.execute(sql_command)
+        unifiedJobs = cursor.fetchall()
+        print("Total of unified jobs are: ", len(unifiedJobs))
+
+        # fetch other director jobs (including unified ones) to compare
+        # against the unified jobs
+        sql_command = \
+            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+            '\'%director%\' AND jobName NOT LIKE \'%compact%\''
+        cursor.execute(sql_command)
+        directorJobs = cursor.fetchall()
+        print("Total of director jobs are: ", len(directorJobs))
+
+        unifiedJobsCounter = 0
+        cell_format = self.workbook.add_format(
+            {'bold': True, 'font_color': 'red'})
+        for rowUnified in unifiedJobs:
+            jobNameUnified = str(rowUnified[0])
+            print(len(unifiedJobs))
+            try:
+                unifiedJobsCounter += 1
+                worksheet = self.workbook.add_worksheet(
+                    jobNameUnified[1:28] + "--" + str(unifiedJobsCounter))
+                worksheet.set_column(0, 0, len(jobNameUnified))
+                worksheet.write(0, 0, jobNameUnified, cell_format)
+                row = 1
+            except xlsxwriter.exceptions.DuplicateWorksheetName:
+                continue
+            for rowDirector in directorJobs:
+                jobNameDirector = str(rowDirector[0])
+                releaseUnified = self._extractVersionFromJobName(
+                    jobNameUnified)
+                releaseDirector = self._extractVersionFromJobName(
+                    jobNameDirector)
+                ipVersionUnified = self._extractIPVersionFromJobName(
+                    jobNameUnified)
+                ipVersionDirector = self._extractIPVersionFromJobName(
+                    jobNameDirector)
+                # if releaseUnified not in ["16.1", "16.2"]:
+                #     continue
+
+                if jobNameUnified != jobNameDirector and \
+                        releaseUnified == releaseDirector and \
+                        ipVersionUnified == ipVersionDirector:
+                    artifactUnified = str(rowUnified[1])
+                    artifactDirector = str(rowDirector[1])
+                    if self._isFilteredOut(artifactDirector):
+                        continue
+                    normalizedUnified = self._normalizeArtifact(
+                        artifactUnified)
+                    normalizedDirector = self._normalizeArtifact(
+                        artifactDirector)
+                    try:
+                        tfidf = TfidfVectorizer().fit_transform(
+                            [normalizedUnified, normalizedDirector])
+                        # no need to normalize, since Vectorizer will return
+                        # normalized tf-idf
+                        pairwise_similarity = tfidf * tfidf.T
+                    except Exception:
+                        print("Can not compare " + rowUnified[0] + " and " +
+                              rowDirector[0] + "\n")
+                        continue  # no similarity matrix for this pair
+                    threshold = pairwise_similarity.data.min()
+
+                    if threshold >= 0.0:
+                        wordsUnified = set(normalizedUnified.split())
+                        wordsDirector = set(normalizedDirector.split())
+                        unifiedUniques = set(
+                            sorted(wordsUnified.difference(wordsDirector)))
+                        directorUniques = set(
+                            sorted(wordsDirector.difference(wordsUnified)))
+                        uniques = unifiedUniques.union(directorUniques)
+                        print(jobNameUnified + "," + str(unifiedUniques))
+                        print(jobNameDirector + "," + str(directorUniques))
+                        fstr = 'Total uniques: {}, Pairwise Similarity: {}\n'
+                        print(fstr.format(len(uniques), threshold))
+                        try:
+                            worksheet.set_column(row, 0, len(jobNameDirector))
+                            worksheet.write(row, 0, jobNameDirector)
+
+                            threshold = round(threshold, 3)
+                            worksheet.set_column(row, 1, len(str(threshold)))
+                            worksheet.write(row, 1, str(threshold))
+
+                            row = row + 1
+                        except Exception as e:
+                            print(e)
+                            continue
+        cursor.close()
+
+
+credentialsPath = expanduser("~") + '/.config/jenkins_jobs/jenkins_jobs.ini'
+artifactPath = '.sh/run.sh'
+jjsc = JJSC(credentialsPath, artifactPath)
+jjsc.populateDB()
+jjsc.analyseJJSTable()
+del jjsc
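At the heart of analyseJJSTable is plain TF-IDF cosine similarity between
two normalized artifacts. A self-contained sketch of that computation,
assuming only scikit-learn is installed; the two strings stand in for
normalized run.sh transcripts:

    from sklearn.feature_extraction.text import TfidfVectorizer

    # Stand-ins for two normalized deployment transcripts.
    doc_a = "infrared tripleo-overcloud --version 16.2 --deploy yes"
    doc_b = "infrared tripleo-overcloud --version 16.2 --deploy yes --containers yes"

    # fit_transform returns L2-normalized tf-idf rows, so multiplying the
    # matrix by its transpose yields the cosine similarities directly.
    tfidf = TfidfVectorizer().fit_transform([doc_a, doc_b])
    pairwise_similarity = tfidf * tfidf.T  # 2x2 matrix, diagonal is 1.0

    # With two documents the off-diagonal entry equals the matrix minimum,
    # which is the score the script thresholds on.
    print(pairwise_similarity.toarray()[0, 1])

Because the vectorizer already unit-normalizes each row, no separate
cosine_similarity call is needed.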
From 5fcd1490e3fbc7f50f1ea8da39fba0480c8a6590 Mon Sep 17 00:00:00 2001
From: David Sariel
Date: Thu, 8 Dec 2022 00:08:10 +0200
Subject: [PATCH 4/5] Use modified infrared for parsing plugin arguments

OSPCRE-875
---
 similarity-comparison/infrared_args_patch | 36 ++++++++++++++++
 similarity-comparison/requirements.txt    |  3 +-
 .../similarity_comparison.py              | 42 +++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 similarity-comparison/infrared_args_patch

diff --git a/similarity-comparison/infrared_args_patch b/similarity-comparison/infrared_args_patch
new file mode 100644
index 0000000..26c532c
--- /dev/null
+++ b/similarity-comparison/infrared_args_patch
@@ -0,0 +1,36 @@
+diff --git a/infrared/api.py b/infrared/api.py
+index e88b2949..6fc7f77a 100644
+--- a/infrared/api.py
++++ b/infrared/api.py
+@@ -116,6 +116,12 @@ class InfraredPluginsSpec(SpecObject):
+         # unpack parsed arguments
+         nested_args, control_args, custom_args = parsed_args
+ 
++        # print to stdout and serialize nested arguments. Finally, terminate.
++        import pickle
++        print(nested_args)
++        with open('nested_args.pkl', 'wb') as f:
++            pickle.dump(nested_args, f)
++        exit(0)
+         if control_args.get('debug', None):
+             logger.LOG.setLevel(logging.DEBUG)
+ 
+@@ -198,12 +204,18 @@ class SpecManager(object):
+ 
+     def run_specs(self, args=None):
+         spec_args = vars(self.parser.parse_args(args))
++        print(spec_args)
+         subcommand = spec_args.get('subcommand', '')
+         if not spec_args.get('no_log_commands'):
+             if self.execution_logger is None:
+                 self.execution_logger = CoreServices.execution_logger_manager()
+             self.execution_logger.command()
+ 
++        # serialize subcommand
++        import pickle
++        with open('subcommand.pkl', 'wb') as f:
++            pickle.dump(subcommand, f)
++
+         if subcommand in self.spec_objects:
+             return self.spec_objects[subcommand].spec_handler(
+                 self.parser, args=args)
diff --git a/similarity-comparison/requirements.txt b/similarity-comparison/requirements.txt
index a4ae7cc..9162eaf 100644
--- a/similarity-comparison/requirements.txt
+++ b/similarity-comparison/requirements.txt
@@ -1,3 +1,4 @@
+gitpython
 requests
-xlsxwriter
 scikit-learn
+xlsxwriter
diff --git a/similarity-comparison/similarity_comparison.py b/similarity-comparison/similarity_comparison.py
index d152d37..384cbe7 100644
--- a/similarity-comparison/similarity_comparison.py
+++ b/similarity-comparison/similarity_comparison.py
@@ -3,8 +3,11 @@
 import re
 import requests
 import sqlite3
+import subprocess
+import sys
 import xlsxwriter
 
+from git import Repo
 from os.path import expanduser
 from sklearn.feature_extraction.text import TfidfVectorizer
 
@@ -21,6 +24,16 @@
 }
 
 
+def get_base_prefix_compat():
+    """Get base/real prefix, or sys.prefix if there is none."""
+    return getattr(sys, "base_prefix", None) or getattr(sys, "real_prefix",
+                                                        None) or sys.prefix
+
+
+def in_virtualenv():
+    return get_base_prefix_compat() != sys.prefix
+
+
 # JJSC - Jenkins Jobs Similarity Computation
 class JJSC(object):
     def __init__(self, credentialsPath, artifactPath):
@@ -53,6 +66,32 @@ def __del__(self):
             print("The SQLite connection is closed")
         self.workbook.close()
 
+    def _prepare_arg_parsing_and_serialization(self):
+        # clone infrared
+        git_url = "https://github.com/redhat-openstack/infrared.git"
+        repo_dir = "/tmp/infrared"
+        subprocess.call("rm -rf " + repo_dir, shell=True)
+        Repo.clone_from(git_url, repo_dir)
+
+        # apply the arg serialization patch
+        command = "cp infrared_args_patch " + repo_dir + ";" + \
+            "cd " + repo_dir + ";" + \
+            "git apply infrared_args_patch"
+        subprocess.call(command, shell=True)
+
+        # install infrared in a virtual environment
+        if (not in_virtualenv()):
+            raise Exception("This code installs pip packages and is " +
+                            "advised to be executed in a virtual environment")
+
+        command = "cd " + repo_dir + ";" + \
+            "pip install -U pip;" + \
+            "pip install ."
+        subprocess.call(command, shell=True)
+
+        # add additional plugins for enhanced parsing
+        subprocess.call("infrared plugin add all", shell=True)
+
     def _insertDataIntoTable(self, jobName, artifactContent):
         try:
             cursor = self.dbcon.cursor()
@@ -255,6 +294,9 @@ def analyseJJSTable(self):
 credentialsPath = expanduser("~") + '/.config/jenkins_jobs/jenkins_jobs.ini'
 artifactPath = '.sh/run.sh'
 jjsc = JJSC(credentialsPath, artifactPath)
+
+jjsc._prepare_arg_parsing_and_serialization()
+
 jjsc.populateDB()
 jjsc.analyseJJSTable()
 del jjsc
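The in_virtualenv guard works because creating a virtual environment repoints
sys.prefix at the environment, while sys.base_prefix (or the legacy
sys.real_prefix set by the old virtualenv tool) keeps the interpreter's
original location. A short sketch of the detection, plus reading back the
pickled arguments the patched infrared dumps; only the nested_args.pkl
filename comes from the patch, the loading code is illustrative:

    import pickle
    import sys

    def in_virtualenv() -> bool:
        # base_prefix differs from prefix only inside a virtual env;
        # real_prefix covers installs made by pre-venv virtualenv.
        base = (getattr(sys, "base_prefix", None)
                or getattr(sys, "real_prefix", None)
                or sys.prefix)
        return base != sys.prefix

    if not in_virtualenv():
        raise SystemExit("expected to run inside a virtual environment")

    # After a patched 'infrared tripleo-overcloud ...' invocation has
    # dumped its parsed arguments and exited, they can be inspected:
    with open('nested_args.pkl', 'rb') as f:
        nested_args = pickle.load(f)
    print(nested_args)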
From a329afb9e013cfbd0336f89f66aeab77f63dfa40 Mon Sep 17 00:00:00 2001
From: David Sariel
Date: Sun, 22 Jan 2023 20:23:27 +0200
Subject: [PATCH 5/5] Install tox<4

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 0de6398..c3c5282 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Packages
         run: |
           apk add findutils git python3 py3-pip shellcheck
-          pip3 install tox
+          pip3 install 'tox<4'
       - name: Checkout
         uses: actions/checkout@v3
       - name: Tox
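The quotes around tox<4 matter: unquoted, the shell would parse <4 as input
redirection from a file named 4 and pip would install plain tox. The pin
itself presumably reacts to the tox 4 rewrite released in December 2022,
which broke many tox 3 configurations. The specifier's semantics can be
sanity-checked with the packaging library (an extra dependency assumed here;
the workflow itself only calls pip):

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    spec = SpecifierSet("<4")          # what pip parses out of 'tox<4'
    print(Version("3.28.0") in spec)   # True:  tox 3.x remains eligible
    print(Version("4.0.0") in spec)    # False: tox 4 is excluded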