From 6364702ccbbe1b71f2bc8ce8e429a0f15b581c1a Mon Sep 17 00:00:00 2001
From: Szymon Datko
Date: Tue, 6 Dec 2022 17:20:44 +0100
Subject: [PATCH 1/5] Ignore old results

If the last build in the weekly pipeline is older than 14 days, then
such a job is no longer interesting for us (it was probably deleted and
we do not monitor it anymore).
---
 EoD-stuff/find-zuul-jobs-failures.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/EoD-stuff/find-zuul-jobs-failures.py b/EoD-stuff/find-zuul-jobs-failures.py
index 47a841c..c5a40fb 100644
--- a/EoD-stuff/find-zuul-jobs-failures.py
+++ b/EoD-stuff/find-zuul-jobs-failures.py
@@ -102,6 +102,12 @@ def get_builds() -> list:
     for pipeline in PIPELINES:
         for job in JOBS:
             build = get_last_build(project, branch, pipeline, job)
+            date = build.get('start_time', '')
+
+            if date and (datetime.now()
+                         - datetime.fromisoformat(date)).days > 14:
+                build['result'] = '---'
+                build['log_url'] = ''
 
             builds.append(Build(
                 project=project,
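Note on the check above: it can be exercised in isolation. A minimal sketch,
assuming Zuul reports start_time as a naive ISO-8601 string; the
mask_stale_build helper and the sample build are illustrative, not part of
the patch:

    from datetime import datetime

    MAX_AGE_DAYS = 14  # same cutoff as the patch

    def mask_stale_build(build: dict) -> dict:
        # Blank out result and log_url of a build whose start_time is
        # older than the cutoff, so later reporting skips it.
        date = build.get('start_time', '')
        if date and (datetime.now()
                     - datetime.fromisoformat(date)).days > MAX_AGE_DAYS:
            build['result'] = '---'
            build['log_url'] = ''
        return build

    # A build started months ago gets masked:
    print(mask_stale_build({'start_time': '2022-09-01T10:00:00',
                            'result': 'FAILURE',
                            'log_url': 'https://logs.example.com/42'}))

One caveat: if start_time ever carries a timezone offset,
datetime.fromisoformat returns an aware datetime and subtracting it from the
naive datetime.now() raises TypeError, so the check implicitly assumes naive
timestamps.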
From 5531d5eb8632d6ae0e1030524f45e973f5e9203f Mon Sep 17 00:00:00 2001
From: Szymon Datko
Date: Tue, 6 Dec 2022 17:22:48 +0100
Subject: [PATCH 2/5] Display the number of all meaningful results

Right now only the number of failures is displayed in the summary, but
for better context we also want to know how many builds were triggered
recently.
---
 EoD-stuff/find-zuul-jobs-failures.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/EoD-stuff/find-zuul-jobs-failures.py b/EoD-stuff/find-zuul-jobs-failures.py
index c5a40fb..3433019 100644
--- a/EoD-stuff/find-zuul-jobs-failures.py
+++ b/EoD-stuff/find-zuul-jobs-failures.py
@@ -197,6 +197,8 @@ def find_failure_reason(url: str):
 
 
 def get_bad_results(builds: list[Build]) -> dict:
+    successes = sum([build.result == 'SUCCESS' for build in builds])
+
     builds = [build for build in builds
               if (build.result not in ('SUCCESS', '---')
                   and build.log_url != '')]
@@ -222,7 +224,8 @@ def get_bad_results(builds: list[Build]) -> dict:
         i += 1
         progress(i, end)
 
-    print('Number of failed builds:', len(results))
+    failures = len(results)
+    print('Number of failed builds:', failures, '/', failures + successes)
 
     return results

From 891b22e51ee50084cd53c71d31bd275a3eb9de73 Mon Sep 17 00:00:00 2001
From: David Sariel
Date: Mon, 5 Dec 2022 16:58:30 +0200
Subject: [PATCH 3/5] Compare unified job deployments to DFG job deployments

- Compare per rhos XY.Z
- Create a spreadsheet (tab per unified job)
- For each tab list DFG jobs (according to the similarity measure)
---
 README.md                              |   2 +-
 similarity-comparison/.gitignore       |   3 +
 similarity-comparison/README.md        |   9 +
 similarity-comparison/requirements.txt |   3 +
 .../similarity_comparison.py           | 260 ++++++++++++++++++
 5 files changed, 276 insertions(+), 1 deletion(-)
 create mode 100644 similarity-comparison/.gitignore
 create mode 100644 similarity-comparison/README.md
 create mode 100644 similarity-comparison/requirements.txt
 create mode 100644 similarity-comparison/similarity_comparison.py

diff --git a/README.md b/README.md
index 0eac12e..8cdf0af 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 ### [gerrit-rechecks](gerrit-rechecks/README.md): Scripts to get the last comment in Gerrit by Data, Submit Changes to Gerrit Projects with "Depends-On" and Reverify Gerrit Changes.
 ### [EoD-stuff](EoD-stuff/README.md): Scripts for making life easier to the Engineer on Duty :)
 ### [provision](provision/README.md): Different implementation of ansible roles and playbooks to automate things.
-
+### [similarity-comparison](similarity-comparison/README.md): Scripts for comparison of infrared-based Jenkins jobs
 
 ![](https://github.com/RedHatCRE/toolbox/workflows/tests/badge.svg)
 
diff --git a/similarity-comparison/.gitignore b/similarity-comparison/.gitignore
new file mode 100644
index 0000000..513aea0
--- /dev/null
+++ b/similarity-comparison/.gitignore
@@ -0,0 +1,3 @@
+jjs.db
+jjs.xlsx
+venv/**
diff --git a/similarity-comparison/README.md b/similarity-comparison/README.md
new file mode 100644
index 0000000..fc854cf
--- /dev/null
+++ b/similarity-comparison/README.md
@@ -0,0 +1,9 @@
+HOWTO
+-----
+virtualenv venv
+. ./venv/bin/activate
+pip install -r requirements.txt
+python similarity_comparison.py
+
+
+
diff --git a/similarity-comparison/requirements.txt b/similarity-comparison/requirements.txt
new file mode 100644
index 0000000..a4ae7cc
--- /dev/null
+++ b/similarity-comparison/requirements.txt
@@ -0,0 +1,3 @@
+requests
+xlsxwriter
+scikit-learn
diff --git a/similarity-comparison/similarity_comparison.py b/similarity-comparison/similarity_comparison.py
new file mode 100644
index 0000000..d152d37
--- /dev/null
+++ b/similarity-comparison/similarity_comparison.py
@@ -0,0 +1,260 @@
+import configparser
+import json
+import re
+import requests
+import sqlite3
+import xlsxwriter
+
+from os.path import expanduser
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+
+httpRequest = {
+    'requestJobsAndBuildInfo':
+        "/api/json/?tree=jobs[name,lastBuild[result,number,timestamp]]",
+    'requestJobs':
+        "/api/json?tree=jobs[name]",
+    'requestStableBuildArtifact':
+        "/job/{jobName}/lastStableBuild/artifact/{artifactPath}",
+    'requestArtifact':
+        "/job/{jobName}/lastSuccessfulBuild/artifact/{artifactPath}"
+}
+
+
+# JJSC - Jenkins Jobs Similarity Computation
+class JJSC(object):
+    def __init__(self, credentialsPath, artifactPath):
+        configParser = configparser.RawConfigParser()
+        print(configParser.read(credentialsPath))
+        sectionName = "jenkins"
+        dictionary = dict(configParser.items(sectionName))
+
+        self.url = dictionary['url']
+        self.artifactPath = artifactPath
+        self.credentials = (dictionary['user'], dictionary['password'])
+
+        # create (if !exists) a db to store
+        self.dbcon = sqlite3.connect('jjs.db')
+        print("Connected to SQLite jjs.db")
+        cursor = self.dbcon.cursor()
+        cursor.execute('''CREATE TABLE IF NOT EXISTS jjs
+                          ( jobName text,
+                            artifactContent text,
+                            artifactCtntNrmlzd text )''')
+        self.dbcon.commit()
+        cursor.close()
+        print("jjs table exists in jjs.db")
+
+        self.workbook = xlsxwriter.Workbook('jjs.xlsx')
+
+    def __del__(self):
+        if self.dbcon:
+            self.dbcon.close()
+            print("The SQLite connection is closed")
+        self.workbook.close()
+
+    def _insertDataIntoTable(self, jobName, artifactContent):
+        try:
+            cursor = self.dbcon.cursor()
+            sqlite_insert_with_param = """INSERT INTO jjs
+                                          (jobName, artifactContent)
+                                          VALUES (?, ?);"""
+            data_tuple = (jobName, artifactContent)
+            cursor.execute(sqlite_insert_with_param, data_tuple)
+            self.dbcon.commit()
+            cursor.close()
+            return 0
+
+        except sqlite3.Error as error:
+            print("Failed to insert into sqlite table", error)
+            return -1
+
+    def populateDB(self):
+        # get all Jobs
+        request = requests.get(self.url + httpRequest['requestJobs'],
+                               verify=False,
+                               auth=self.credentials)
+        jobsInJSON = json.loads(request.text)
+        print(json.dumps(jobsInJSON, indent=4, sort_keys=True))
+
+        skipList = ["util"]
+
+        # get and store an artifact (if found)
+        okCounter = 0
+        insertCounter = 0
+        for element in jobsInJSON['jobs']:
+            print(element['name'])
+            jobName = element['name']
+            if jobName in skipList:
+                continue
+            requestStr = self.url + httpRequest['requestArtifact'].format(
+                jobName=jobName,
+                artifactPath=self.artifactPath)
+            request = requests.get(requestStr, verify=False,
+                                   auth=self.credentials)
+            print(requestStr)
+            if request.ok:
+                okCounter = okCounter + 1
+                if self._insertDataIntoTable(jobName, request.text) >= 0:
+                    insertCounter = insertCounter + 1
+
+        print("From populateDB")
+        print("okCounter: " + str(okCounter))
+        print("insertCounter: " + str(insertCounter))
+        print("number of jobs: " + str(len(jobsInJSON['jobs'])))
+        assert (okCounter == insertCounter)
+
+    def _normalizeArtifact(self, artifact):
+        plugin_names = "(tripleo-undercloud|tripleo-overcloud)"
+        regex = r".*infrared " + plugin_names + " .*(([\r\n]*).*){4}"
+        matches = re.finditer(regex, artifact, re.MULTILINE)
+        normalizedArtifact = ""
+        for matchNum, match in enumerate(matches, start=1):
+            print(
+                "Match {matchNum} was found at {start}-{end}: {match}".format(
+                    matchNum=matchNum,
+                    start=match.start(),
+                    end=match.end(),
+                    match=match.group()))
+            normalizedArtifact = normalizedArtifact + "\n" + match.group()
+
+        # TODO: filter out tempest invocation - DONE
+        return (normalizedArtifact)
+
+    def _extractVersionFromJobName(self, jobName):
+        # matches XY.Z XY XY_Z in job names
+        REGEXP = r'\s*([\d(.|_)]+)(_compact|-compact|_director|-director)\s*'
+
+        version = re.search(REGEXP, jobName).group(1)
+        version = version.replace("_", ".")  # for jobs with XY_Z
+
+        return version
+
+    def _extractIPVersionFromJobName(self, jobName):
+        # matches ipvX in job names
+        REGEXP = r".*ipv([\d]+).*"
+
+        try:
+            version = re.search(REGEXP, jobName).group(1)
+        except AttributeError:
+            version = "NA"
+
+        return version
+
+    # return true if artifact contains any of filter out criteria
+    def _isFilteredOut(self, artifact):
+        filter = ["infrared tripleo-inventory",
+                  "infrared workspace import",
+                  "sshpass -p stack ssh -o UserKnownHostsFile=/dev/null",
+                  "infrared tripleo-upgrade"]
+
+        artifactString = str(artifact)
+
+        intersection = [value for value in filter if value in artifactString]
+
+        return (len(intersection) > 0)
+
+    def analyseJJSTable(self):
+        cursor = self.dbcon.cursor()
+
+        # fetch unified jobs
+        sql_command = \
+            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+            '\'%unified%\' AND jobName LIKE \'%director%\' ORDER BY jobName'
+        cursor.execute(sql_command)
+        unifiedJobs = cursor.fetchall()
+        print("Total of unified jobs are: ", len(unifiedJobs))
+
+        # fetch other director jobs (including unified ones) to compare
+        # against the unified jobs
+        sql_command = \
+            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+            '\'%director%\' AND jobName NOT LIKE \'%compact%\''
+        cursor.execute(sql_command)
+        directorJobs = cursor.fetchall()
+        print("Total of director jobs are: ", len(directorJobs))
+
+        unifiedJobsCounter = 0
+        cell_format = self.workbook.add_format(
+            {'bold': True, 'font_color': 'red'})
+        for rowUnified in unifiedJobs:
+            jobNameUnified = str(rowUnified[0])
+            print(len(unifiedJobs))
+            try:
+                unifiedJobsCounter += 1
+                worksheet = self.workbook.add_worksheet(
+                    jobNameUnified[1:28] + "--" + str(unifiedJobsCounter))
+                worksheet.set_column(0, 0, len(jobNameUnified))
+                worksheet.write(0, 0, jobNameUnified, cell_format)
+                row = 1
+            except xlsxwriter.exceptions.DuplicateWorksheetName:
+                continue
+            for rowDirector in directorJobs:
+                jobNameDirector = str(rowDirector[0])
+                releaseUnified = self._extractVersionFromJobName(
+                    jobNameUnified)
+                releaseDirector = self._extractVersionFromJobName(
+                    jobNameDirector)
+                ipVersionUnified = self._extractIPVersionFromJobName(
+                    jobNameUnified)
+                ipVersionDirector = self._extractIPVersionFromJobName(
+                    jobNameDirector)
+                # if releaseUnified not in ["16.1", "16.2"]:
+                #     continue
+
+                if jobNameUnified != jobNameDirector and \
+                        releaseUnified == releaseDirector and \
+                        ipVersionUnified == ipVersionDirector:
+                    artifactUnified = str(rowUnified[1])
+                    artifactDirector = str(rowDirector[1])
+                    if self._isFilteredOut(artifactDirector):
+                        continue
+                    normalizedUnified = self._normalizeArtifact(
+                        artifactUnified)
+                    normalizedDirector = self._normalizeArtifact(
+                        artifactDirector)
+                    try:
+                        tfidf = TfidfVectorizer().fit_transform(
+                            [normalizedUnified, normalizedDirector])
+                        # no need to normalize, since Vectorizer will return
+                        # normalized tf-idf
+                        pairwise_similarity = tfidf * tfidf.T
+                    except Exception:
+                        print("Can not compare " + rowUnified[0] + " and " +
+                              rowDirector[0] + "\n")
+                        continue  # no similarity matrix for this pair
+                    threshold = pairwise_similarity.data.min()
+
+                    if threshold >= 0.0:
+                        wordsUnified = set(normalizedUnified.split())
+                        wordsDirector = set(normalizedDirector.split())
+                        unifiedUniques = set(
+                            sorted(wordsUnified.difference(wordsDirector)))
+                        directorUniques = set(
+                            sorted(wordsDirector.difference(wordsUnified)))
+                        uniques = unifiedUniques.union(directorUniques)
+                        print(jobNameUnified + "," + str(unifiedUniques))
+                        print(jobNameDirector + "," + str(directorUniques))
+                        fstr = 'Total uniques: {}, Pairwise Similarity: {}\n'
+                        print(fstr.format(len(uniques), threshold))
+                        try:
+                            worksheet.set_column(row, 0, len(jobNameDirector))
+                            worksheet.write(row, 0, jobNameDirector)
+
+                            threshold = round(threshold, 3)
+                            worksheet.set_column(row, 1, len(str(threshold)))
+                            worksheet.write(row, 1, str(threshold))
+
+                            row = row + 1
+                        except Exception as e:
+                            print(e)
+                            continue
+        cursor.close()
+
+
+credentialsPath = expanduser("~") + '/.config/jenkins_jobs/jenkins_jobs.ini'
+artifactPath = '.sh/run.sh'
+jjsc = JJSC(credentialsPath, artifactPath)
+jjsc.populateDB()
+jjsc.analyseJJSTable()
+del jjsc
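At the heart of analyseJJSTable is plain TF-IDF cosine similarity between
two normalized artifacts. A self-contained sketch of that computation,
assuming only scikit-learn is installed; the two strings stand in for
normalized run.sh transcripts:

    from sklearn.feature_extraction.text import TfidfVectorizer

    # Stand-ins for two normalized deployment transcripts.
    doc_a = "infrared tripleo-overcloud --version 16.2 --deploy yes"
    doc_b = "infrared tripleo-overcloud --version 16.2 --deploy yes --containers yes"

    # fit_transform returns L2-normalized tf-idf rows, so multiplying the
    # matrix by its transpose yields the cosine similarities directly.
    tfidf = TfidfVectorizer().fit_transform([doc_a, doc_b])
    pairwise_similarity = tfidf * tfidf.T  # 2x2 matrix, diagonal is 1.0

    # With two documents the off-diagonal entry equals the matrix minimum,
    # which is the score the script thresholds on.
    print(pairwise_similarity.toarray()[0, 1])

Because the vectorizer already unit-normalizes each row, no separate
cosine_similarity call is needed.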
From 5fcd1490e3fbc7f50f1ea8da39fba0480c8a6590 Mon Sep 17 00:00:00 2001
From: David Sariel
Date: Thu, 8 Dec 2022 00:08:10 +0200
Subject: [PATCH 4/5] Use modified infrared for parsing plugin arguments

OSPCRE-875
---
 similarity-comparison/infrared_args_patch | 36 ++++++++++++++++
 similarity-comparison/requirements.txt    |  3 +-
 .../similarity_comparison.py              | 42 +++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 similarity-comparison/infrared_args_patch

diff --git a/similarity-comparison/infrared_args_patch b/similarity-comparison/infrared_args_patch
new file mode 100644
index 0000000..26c532c
--- /dev/null
+++ b/similarity-comparison/infrared_args_patch
@@ -0,0 +1,36 @@
+diff --git a/infrared/api.py b/infrared/api.py
+index e88b2949..6fc7f77a 100644
+--- a/infrared/api.py
++++ b/infrared/api.py
+@@ -116,6 +116,12 @@ class InfraredPluginsSpec(SpecObject):
+         # unpack parsed arguments
+         nested_args, control_args, custom_args = parsed_args
+ 
++        # print to stdout and serialize nested arguments. Finally, terminate.
++        import pickle
++        print(nested_args)
++        with open('nested_args.pkl', 'wb') as f:
++            pickle.dump(nested_args, f)
++        exit(0)
+         if control_args.get('debug', None):
+             logger.LOG.setLevel(logging.DEBUG)
+ 
+@@ -198,12 +204,18 @@ class SpecManager(object):
+ 
+     def run_specs(self, args=None):
+         spec_args = vars(self.parser.parse_args(args))
++        print(spec_args)
+         subcommand = spec_args.get('subcommand', '')
+         if not spec_args.get('no_log_commands'):
+             if self.execution_logger is None:
+                 self.execution_logger = CoreServices.execution_logger_manager()
+             self.execution_logger.command()
+ 
++        # serialize subcommand
++        import pickle
++        with open('subcommand.pkl', 'wb') as f:
++            pickle.dump(subcommand, f)
++
+         if subcommand in self.spec_objects:
+             return self.spec_objects[subcommand].spec_handler(
+                 self.parser, args=args)
diff --git a/similarity-comparison/requirements.txt b/similarity-comparison/requirements.txt
index a4ae7cc..9162eaf 100644
--- a/similarity-comparison/requirements.txt
+++ b/similarity-comparison/requirements.txt
@@ -1,3 +1,4 @@
+gitpython
 requests
-xlsxwriter
 scikit-learn
+xlsxwriter
diff --git a/similarity-comparison/similarity_comparison.py b/similarity-comparison/similarity_comparison.py
index d152d37..384cbe7 100644
--- a/similarity-comparison/similarity_comparison.py
+++ b/similarity-comparison/similarity_comparison.py
@@ -3,8 +3,11 @@
 import re
 import requests
 import sqlite3
+import subprocess
+import sys
 import xlsxwriter
 
+from git import Repo
 from os.path import expanduser
 from sklearn.feature_extraction.text import TfidfVectorizer
 
@@ -21,6 +24,16 @@
 }
 
 
+def get_base_prefix_compat():
+    """Get base/real prefix, or sys.prefix if there is none."""
+    return getattr(sys, "base_prefix", None) or getattr(sys, "real_prefix",
+                                                        None) or sys.prefix
+
+
+def in_virtualenv():
+    return get_base_prefix_compat() != sys.prefix
+
+
 # JJSC - Jenkins Jobs Similarity Computation
 class JJSC(object):
     def __init__(self, credentialsPath, artifactPath):
@@ -53,6 +66,32 @@ def __del__(self):
             print("The SQLite connection is closed")
         self.workbook.close()
 
+    def _prepare_arg_parsing_and_serialization(self):
+        # clone infrared
+        git_url = "https://github.com/redhat-openstack/infrared.git"
+        repo_dir = "/tmp/infrared"
+        subprocess.call("rm -rf " + repo_dir, shell=True)
+        Repo.clone_from(git_url, repo_dir)
+
+        # apply the arg serialization patch
+        command = "cp infrared_args_patch " + repo_dir + ";" + \
+            "cd " + repo_dir + ";" + \
+            "git apply infrared_args_patch"
+        subprocess.call(command, shell=True)
+
+        # install infrared in a virtual environment
+        if (not in_virtualenv()):
+            raise Exception("This code installs pip packages and is " +
+                            "advised to be executed in a virtual environment")
+
+        command = "cd " + repo_dir + ";" + \
+            "pip install -U pip;" + \
+            "pip install ."
+        subprocess.call(command, shell=True)
+
+        # add additional plugins for enhanced parsing
+        subprocess.call("infrared plugin add all", shell=True)
+
     def _insertDataIntoTable(self, jobName, artifactContent):
         try:
             cursor = self.dbcon.cursor()
@@ -255,6 +294,9 @@ def analyseJJSTable(self):
 credentialsPath = expanduser("~") + '/.config/jenkins_jobs/jenkins_jobs.ini'
 artifactPath = '.sh/run.sh'
 jjsc = JJSC(credentialsPath, artifactPath)
+
+jjsc._prepare_arg_parsing_and_serialization()
+
 jjsc.populateDB()
 jjsc.analyseJJSTable()
 del jjsc
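The in_virtualenv guard works because creating a virtual environment repoints
sys.prefix at the environment, while sys.base_prefix (or the legacy
sys.real_prefix set by the old virtualenv tool) keeps the interpreter's
original location. A short sketch of the detection, plus reading back the
pickled arguments the patched infrared dumps; only the nested_args.pkl
filename comes from the patch, the loading code is illustrative:

    import pickle
    import sys

    def in_virtualenv() -> bool:
        # base_prefix differs from prefix only inside a virtual env;
        # real_prefix covers installs made by pre-venv virtualenv.
        base = (getattr(sys, "base_prefix", None)
                or getattr(sys, "real_prefix", None)
                or sys.prefix)
        return base != sys.prefix

    if not in_virtualenv():
        raise SystemExit("expected to run inside a virtual environment")

    # After a patched 'infrared tripleo-overcloud ...' invocation has
    # dumped its parsed arguments and exited, they can be inspected:
    with open('nested_args.pkl', 'rb') as f:
        nested_args = pickle.load(f)
    print(nested_args)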
From a329afb9e013cfbd0336f89f66aeab77f63dfa40 Mon Sep 17 00:00:00 2001
From: David Sariel
Date: Sun, 22 Jan 2023 20:23:27 +0200
Subject: [PATCH 5/5] Install tox<4

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 0de6398..c3c5282 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Packages
         run: |
           apk add findutils git python3 py3-pip shellcheck
-          pip3 install tox
+          pip3 install 'tox<4'
       - name: Checkout
         uses: actions/checkout@v3
       - name: Tox
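The quotes around tox<4 matter: unquoted, the shell would parse <4 as input
redirection from a file named 4 and pip would install plain tox. The pin
itself presumably reacts to the tox 4 rewrite released in December 2022,
which broke many tox 3 configurations. The specifier's semantics can be
sanity-checked with the packaging library (an extra dependency assumed here;
the workflow itself only calls pip):

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    spec = SpecifierSet("<4")          # what pip parses out of 'tox<4'
    print(Version("3.28.0") in spec)   # True:  tox 3.x remains eligible
    print(Version("4.0.0") in spec)    # False: tox 4 is excluded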