Skip to content

Commit 7abed2f

Browse files
authored
Merge pull request #463 from JohT/feature/anomaly-detection-treemap-visualization
Add Treemap visualization to anomaly detection
2 parents 561e305 + a44c645 commit 7abed2f

File tree

7 files changed

+1570
-7
lines changed

7 files changed

+1570
-7
lines changed

domains/anomaly-detection/anomalyDetectionPython.sh

Lines changed: 68 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ set -o errexit -o pipefail
1515
# Overrideable Constants (defaults also defined in sub scripts)
1616
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
1717

18+
MARKDOWN_INCLUDES_DIRECTORY=${MARKDOWN_INCLUDES_DIRECTORY:-"includes"} # Subdirectory that contains Markdown files to be included by the Markdown template for the report.
19+
1820
## Get the directory that contains this script ("domains/anomaly-detection") if not already set
1921
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
2022
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
2123
# This way non-standard tools like readlink aren't needed.
2224
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
23-
echo "anomalyDetectionPipeline: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
25+
echo "anomalyDetectionPython: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
2426
# Get the "scripts" directory by taking the path of this script, going two directories up and then into "scripts".
2527
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/../../scripts"} # Repository directory containing the shell scripts
2628
# Get the "cypher" query directory for gathering features.
@@ -49,7 +51,7 @@ while [[ $# -gt 0 ]]; do
4951
verboseMode="--verbose"
5052
;;
5153
*)
52-
echo -e "${COLOR_ERROR}anomalyDetectionPipeline: Error: Unknown option: ${key}${COLOR_DEFAULT}" >&2
54+
echo -e "${COLOR_ERROR}anomalyDetectionPython: Error: Unknown option: ${key}${COLOR_DEFAULT}" >&2
5355
usage
5456
;;
5557
esac
@@ -72,10 +74,10 @@ is_sufficient_data_available() {
7274
query_result=$( execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionNodeCount.cypher" "${@}" )
7375
node_count=$(get_csv_column_value "${query_result}" "node_count")
7476
if [ "${node_count}" -lt 15 ]; then
75-
echo "anomalyDetectionPipeline: Warning: Skipping anomaly detection. Only ${node_count} ${language} ${nodeLabel} nodes. At least 15 required."
77+
echo "anomalyDetectionPython: Warning: Skipping anomaly detection. Only ${node_count} ${language} ${nodeLabel} nodes. At least 15 required."
7678
false
7779
else
78-
echo "anomalyDetectionPipeline: Info: Running anomaly detection with ${node_count} ${language} ${nodeLabel} nodes."
80+
echo "anomalyDetectionPython: Info: Running anomaly detection with ${node_count} ${language} ${nodeLabel} nodes."
7981
true
8082
fi
8183
}
@@ -92,7 +94,7 @@ is_sufficient_data_available() {
9294
anomaly_detection_features() {
9395
local nodeLabel
9496
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
95-
echo "anomalyDetectionPipeline: $(date +'%Y-%m-%dT%H:%M:%S%z') Collecting features for ${nodeLabel} nodes..."
97+
echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Collecting features for ${nodeLabel} nodes..."
9698

9799
# Determine the Betweenness centrality (with the directed graph projection) if not already done
98100
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Betweenness-Exists.cypher" \
@@ -127,7 +129,7 @@ anomaly_detection_using_python() {
127129
local language
128130
language=$( extractQueryParameter "projection_language" "${@}" )
129131

130-
echo "anomalyDetectionPipeline: $(date +'%Y-%m-%dT%H:%M:%S%z') Executing Python scripts for ${language} ${nodeLabel} nodes..."
132+
echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Executing Python scripts for ${language} ${nodeLabel} nodes..."
131133

132134
# Within the absolute (full) report directory for anomaly detection, create a sub directory for every detailed type (Java_Package, Java_Type,...)
133135
local detail_report_directory="${FULL_REPORT_DIRECTORY}/${language}_${nodeLabel}"
@@ -152,6 +154,8 @@ anomaly_detection_using_python() {
152154
# Required Parameters:
153155
# - projection_node_label=...
154156
# Label of the nodes that will be used for the projection. Example: "Package"
157+
# - projection_language=...
158+
# Name of the associated programming language. Examples: "Java", "Typescript"
155159
anomaly_detection_labels() {
156160
local nodeLabel
157161
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -177,12 +181,63 @@ anomaly_detection_labels() {
177181
# Label of the nodes that will be used for the projection. Example: "Package"
178182
# - projection_weight_property=...
179183
# Name of the node property that contains the dependency weight. Example: "weight"
184+
# - projection_language=...
185+
# Name of the associated programming language. Examples: "Java", "Typescript"
180186
anomaly_detection_python_reports() {
    # All three stages receive the caller's parameters unchanged.
    # Feature collection and label writing are wrapped in "time";
    # the Python stage in between is run without it.
    time anomaly_detection_features "$@"
    anomaly_detection_using_python "$@"
    time anomaly_detection_labels "$@"
}
185191

192+
# Writes the Markdown include file "TreemapChartsReference.md" (to be included
# in the main summary) that references every treemap chart as an image.
#
# Globals read:
# - FULL_REPORT_DIRECTORY
#   Directory that is searched recursively for "*Treemap*.svg" files.
# - MARKDOWN_INCLUDES_DIRECTORY
#   Subdirectory of FULL_REPORT_DIRECTORY that receives the generated file.
anomaly_detection_treemap_charts_markdown_reference() {
    echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Starting treemap charts markdown reference generation..."

    local includes_directory="${FULL_REPORT_DIRECTORY}/${MARKDOWN_INCLUDES_DIRECTORY}"
    local reference_file="${includes_directory}/TreemapChartsReference.md"
    mkdir -p "${includes_directory}"

    # The whole file is produced by one redirected group:
    # section title, one image reference per chart (sorted by path), closing horizontal rule.
    {
        echo "#### Treemap Charts"

        find "${FULL_REPORT_DIRECTORY}" -type f -name "*Treemap*.svg" | sort | while read -r svg_path; do
            # Only the file name (without directories) is used for the link target,
            # and the name without its extension serves as the alternative text.
            svg_name="${svg_path##*/}"
            echo ""
            echo "![${svg_name%.*}](./${svg_name})"
        done

        echo ""
        echo "---"
    } > "${reference_file}"

    echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Finished treemap charts markdown reference generation..."
}
227+
228+
# Runs the treemap visualization script "treemapVisualizations.py"
# located in ANOMALY_DETECTION_SCRIPT_DIR.
#
# Required Parameters:
# - projection_language=...
#   Name of the associated programming language. Examples: "Java", "Typescript"
#
# Globals read: ANOMALY_DETECTION_SCRIPT_DIR, FULL_REPORT_DIRECTORY, verboseMode
anomaly_detection_treemap_charts() {
    local language
    language=$( extractQueryParameter "projection_language" "${@}" )

    local visualization_script="${ANOMALY_DETECTION_SCRIPT_DIR}/treemapVisualizations.py"

    # verboseMode is intentionally left unquoted: when it is empty, no additional argument is passed.
    # shellcheck disable=SC2206
    local script_arguments=("${@}" "--report_directory" "${FULL_REPORT_DIRECTORY}" ${verboseMode})

    echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Visualizing ${language} results..."
    time "${visualization_script}" "${script_arguments[@]}"
}
240+
186241
# Create report directory
187242
REPORT_NAME="anomaly-detection"
188243
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
@@ -229,6 +284,7 @@ if is_sufficient_data_available "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=we
229284
if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
230285
createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
231286
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
287+
anomaly_detection_treemap_charts "${ALGORITHM_LANGUAGE}=Java"
232288
fi
233289
fi
234290

@@ -238,12 +294,17 @@ if is_sufficient_data_available "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=
238294
# Typescript modules: create the undirected and the directed dependency projection,
# run the anomaly detection reports on them and finally visualize the results as treemap charts.
if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"
    anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${ALGORITHM_LANGUAGE}=Typescript" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
    # The language parameter takes the programming language ("Typescript"), matching the
    # language passed to the reports above. "Module" is the node label, not a language.
    anomaly_detection_treemap_charts "${ALGORITHM_LANGUAGE}=Typescript"
fi
242299
fi
243300

301+
# -- Markdown summary ---------------------------
302+
303+
anomaly_detection_treemap_charts_markdown_reference
304+
244305
# ---------------------------------------------------------------
245306

246307
# Clean-up after report generation. Empty reports will be deleted.
247308
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"
248309

249-
echo "anomalyDetectionPipeline: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."
310+
echo "anomalyDetectionPython: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

0 commit comments

Comments
 (0)