Skip to content

Commit 2894d99

Browse files
committed
Add graph visualizations to anomaly detection
1 parent 8f86f7a commit 2894d99

File tree

12 files changed

+740
-19
lines changed

12 files changed

+740
-19
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash

# Entry point for the anomaly detection graph visualizations.
#
# This script is dynamically triggered by "VisualizationReports.sh" when report "All" or "Visualization" is enabled.
# It contains no logic of its own: it resolves the directories and then hands over to
# "graphs/anomalyDetectionGraphVisualization.sh", which does the "heavy lifting".
#
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
#
# Requires anomalyDetectionGraphVisualization.sh

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands).
set -e -o pipefail

# Overrideable constants (defaults also defined in sub scripts).
# ":=" assigns the default only when the variable is unset or empty.
: "${REPORTS_DIRECTORY:=reports}"

# Resolve the directory this script lives in, unless the caller already provided it.
# - BASH_SOURCE is used since it is also supported by shells other than Bash.
# - CDPATH=. limits the scope of "cd" to prevent unintended directory changes.
# - "cd" + "pwd -P" avoids the need for non-standard tools like readlink.
if [ -z "${ANOMALY_DETECTION_SCRIPT_DIR}" ]; then
    ANOMALY_DETECTION_SCRIPT_DIR=$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)
fi

# The "graphs" directory next to this script holds the delegated visualization script.
: "${ANOMALY_DETECTION_GRAPHS_DIR:=${ANOMALY_DETECTION_SCRIPT_DIR}/graphs}"

# Delegate the execution to the responsible script (sourced, so it runs in this shell).
. "${ANOMALY_DETECTION_GRAPHS_DIR}/anomalyDetectionGraphVisualization.sh"
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Authority" including their incoming and outgoing dependencies, sizes based on PageRank and thick outline for nodes with high Page Rank to Article Rank difference in Graphviz format.
//
// Parameters:
//   $projection_node_label  label that the central node and all of its neighbours need to have
//   $projection_node_rank   selects the central node by its "anomalyAuthorityRank" property
//   $projection_language    only used for the graph title
// Returns one Graphviz statement per row in the column "graphVizOutputLine".
//
// Steps 3 and 4 use OPTIONAL MATCH and step 5 aggregates inside a subquery, so that the graph title and
// the central node are still returned when one of those steps doesn't find anything.

// Step 1: Query overall statistics, e.g. max weight for later normalization
   MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
   WHERE $projection_node_label IN labels(sourceForStatistics)
     AND $projection_node_label IN labels(targetForStatistics)
    // Same fallback (1) as for the individual edge weights below. Without it, maxWeight is null when no
    // dependency has a weight property, the pen width becomes null and every edge line gets dropped.
    WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight, 1)) AS maxWeight
        ,percentileDisc(sourceForStatistics.centralityPageRankToArticleRankDifference, 0.80) AS pageToArticleRankThreshold
        ,percentileDisc(targetForStatistics.centralityPageRankNormalized, 0.80) AS pageRankThreshold
// Step 2: Query selected central node
   MATCH (central)
   WHERE $projection_node_label IN labels(central)
     AND central.anomalyAuthorityRank = toInteger($projection_node_rank)
    WITH maxWeight
        ,pageToArticleRankThreshold
        ,pageRankThreshold
        ,central
        ,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Authority\\n" AS graphLabel
        ,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
        ,[] AS graphVizOutput
    WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
    WITH *, "🏛️ authority #" + central.anomalyAuthorityRank + "\\n" + central.name AS centralNodeLabel
    WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
// Step 3: Query direct incoming dependencies to the central node
// OPTIONAL: without any incoming dependency a single row with null values is kept. All strings derived
// from it are null and collect() skips nulls, which leads to empty lists instead of an empty result.
OPTIONAL MATCH (source)-[dependency:DEPENDS_ON]->(central)
   WHERE $projection_node_label IN labels(source)
     AND source.outgoingDependencies > 0
   ORDER BY dependency.weight DESC, source.name ASC
   LIMIT 40
    WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
    WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
    WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
    // Thick outline for nodes with a high PageRank to ArticleRank difference
    WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
                 THEN 5 ELSE 2 END AS scaledNodeBorder
    // Bigger, circular shape for nodes with a high PageRank
    WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
                 THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
    WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
    // Add the last part of the element id to make the node name unique, even if the name itself isn't.
    WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
    WITH *, "penwidth = " + scaledNodeBorder + "; " AS directInBorder
    // Split long names like inner classes identified by a dollar sign ($)
    WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
    WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directInLabel
    WITH *, " [" + nodeEmphasis + directInLabel + directInBorder + "]; " AS directInNodeProperties
    WITH *, "\"" + sourceId + "\" " + directInNodeProperties AS directInNode
    WITH maxWeight
        ,pageToArticleRankThreshold
        ,pageRankThreshold
        ,central
        ,graphVizOutput
        ,collect(source) AS incomingDependencyNodes
        ,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
    WITH *, graphVizOutput + directInEdges AS graphVizOutput
// Step 4: Query direct outgoing dependencies from the central node
// OPTIONAL for the same reason as in step 3. Here, "source" is the target of the outgoing dependency.
OPTIONAL MATCH (source)<-[dependency:DEPENDS_ON]-(central)
   WHERE $projection_node_label IN labels(source)
     AND source.incomingDependencies > 0
   ORDER BY dependency.weight DESC, source.name ASC
   LIMIT 40
    WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
    WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
    WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
    // Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
    WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
    WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
                 THEN 5 ELSE 2 END AS scaledNodeBorder
    WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
                 THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
    WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
    // Add the last part of the element id to make the node name unique, even if the name itself isn't.
    WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
    WITH *, "penwidth = " + scaledNodeBorder + "; " AS directOutBorder
    WITH *, "color = 5; fillcolor = 1; " AS directOutColors
    // Split long names like inner classes identified by a dollar sign ($)
    WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
    WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directOutLabel
    WITH *, " [" + nodeEmphasis + directOutLabel + directOutBorder + directOutColors + "]; " AS directOutNodeProperties
    WITH *, "\"" + sourceId + "\" " + directOutNodeProperties AS directOutNode
    WITH maxWeight
        ,central
        ,graphVizOutput
        ,incomingDependencyNodes
        ,collect(source) AS outgoingDependencyNodes
        ,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
    WITH *, graphVizOutput + directOutEdges AS graphVizOutput
    WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
// Step 5: Query dependencies between direct dependencies outside the central node
// The aggregating subquery always returns exactly one row (with an empty list if nothing is found),
// so missing secondary dependencies no longer remove the lines collected in the steps before.
    CALL (directDependentNodes) {
        UNWIND directDependentNodes AS directDependentNode
         MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
         WHERE anotherDirectDependentNode IN directDependentNodes
           AND anotherDirectDependentNode <> directDependentNode
          // DISTINCT since a node can be contained twice (incoming and outgoing) in the list.
          // Sorting directly before LIMIT makes sure that the highest weights are the ones that are kept.
          WITH DISTINCT directDependentNode, dependency, anotherDirectDependentNode
         ORDER BY dependency.weight DESC, directDependentNode.name ASC
         LIMIT 140
          WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
          // Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
          // and an even lighter color for secondary dependency edges
          WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3; color = 3" AS edgeAttributes
          WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
          WITH *, anotherDirectDependentNode.name + "_" + split(elementId(anotherDirectDependentNode), ':')[-1] AS anotherDirectDependentNodeId
        RETURN collect("\"" + directDependentNodeId + "\" -> \"" + anotherDirectDependentNodeId + "\" [" + edgeAttributes + "]") AS directDependenciesEdges
    }
    WITH graphVizOutput + directDependenciesEdges AS graphVizOutput
  UNWIND graphVizOutput AS graphVizOutputLine
  RETURN DISTINCT graphVizOutputLine
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Bottleneck" including their incoming and outgoing dependencies and output them in Graphviz format.
//
// Parameters:
//   $projection_node_label  label that the central node and all of its neighbours need to have
//   $projection_node_rank   selects the central node by its "anomalyBottleneckRank" property
//   $projection_language    only used for the graph title
// Returns one Graphviz statement per row in the column "graphVizOutputLine".
//
// Steps 3 and 4 use OPTIONAL MATCH and step 5 aggregates inside a subquery, so that the graph title and
// the central node are still returned when one of those steps doesn't find anything.

// Step 1: Query overall statistics, e.g. max weight for later normalization
   MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
   WHERE $projection_node_label IN labels(sourceForStatistics)
     AND $projection_node_label IN labels(targetForStatistics)
    // Same fallback (1) as for the individual edge weights below. Without it, maxWeight is null when no
    // dependency has a weight property, the pen width becomes null and every edge line gets dropped.
    WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight, 1)) AS maxWeight
// Step 2: Query selected central node
   MATCH (central)
   WHERE $projection_node_label IN labels(central)
     AND central.anomalyBottleneckRank = toInteger($projection_node_rank)
    WITH maxWeight
        ,central
        ,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Bottleneck\\n" AS graphLabel
        ,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
        ,[] AS graphVizOutput
    WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
    WITH *, "🔒 bottleneck #" + central.anomalyBottleneckRank + "\\n" + central.name AS centralNodeLabel
    WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
// Step 3: Query direct incoming dependencies to the central node
// OPTIONAL: without any incoming dependency a single row with null values is kept. All strings derived
// from it are null and collect() skips nulls, which leads to empty lists instead of an empty result.
OPTIONAL MATCH (source)-[dependency:DEPENDS_ON]->(central)
   WHERE $projection_node_label IN labels(source)
     AND source.outgoingDependencies > 0
   ORDER BY dependency.weight DESC, source.name ASC
   LIMIT 60
    WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
    WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
    WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
    // Add the last part of the element id to make the node name unique, even if the name itself isn't.
    // Otherwise Graphviz merges different nodes that share the same name into one.
    WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
    // Since the node id isn't the plain name anymore, the name is set explicitly as label.
    WITH *, "\"" + sourceId + "\" [label = \"" + source.name + "\"]; " AS directInNode
    WITH maxWeight
        ,central
        ,graphVizOutput
        ,collect(source) AS incomingDependencyNodes
        ,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
    WITH *, graphVizOutput + directInEdges AS graphVizOutput
// Step 4: Query direct outgoing dependencies from the central node
// OPTIONAL for the same reason as in step 3. Here, "source" is the target of the outgoing dependency.
OPTIONAL MATCH (source)<-[dependency:DEPENDS_ON]-(central)
   WHERE $projection_node_label IN labels(source)
     AND source.incomingDependencies > 0
   ORDER BY dependency.weight DESC, source.name ASC
   LIMIT 60
    WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
    WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
    WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
    // Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
    WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
    // Unique node id with the plain name as label (see step 3)
    WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
    WITH *, "\"" + sourceId + "\" [label = \"" + source.name + "\"; color = 5; fillcolor = 1;]; " AS directOutNode
    WITH maxWeight
        ,central
        ,graphVizOutput
        ,incomingDependencyNodes
        ,collect(source) AS outgoingDependencyNodes
        ,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
    WITH *, graphVizOutput + directOutEdges AS graphVizOutput
    WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
// Step 5: Query dependencies between direct dependencies outside the central node
// The aggregating subquery always returns exactly one row (with an empty list if nothing is found),
// so missing secondary dependencies no longer remove the lines collected in the steps before.
    CALL (directDependentNodes) {
        UNWIND directDependentNodes AS directDependentNode
         MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
         WHERE anotherDirectDependentNode IN directDependentNodes
           AND anotherDirectDependentNode <> directDependentNode
          // DISTINCT since a node can be contained twice (incoming and outgoing) in the list.
          // Sorting directly before LIMIT makes sure that the highest weights are the ones that are kept.
          WITH DISTINCT directDependentNode, dependency, anotherDirectDependentNode
         ORDER BY dependency.weight DESC, directDependentNode.name ASC
         LIMIT 120
          WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
          // Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
          // and a light color for secondary dependency edges
          WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3; color = 3" AS edgeAttributes
          // Same unique node ids as used for the node declarations in steps 3 and 4
          WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
          WITH *, anotherDirectDependentNode.name + "_" + split(elementId(anotherDirectDependentNode), ':')[-1] AS anotherDirectDependentNodeId
        RETURN collect("\"" + directDependentNodeId + "\" -> \"" + anotherDirectDependentNodeId + "\" [" + edgeAttributes + "]") AS directDependenciesEdges
    }
    WITH graphVizOutput + directDependenciesEdges AS graphVizOutput
  UNWIND graphVizOutput AS graphVizOutputLine
  RETURN DISTINCT graphVizOutputLine
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Bridge" including their incoming and outgoing dependencies and output them in Graphviz format.
//
// Parameters:
//   $projection_node_label  label that the central node and all of its neighbours need to have
//   $projection_node_rank   selects the central node by its "anomalyBridgeRank" property
//   $projection_language    only used for the graph title
// Returns one Graphviz statement per row in the column "graphVizOutputLine".
//
// Steps 3 and 4 use OPTIONAL MATCH and step 5 aggregates inside a subquery, so that the graph title and
// the central node are still returned when one of those steps doesn't find anything.

// Step 1: Query overall statistics, e.g. max weight for later normalization
   MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
   WHERE $projection_node_label IN labels(sourceForStatistics)
     AND $projection_node_label IN labels(targetForStatistics)
    // Same fallback (1) as for the individual edge weights below. Without it, maxWeight is null when no
    // dependency has a weight property, the pen width becomes null and every edge line gets dropped.
    WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight, 1)) AS maxWeight
// Step 2: Query selected central node
   MATCH (central)
   WHERE $projection_node_label IN labels(central)
     AND central.anomalyBridgeRank = toInteger($projection_node_rank)
    WITH maxWeight
        ,central
        ,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Bridge\\n" AS graphLabel
        ,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
        ,[] AS graphVizOutput
    WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
    WITH *, "🌉 bridge #" + central.anomalyBridgeRank + "\\n" + central.name AS centralNodeLabel
    WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
// Step 3: Query direct incoming dependencies to the central node
// OPTIONAL: without any incoming dependency a single row with null values is kept. All strings derived
// from it are null and collect() skips nulls, which leads to empty lists instead of an empty result.
OPTIONAL MATCH (source)-[dependency:DEPENDS_ON]->(central)
   WHERE $projection_node_label IN labels(source)
     AND source.outgoingDependencies > 0
   ORDER BY dependency.weight DESC, source.name ASC
   LIMIT 60
    WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
    WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
    WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
    // Add the last part of the element id to make the node name unique, even if the name itself isn't.
    // Otherwise Graphviz merges different nodes that share the same name into one.
    WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
    // Since the node id isn't the plain name anymore, the name is set explicitly as label.
    WITH *, "\"" + sourceId + "\" [label = \"" + source.name + "\"]; " AS directInNode
    WITH maxWeight
        ,central
        ,graphVizOutput
        ,collect(source) AS incomingDependencyNodes
        ,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
    WITH *, graphVizOutput + directInEdges AS graphVizOutput
// Step 4: Query direct outgoing dependencies from the central node
// OPTIONAL for the same reason as in step 3. Here, "source" is the target of the outgoing dependency.
OPTIONAL MATCH (source)<-[dependency:DEPENDS_ON]-(central)
   WHERE $projection_node_label IN labels(source)
     AND source.incomingDependencies > 0
   ORDER BY dependency.weight DESC, source.name ASC
   LIMIT 60
    WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
    WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
    WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
    // Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
    WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
    // Unique node id with the plain name as label (see step 3)
    WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
    WITH *, "\"" + sourceId + "\" [label = \"" + source.name + "\"; color = 5; fillcolor = 1;]; " AS directOutNode
    WITH maxWeight
        ,central
        ,graphVizOutput
        ,incomingDependencyNodes
        ,collect(source) AS outgoingDependencyNodes
        ,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
    WITH *, graphVizOutput + directOutEdges AS graphVizOutput
    WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
// Step 5: Query dependencies between direct dependencies outside the central node
// The aggregating subquery always returns exactly one row (with an empty list if nothing is found),
// so missing secondary dependencies no longer remove the lines collected in the steps before.
    CALL (directDependentNodes) {
        UNWIND directDependentNodes AS directDependentNode
         MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
         WHERE anotherDirectDependentNode IN directDependentNodes
           AND anotherDirectDependentNode <> directDependentNode
          // DISTINCT since a node can be contained twice (incoming and outgoing) in the list.
          // Sorting directly before LIMIT makes sure that the highest weights are the ones that are kept.
          WITH DISTINCT directDependentNode, dependency, anotherDirectDependentNode
         ORDER BY dependency.weight DESC, directDependentNode.name ASC
         LIMIT 140
          WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
          // Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
          // and a light color for secondary dependency edges
          WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3; color = 3" AS edgeAttributes
          // Same unique node ids as used for the node declarations in steps 3 and 4
          WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
          WITH *, anotherDirectDependentNode.name + "_" + split(elementId(anotherDirectDependentNode), ':')[-1] AS anotherDirectDependentNodeId
        RETURN collect("\"" + directDependentNodeId + "\" -> \"" + anotherDirectDependentNodeId + "\" [" + edgeAttributes + "]") AS directDependenciesEdges
    }
    WITH graphVizOutput + directDependenciesEdges AS graphVizOutput
  UNWIND graphVizOutput AS graphVizOutputLine
  RETURN DISTINCT graphVizOutputLine

0 commit comments

Comments
 (0)