Skip to content

Commit d3542c2

Browse files
authored
Merge branch 'master' into paulway_improve_topological_sort
2 parents c6aaf6a + 5ec5c3a commit d3542c2

File tree

353 files changed

+1600
-1197
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

353 files changed

+1600
-1197
lines changed

docs/api_index.rst

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ insights.core.dr
3030
:members:
3131
:exclude-members: requires, optional, metadata, group, tags
3232

33+
insights.core.exceptions
34+
------------------------
35+
36+
.. automodule:: insights.core.exceptions
37+
:members:
38+
:show-inheritance:
39+
:undoc-members:
40+
3341
insights.core.filters
3442
---------------------
3543

@@ -74,8 +82,8 @@ insights.parsers
7482
----------------
7583

7684
.. automodule:: insights.parsers
77-
:members: ParseException, SkipException, calc_offset, get_active_lines,
78-
keyword_search, optlist_to_dict, parse_delimited_table,
85+
:members: calc_offset, get_active_lines, keyword_search,
86+
optlist_to_dict, parse_delimited_table,
7987
parse_fixed_table, split_kv_pairs, unsplit_lines
8088
:show-inheritance:
8189
:undoc-members:

docs/exception_model.rst

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ any exceptions in the data (“dirty parser”). This allows rules that don’t
8282
exceptions to rely on only the first parser, and those rules will not run if valid data
8383
is not present. If the dirty parser identifies errors in the data then it will save
8484
information regarding the errors for use by rules. If no errors are found in the data
85-
then the dirty parser will raise :py:class:`insights.core.exceptions.SkipException`
85+
then the dirty parser will raise :py:class:`insights.core.exceptions.SkipComponent`
8686
to indicate to the engine that it should be removed from the dependency hierarchy.
8787

8888
Other Exceptions from Parsers
@@ -99,15 +99,13 @@ types aren’t important and such checks may limit expressiveness and flexibilit
9999
Parsers should not use the assert statement in place of error handling code.
100100
Asserts are for debugging purposes only.
101101

102-
SkipComponent and SkipException
103-
===============================
102+
SkipComponent
103+
=============
104104

105105
Any component may raise `SkipComponent` to signal to the engine that
106106
nothing is wrong but that the component should be taken out of dependency
107107
resolution. This is useful if a component's dependencies are met but it's
108108
still unable to produce a meaningful result.
109-
:py:class:`insights.core.exceptions.SkipException` is a specialization of this for the
110-
dirty parser use case above, but it's treated the same as `SkipComponent`.
111109

112110
Exception Recognition by the Insights Engine
113111
============================================
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.. automodule:: insights.parsers.blacklisted
2+
:members:
3+
:show-inheritance:

insights/__init__.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@
3030
from insights.core import (CommandParser, ContainerParser, FileListing, IniConfigFile, JSONParser, LegacyItemAccess, # noqa: F401
3131
LogFileOutput, Parser, Scannable, SysconfigOptions, Syslog, XMLParser, YAMLParser, dr, # noqa: F401
3232
taglang)
33-
from insights.core.archives import COMPRESSION_TYPES, InvalidArchive, InvalidContentType, extract
33+
from insights.core.archives import COMPRESSION_TYPES, extract
3434
from insights.core.context import (ClusterArchiveContext, ExecutionContext, HostContext, # noqa: F401
3535
HostArchiveContext, SerializedArchiveContext)
36-
from insights.core.exceptions import SkipComponent # noqa: F401
36+
from insights.core.exceptions import InvalidArchive, InvalidContentType, SkipComponent # noqa: F401
3737
from insights.core.filters import add_filter, apply_filters, get_filters # noqa: F401
3838
from insights.core.hydration import create_context, initialize_broker # noqa: F401
3939
from insights.core.plugins import (combiner, condition, datasource, fact, incident, make_fail, make_fingerprint, # noqa: F401
@@ -266,9 +266,8 @@ def _load_context(path):
266266
return dr.get_component(path)
267267

268268

269-
def run(component=None, root=None, print_summary=False,
270-
context=None, inventory=None, print_component=None):
271-
269+
def run(component=None, root=None, print_summary=False, context=None, inventory=None, print_component=None,
270+
store_skips=False):
272271
args = None
273272
formatters = None
274273

@@ -293,6 +292,8 @@ def run(component=None, root=None, print_summary=False,
293292
p.add_argument("--context", help="Execution Context. Defaults to HostContext if an archive isn't passed.")
294293
p.add_argument("--no-load-default", help="Don't load the default plugins.", action="store_true")
295294
p.add_argument("--parallel", help="Execute rules in parallel.", action="store_true")
295+
p.add_argument("--show-skips", help="Capture skips in the broker for troubleshooting.", action="store_true",
296+
default=False)
296297
p.add_argument("--tags", help="Expression to select rules by tag.")
297298

298299
class Args(object):
@@ -385,6 +386,10 @@ class Args(object):
385386
graph = dr.COMPONENTS[dr.GROUPS.single]
386387

387388
broker = dr.Broker()
389+
if args:
390+
broker.store_skips = args.show_skips
391+
else:
392+
broker.store_skips = store_skips
388393

389394
if args and args.bare:
390395
ctx = ExecutionContext() # dummy context that no spec depend on. needed for filters to work

insights/client/apps/malware_detection/__init__.py

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import logging
88
from glob import glob
99
from datetime import datetime
10-
from tempfile import NamedTemporaryFile
10+
from tempfile import NamedTemporaryFile, gettempdir
1111
try:
1212
# python 2
1313
from urllib import quote as urlencode
@@ -21,7 +21,8 @@
2121
from insights.client.utilities import (
2222
generate_machine_id, write_data_to_file, get_time
2323
)
24-
from insights.util.subproc import call, CalledProcessError
24+
from insights.core.exceptions import CalledProcessError
25+
from insights.util.subproc import call
2526

2627
logger = logging.getLogger(__name__)
2728
MIN_YARA_VERSION = "4.1.0"
@@ -186,6 +187,7 @@ def __init__(self, insights_config):
186187
self.add_metadata = self._get_config_option('add_metadata', False)
187188

188189
self.matches = 0
190+
self.potential_matches = 0
189191

190192
def run(self):
191193
# Start the scans and record the time they were started
@@ -201,7 +203,11 @@ def run(self):
201203

202204
# Write a message to user informing them if there were matches or not and what to do next
203205
if self.matches == 0:
204-
logger.info("No rule matches found.\n")
206+
if self.potential_matches == 0:
207+
logger.info("No rule matches found.\n")
208+
else:
209+
logger.info("Rule matches potentially found but problems encountered parsing them, so no match data to upload.")
210+
logger.info("Please contact support.\n")
205211
else:
206212
logger.info("Found %d rule match%s.", self.matches, 'es' if self.matches > 1 else '')
207213
if not self.test_scan:
@@ -604,11 +610,12 @@ def _get_rules(self):
604610
# However it can happen that the rules file isn't removed for some reason, so remove any existing
605611
# rules files before beginning a new scan, otherwise they may show up as matches in the scan results.
606612
old_rules_files = sum([glob(os.path.join(path, rules))
607-
for path in ('/tmp', '/var/tmp')
613+
for path in ('/tmp', '/var/tmp', '/usr/tmp', gettempdir())
608614
for rules in ('.tmpmdsigs*', 'tmp_malware-detection-client_rules.*')], [])
609615
for old_rules_file in old_rules_files:
610-
logger.debug("Removing old rules file %s", old_rules_file)
611-
os.remove(old_rules_file)
616+
if os.path.exists(old_rules_file):
617+
logger.debug("Removing old rules file %s", old_rules_file)
618+
os.remove(old_rules_file)
612619

613620
self.rules_location = self._get_config_option('rules_location', '')
614621

@@ -741,8 +748,16 @@ def scan_filesystem(self):
741748
return False
742749

743750
# Exclude the rules file and insights-client log files, unless they are things we specifically want to scan
744-
if self.rules_file not in self.scan_fsobjects:
745-
self.filesystem_scan_exclude_list.append(self.rules_file)
751+
# Get a list of potential rules file locations, e.g. /tmp, /var/tmp, /usr/tmp and gettempdir()
752+
# e.g. customers may have /tmp linked to /var/tmp, so both must be checked when excluding the downloaded rules file
753+
rules_file_name = os.path.basename(self.rules_file)
754+
potential_tmp_dirs = set([gettempdir(), '/tmp', '/var/tmp', '/usr/tmp'])
755+
potential_rules_files = set(list(map(lambda d: os.path.join(d, rules_file_name), potential_tmp_dirs)) + [self.rules_file])
756+
rules_files = list(filter(lambda f: os.path.isfile(f), potential_rules_files))
757+
for rules_file in rules_files:
758+
if rules_file not in self.scan_fsobjects:
759+
self.filesystem_scan_exclude_list.append(rules_file)
760+
logger.debug("Excluding rules file: %s", rules_file)
746761
insights_log_files = glob(constants.default_log_file + '*')
747762
self.filesystem_scan_exclude_list.extend(list(set(insights_log_files) - set(self.scan_fsobjects)))
748763

@@ -795,7 +810,12 @@ def scan_filesystem(self):
795810
logger.debug("Unable to scan %s: %s", toplevel_dir, cpe.output.strip())
796811
continue
797812

798-
self.parse_scan_output(output.strip())
813+
try:
814+
self.parse_scan_output(output.strip())
815+
except Exception as e:
816+
self.potential_matches += 1
817+
logger.exception("Rule match(es) potentially found in %s but problems encountered parsing the results: %s. Skipping ...",
818+
toplevel_dir, str(e))
799819

800820
dir_scan_end = time.time()
801821
logger.info("Scan time for %s: %d seconds", toplevel_dir, (dir_scan_end - dir_scan_start))
@@ -862,7 +882,12 @@ def scan_processes(self):
862882
logger.debug("Unable to scan process %s: %s", scan_pid, cpe.output.strip())
863883
continue
864884

865-
self.parse_scan_output(output)
885+
try:
886+
self.parse_scan_output(output)
887+
except Exception as e:
888+
self.potential_matches += 1
889+
logger.exception("Rule match(es) potentially found in process %s but problems encountered parsing the results: %s. Skipping ...",
890+
scan_pid, str(e))
866891

867892
pid_scan_end = time.time()
868893
logger.info("Scan time for process %s: %d seconds", scan_pid, (pid_scan_end - pid_scan_start))
@@ -969,11 +994,15 @@ def skip_string_data_lines(string_data_lines):
969994
rule_match['matches'] = [rule_match_dict]
970995

971996
if self.add_metadata:
972-
# Add extra data to each rule match, beyond what yara provides
973-
# Eg, for files: line numbers & context, checksums; for processes: process name
974-
# TODO: find more pythonic ways of doing this stuff instead of using system commands
975-
metadata_func = self._add_file_metadata if source_type == 'file' else self._add_process_metadata
976-
metadata_func(rule_match['matches'])
997+
try:
998+
# Add extra data to each rule match, beyond what yara provides
999+
# Eg, for files: line numbers & context, checksums; for processes: process name
1000+
# TODO: find more pythonic ways of doing this stuff instead of using system commands
1001+
metadata_func = self._add_file_metadata if source_type == 'file' else self._add_process_metadata
1002+
metadata_func(rule_match['matches'])
1003+
except Exception as e:
1004+
logger.error("Error adding metadata to rule match %s in %s %s: %s. Skipping ...",
1005+
rule_name, source_type, source, str(e))
9771006

9781007
self.matches += 1
9791008
logger.info("Matched rule %s in %s %s", rule_name, source_type, source)

insights/client/data_collector.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from subprocess import Popen, PIPE, STDOUT
1616
from tempfile import NamedTemporaryFile
1717

18+
from insights.core.blacklist import BLACKLISTED_SPECS
1819
from insights.util import mangle
1920
from ..contrib.soscleaner import SOSCleaner
2021
from .utilities import _expand_paths, get_version_info, systemd_notify_init_thread, get_tags
@@ -132,6 +133,10 @@ def _write_blacklist_report(self, blacklist_report):
132133
self.archive.add_metadata_to_archive(
133134
json.dumps(blacklist_report), '/blacklist_report')
134135

136+
if BLACKLISTED_SPECS:
137+
self.archive.add_metadata_to_archive(
138+
json.dumps({"specs": BLACKLISTED_SPECS}), '/blacklisted_specs.txt')
139+
135140
def _write_egg_release(self):
136141
logger.debug("Writing egg release to archive...")
137142
egg_release = ''
@@ -327,11 +332,13 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report):
327332
'insights_commands', mangle.mangle_command(c['command']))
328333
if c['command'] in rm_commands or c.get('symbolic_name') in rm_commands:
329334
logger.warn("WARNING: Skipping command %s", c['command'])
335+
BLACKLISTED_SPECS.append(c['symbolic_name'])
330336
elif self.mountpoint == "/" or c.get("image"):
331337
cmd_specs = self._parse_command_spec(c, conf['pre_commands'])
332338
for s in cmd_specs:
333339
if s['command'] in rm_commands:
334340
logger.warn("WARNING: Skipping command %s", s['command'])
341+
BLACKLISTED_SPECS.append(s['symbolic_name'])
335342
continue
336343
cmd_spec = InsightsCommand(self.config, s, self.mountpoint)
337344
self.archive.add_to_archive(cmd_spec)
@@ -343,12 +350,14 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report):
343350
for f in conf['files']:
344351
if f['file'] in rm_files or f.get('symbolic_name') in rm_files:
345352
logger.warn("WARNING: Skipping file %s", f['file'])
353+
BLACKLISTED_SPECS.append(f['symbolic_name'])
346354
else:
347355
file_specs = self._parse_file_spec(f)
348356
for s in file_specs:
349357
# filter files post-wildcard parsing
350358
if s['file'] in rm_conf.get('files', []):
351359
logger.warn("WARNING: Skipping file %s", s['file'])
360+
BLACKLISTED_SPECS.append(s['symbolic_name'])
352361
else:
353362
file_spec = InsightsFile(s, self.mountpoint)
354363
self.archive.add_to_archive(file_spec)
@@ -361,11 +370,13 @@ def run_collection(self, conf, rm_conf, branch_info, blacklist_report):
361370
if g.get('symbolic_name') in rm_files:
362371
# ignore glob via symbolic name
363372
logger.warn("WARNING: Skipping file %s", g['glob'])
373+
BLACKLISTED_SPECS.append(g['symbolic_name'])
364374
else:
365375
glob_specs = self._parse_glob_spec(g)
366376
for g in glob_specs:
367377
if g['file'] in rm_files:
368378
logger.warn("WARNING: Skipping file %s", g['file'])
379+
BLACKLISTED_SPECS.append(g['symbolic_name'])
369380
else:
370381
glob_spec = InsightsFile(g, self.mountpoint)
371382
self.archive.add_to_archive(glob_spec)

insights/collect.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"""
1010
from __future__ import print_function
1111
import argparse
12+
import json
1213
import logging
1314
import os
1415
import sys
@@ -17,11 +18,13 @@
1718

1819
from datetime import datetime
1920

20-
from insights import apply_configs, apply_default_enabled, dr, get_pool
21-
from insights.core import blacklist, filters
21+
from insights import apply_configs, apply_default_enabled, get_pool
22+
from insights.core import blacklist, dr, filters
23+
from insights.core.blacklist import BLACKLISTED_SPECS
24+
from insights.core.exceptions import CalledProcessError
2225
from insights.core.serde import Hydration
2326
from insights.util import fs
24-
from insights.util.subproc import call, CalledProcessError
27+
from insights.util.subproc import call
2528

2629
SAFE_ENV = {
2730
"PATH": os.path.pathsep.join([
@@ -203,8 +206,8 @@
203206
- name: insights.components.virtualization.IsBareMetal
204207
enabled: true
205208
206-
# needed for the 'pre-check' of the 'ss' spec
207-
- name: insights.parsers.lsmod
209+
# needed for the 'pre-check' of the 'ss' spec and the 'modinfo_filtered_modules' spec
210+
- name: insights.parsers.lsmod.LsMod
208211
enabled: true
209212
210213
# needed for the 'pre-check' of the 'is_satellite_server' spec
@@ -401,6 +404,7 @@ def collect(manifest=default_manifest, tmp_path=None, compress=False, rm_conf=No
401404
log.warning('WARNING: Unknown component in blacklist: %s' % component)
402405
else:
403406
dr.set_enabled(component, enabled=False)
407+
BLACKLISTED_SPECS.append(component.split('.')[-1])
404408
log.warning('WARNING: Skipping component: %s', component)
405409

406410
to_persist = get_to_persist(client.get("persist", set()))
@@ -437,6 +441,11 @@ def collect(manifest=default_manifest, tmp_path=None, compress=False, rm_conf=No
437441
broker.add_observer(h.make_persister(to_persist))
438442
dr.run_all(broker=broker, pool=pool)
439443

444+
if BLACKLISTED_SPECS:
445+
_write_out_blacklisted_specs(output_path)
446+
# Empty the list in place so the specs aren't written again by the client.
447+
del BLACKLISTED_SPECS[:]
448+
440449
collect_errors = _parse_broker_exceptions(broker, EXCEPTIONS_TO_REPORT)
441450

442451
if compress:
@@ -472,6 +481,41 @@ def _parse_broker_exceptions(broker, exceptions_to_report):
472481
return errors
473482

474483

484+
def _write_out_blacklisted_specs(output_path):
    """
    Persist the blacklisted specs together with a matching meta-data file.

    The collected spec names are dumped as JSON to ``blacklisted_specs.txt``
    and a meta-data document named ``insights.specs.Specs.blacklisted_specs``
    is written under ``<output_path>/meta_data`` so the specs file can be
    loaded when the archive is processed.

    Args:
        output_path (str): Path of the output directory.
    """
    meta_data_dir = os.path.join(output_path, "meta_data")

    # If a "meta_data" directory is present the specs file is written to the
    # "data" directory next to it; otherwise straight into the output directory.
    data_root = os.path.join(output_path, "data") if os.path.exists(meta_data_dir) else output_path

    specs_file = os.path.join(data_root, "blacklisted_specs.txt")
    with open(specs_file, "w") as specs_fd:
        json.dump({"specs": BLACKLISTED_SPECS}, specs_fd)

    # Meta-data entry pointing at the specs file written above.
    meta_doc = {
        "name": "insights.specs.Specs.blacklisted_specs",
        "exec_time": 0.0,
        "errors": [],
        "results": {
            "type": "insights.core.spec_factory.DatasourceProvider",
            "object": {
                "relative_path": "blacklisted_specs.txt"
            }
        },
        "ser_time": 0.0
    }

    meta_file = os.path.join(meta_data_dir, "insights.specs.Specs.blacklisted_specs")
    with open(meta_file, "w") as meta_fd:
        json.dump(meta_doc, meta_fd)
517+
518+
475519
def main():
476520
# Remove command line args so that they are not parsed by any called modules
477521
# The main fxn is only invoked as a cli, if calling from another cli then

0 commit comments

Comments
 (0)