diff --git a/bbot/__init__.py b/bbot/__init__.py
index 914c45ff4b..8e016095f4 100644
--- a/bbot/__init__.py
+++ b/bbot/__init__.py
@@ -1,6 +1,2 @@
 # version placeholder (replaced by poetry-dynamic-versioning)
 __version__ = "v0.0.0"
-
-from .scanner import Scanner, Preset
-
-__all__ = ["Scanner", "Preset"]
diff --git a/bbot/constants.py b/bbot/constants.py
new file mode 100644
index 0000000000..364aead2ff
--- /dev/null
+++ b/bbot/constants.py
@@ -0,0 +1,72 @@
+SCAN_STATUS_QUEUED = 0
+SCAN_STATUS_NOT_STARTED = 1
+SCAN_STATUS_STARTING = 2
+SCAN_STATUS_RUNNING = 3
+SCAN_STATUS_FINISHING = 4
+SCAN_STATUS_ABORTING = 5
+SCAN_STATUS_FINISHED = 6
+SCAN_STATUS_FAILED = 7
+SCAN_STATUS_ABORTED = 8
+
+
+SCAN_STATUSES = {
+    "QUEUED": SCAN_STATUS_QUEUED,
+    "NOT_STARTED": SCAN_STATUS_NOT_STARTED,
+    "STARTING": SCAN_STATUS_STARTING,
+    "RUNNING": SCAN_STATUS_RUNNING,
+    "FINISHING": SCAN_STATUS_FINISHING,
+    "ABORTING": SCAN_STATUS_ABORTING,
+    "FINISHED": SCAN_STATUS_FINISHED,
+    "FAILED": SCAN_STATUS_FAILED,
+    "ABORTED": SCAN_STATUS_ABORTED,
+}
+
+SCAN_STATUS_CODES = {v: k for k, v in SCAN_STATUSES.items()}
+
+
+def is_valid_scan_status(status):
+    """
+    Check if a status is a valid scan status
+    """
+    return status in SCAN_STATUSES
+
+
+def is_valid_scan_status_code(status):
+    """
+    Check if a status is a valid scan status code
+    """
+    return status in SCAN_STATUS_CODES
+
+
+def get_scan_status_name(status):
+    """
+    Convert a numeric scan status code to a string status name
+    """
+    try:
+        if isinstance(status, str):
+            if not is_valid_scan_status(status):
+                raise ValueError(f"Invalid scan status: {status}")
+            return status
+        elif isinstance(status, int):
+            return SCAN_STATUS_CODES[status]
+        else:
+            raise ValueError(f"Invalid scan status: {status} (must be int or str)")
+    except KeyError:
+        raise ValueError(f"Invalid scan status: {status}")
+
+
+def get_scan_status_code(status):
+    """
+    Convert a scan status string to a numeric status code
+    """
+    try:
+        if isinstance(status, int):
+            if not is_valid_scan_status_code(status):
+                raise ValueError(f"Invalid scan status code: {status}")
+            return status
+        elif isinstance(status, str):
+            return SCAN_STATUSES[status]
+        else:
+            raise ValueError(f"Invalid scan status: {status} (must be int or str)")
+    except KeyError:
+        raise ValueError(f"Invalid scan status: {status}")
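A quick usage sketch of the status helpers above (illustrative only, not part of the patch):

# illustrative round-trip between status names and codes
from bbot.constants import get_scan_status_code, get_scan_status_name

assert get_scan_status_code("RUNNING") == 3
assert get_scan_status_name(3) == "RUNNING"
get_scan_status_name(99)  # raises ValueError: Invalid scan status: 99

diff --git a/bbot/core/config/logger.py b/bbot/core/config/logger.py
index c5773a3a0c..4f22b5157e 100644
--- a/bbot/core/config/logger.py
+++ b/bbot/core/config/logger.py
@@ -2,6 +2,7 @@
 import sys
 import atexit
 import logging
+import threading
 from copy import copy
 import multiprocessing
 import logging.handlers
@@ -93,7 +94,10 @@ def cleanup_logging(self):
         # Stop queue listener
         with suppress(Exception):
-            self.listener.stop()
+            stop_thread = threading.Thread(target=self.listener.stop)
+            stop_thread.daemon = True
+            stop_thread.start()
+            stop_thread.join()
 
     def setup_queue_handler(self, logging_queue=None, log_level=logging.DEBUG):
         if logging_queue is None:
diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 46102d6aef..27e9ef5a86 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -11,6 +11,7 @@
 from pathlib import Path
 from typing import Optional
+from zoneinfo import ZoneInfo
 from copy import copy, deepcopy
 from contextlib import suppress
 from radixtarget import RadixTarget
@@ -40,6 +41,7 @@
     validators,
     get_file_extension,
 )
+from bbot.models.helpers import utc_datetime_validator
 from bbot.core.helpers.web.envelopes import BaseEnvelope
@@ -401,6 +403,8 @@ def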
host_filterable(self): @property def port(self): self.host + if self._port: + return self._port if getattr(self, "parsed_url", None): if self.parsed_url.port is not None: return self.parsed_url.port @@ -408,7 +412,6 @@ def port(self): return 443 elif self.parsed_url.scheme == "http": return 80 - return self._port @property def netloc(self): @@ -811,7 +814,7 @@ def __contains__(self, other): return bool(radixtarget.search(other.host)) return False - def json(self, mode="json", siem_friendly=False): + def json(self, mode="json"): """ Serializes the event object to a JSON-compatible dictionary. @@ -820,7 +823,6 @@ def json(self, mode="json", siem_friendly=False): Parameters: mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id". - siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. This ensures the value of "data" is always the same type (a dictionary). Returns: dict: JSON-serializable dictionary representation of the event object. @@ -837,10 +839,12 @@ def json(self, mode="json", siem_friendly=False): data = data_attr else: data = smart_decode(self.data) - if siem_friendly: - j["data"] = {self.type: data} - else: + if isinstance(data, str): j["data"] = data + elif isinstance(data, dict): + j["data_json"] = data + else: + raise ValueError(f"Invalid data type: {type(data)}") # host, dns children if self.host: j["host"] = str(self.host) @@ -858,7 +862,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = self.timestamp.isoformat() + j["timestamp"] = utc_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -867,8 +871,7 @@ def json(self, mode="json", siem_friendly=False): if parent_uuid: j["parent_uuid"] = parent_uuid # tags - if self.tags: - j.update({"tags": list(self.tags)}) + j.update({"tags": sorted(self.tags)}) # parent module if self.module: j.update({"module": str(self.module)}) @@ -1084,9 +1087,10 @@ def __init__(self, *args, **kwargs): parent_path = parent.data.get("path", None) if parent_path is not None: self.data["path"] = parent_path - # inherit closest host + # inherit closest host+port if parent.host: self.data["host"] = str(parent.host) + self._port = parent.port # we do this to refresh the hash self.data = self.data break @@ -1097,6 +1101,7 @@ def __init__(self, *args, **kwargs): class DictPathEvent(DictEvent): def sanitize_data(self, data): + data = super().sanitize_data(data) new_data = dict(data) new_data["path"] = str(new_data["path"]) file_blobs = getattr(self.scan, "_file_blobs", False) @@ -1550,19 +1555,23 @@ class VULNERABILITY(ClosestHostEvent): "HIGH": "🟥", "MEDIUM": "🟧", "LOW": "🟨", + "INFO": "🟦", "UNKNOWN": "⬜", } def sanitize_data(self, data): + data = super().sanitize_data(data) self.add_tag(data["severity"].lower()) return data class _data_validator(BaseModel): host: Optional[str] = None severity: str + name: str description: str url: Optional[str] = None path: Optional[str] = None + cves: Optional[list[str]] = None _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) _validate_severity = field_validator("severity")(validators.validate_severity) @@ -1577,6 +1586,7 @@ class FINDING(ClosestHostEvent): class _data_validator(BaseModel): host: Optional[str] = None + name: str description: str url: Optional[str] = None path: Optional[str] = 
None
@@ -1595,6 +1605,11 @@ class _data_validator(BaseModel):
         _validate_url = field_validator("url")(validators.validate_url)
         _validate_host = field_validator("host")(validators.validate_host)
 
+    def _sanitize_data(self, data):
+        data = super()._sanitize_data(data)
+        data["technology"] = data["technology"].lower()
+        return data
+
     def _data_id(self):
         # dedupe by host+port+tech
         tech = self.data.get("technology", "")
@@ -1724,6 +1739,7 @@ class MOBILE_APP(DictEvent):
     _always_emit = True
 
     def _sanitize_data(self, data):
+        data = super()._sanitize_data(data)
         if isinstance(data, str):
             data = {"url": data}
         if "url" not in data:
@@ -1886,7 +1902,7 @@ def make_event(
     )
 
 
-def event_from_json(j, siem_friendly=False):
+def event_from_json(j):
     """
     Creates an event object from a JSON dictionary.
@@ -1917,10 +1933,12 @@
         "context": j.get("discovery_context", None),
         "dummy": True,
     }
-    if siem_friendly:
-        data = j["data"][event_type]
-    else:
-        data = j["data"]
+    data = j.get("data_json", None)
+    if data is None:
+        data = j.get("data", None)
+    if data is None:
+        json_pretty = json.dumps(j, indent=2)
+        raise ValueError(f"data or data_json must be provided. JSON: {json_pretty}")
     kwargs["data"] = data
     event = make_event(**kwargs)
     event_uuid = j.get("uuid", None)
@@ -1929,7 +1947,12 @@
     resolved_hosts = j.get("resolved_hosts", [])
     event._resolved_hosts = set(resolved_hosts)
-    event.timestamp = datetime.datetime.fromisoformat(j["timestamp"])
+
+    # accept both isoformat and unix timestamp
+    try:
+        event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"], ZoneInfo("UTC"))
+    except Exception:
+        event.timestamp = datetime.datetime.fromisoformat(j["timestamp"])
     event.scope_distance = j["scope_distance"]
     parent_id = j.get("parent", None)
     if parent_id is not None:
diff --git a/bbot/defaults.yml b/bbot/defaults.yml
index 64614d08e1..ddf0c1384d 100644
--- a/bbot/defaults.yml
+++ b/bbot/defaults.yml
@@ -1,5 +1,14 @@
 ### BASIC OPTIONS ###
 
+# NOTE: If used in a preset, these options must be nested underneath "config:" like so:
+#   config:
+#     home: ~/.bbot
+#     keep_scans: 20
+#     scope:
+#       strict: true
+#     dns:
+#       minimal: true
+
 # BBOT working directory
 home: ~/.bbot
 # How many scan results to keep before cleaning up the older ones
@@ -15,7 +24,7 @@ folder_blobs: false
 scope:
   # strict scope means only exact DNS names are considered in-scope
-  # subdomains are not included unless they are explicitly provided in the target list
+  # their subdomains are not included unless explicitly whitelisted
   strict: false
   # Filter by scope distance which events are displayed in the output
   # 0 == show only in-scope events (affiliates are always shown)
diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py
new file mode 100644
index 0000000000..b94bc976cc
--- /dev/null
+++ b/bbot/models/helpers.py
@@ -0,0 +1,20 @@
+from datetime import datetime
+from zoneinfo import ZoneInfo
+
+
+def utc_datetime_validator(d: datetime) -> datetime:
+    """
+    Converts all dates into UTC
+    """
+    if d.tzinfo is not None:
+        return d.astimezone(ZoneInfo("UTC"))
+    else:
+        return d.replace(tzinfo=ZoneInfo("UTC"))
+
+
+def utc_now() -> datetime:
+    return datetime.now(ZoneInfo("UTC"))
+
+
+def utc_now_timestamp() -> float:
+    return utc_now().timestamp()
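A short sketch of the timezone behavior these helpers pin down (illustrative, not part of the patch): naive datetimes are treated as already-UTC, aware ones are converted.

# illustrative behavior of utc_datetime_validator
from datetime import datetime
from zoneinfo import ZoneInfo
from bbot.models.helpers import utc_datetime_validator

naive = datetime(2024, 11, 22, 12, 0)
print(utc_datetime_validator(naive))    # 2024-11-22 12:00:00+00:00 (assumed UTC)

eastern = datetime(2024, 11, 22, 12, 0, tzinfo=ZoneInfo("America/New_York"))
print(utc_datetime_validator(eastern))  # 2024-11-22 17:00:00+00:00

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
new file mode 100644
index 0000000000..68e71d493e
--- /dev/null
+++ b/bbot/models/pydantic.py
@@ -0,0 +1,158 @@
+import json
+import logging
+from pydantic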
import BaseModel, ConfigDict, Field, computed_field +from typing import Optional, List, Annotated, get_origin, get_args + +from bbot.models.helpers import utc_now_timestamp + +log = logging.getLogger("bbot_server.models") + + +class BBOTBaseModel(BaseModel): + model_config = ConfigDict(extra="ignore") + + def to_json(self, **kwargs): + return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + + def __hash__(self): + return hash(self.to_json()) + + def __eq__(self, other): + return hash(self) == hash(other) + + @classmethod + def indexed_fields(cls): + indexed_fields = {} + + # Handle regular fields + for fieldname, field in cls.model_fields.items(): + if any(isinstance(m, str) and m.startswith("indexed") for m in field.metadata): + indexed_fields[fieldname] = field.metadata + + # Handle computed fields + for fieldname, field in cls.model_computed_fields.items(): + return_type = field.return_type + if get_origin(return_type) is Annotated: + type_args = get_args(return_type) + metadata = list(type_args[1:]) # Skip the first arg (the actual type) + if any(isinstance(m, str) and m.startswith("indexed") for m in metadata): + indexed_fields[fieldname] = metadata + + return indexed_fields + + # we keep these because they were a lot of work to make and maybe someday they'll be useful again + + # @classmethod + # def _get_type_hints(cls): + # """ + # Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + # """ + # type_hints = get_type_hints(cls) + # unwrapped_type_hints = {} + # for field_name in cls.model_fields: + # type_hint = type_hints[field_name] + # while 1: + # if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + # type_hint = type_hint.__args__[0] + # else: + # break + # unwrapped_type_hints[field_name] = type_hint + # return unwrapped_type_hints + + # @classmethod + # def _datetime_fields(cls): + # datetime_fields = [] + # for field_name, type_hint in cls._get_type_hints().items(): + # if type_hint == datetime: + # datetime_fields.append(field_name) + # return sorted(datetime_fields) + + +### EVENT ### + + +class Event(BBOTBaseModel): + uuid: Annotated[str, "indexed", "unique"] + id: Annotated[str, "indexed"] + type: Annotated[str, "indexed"] + scope_description: str + data: Annotated[Optional[str], "indexed"] = None + data_json: Optional[dict] = None + host: Annotated[Optional[str], "indexed"] = None + port: Optional[int] = None + netloc: Optional[str] = None + resolved_hosts: Optional[List] = None + dns_children: Optional[dict] = None + web_spider_distance: int = 10 + scope_distance: int = 10 + scan: Annotated[str, "indexed"] + timestamp: Annotated[float, "indexed"] + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=utc_now_timestamp) + parent: Annotated[str, "indexed"] + parent_uuid: Annotated[str, "indexed"] + tags: List = [] + module: Annotated[Optional[str], "indexed"] = None + module_sequence: Optional[str] = None + discovery_context: str = "" + discovery_path: List[str] = [] + parent_chain: List[str] = [] + archived: bool = False + + def get_data(self): + if self.data is not None: + return self.data + return self.data_json + + def __hash__(self): + return hash(self.id) + + @computed_field + @property + def reverse_host(self) -> Annotated[Optional[str], "indexed"]: + """ + We store the host in reverse to allow for instant subdomain queries + This works because indexes are left-anchored, but we need to search starting from the right side + """ + if self.host: + return self.host[::-1] 
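+        # e.g. "www.evilcorp.com" is stored as "moc.proclive.www", so a
+        # left-anchored (prefix) index query matches all of its subdomains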
+ return None + + +### SCAN ### + + +class Scan(BBOTBaseModel): + id: Annotated[str, "indexed", "unique"] + name: str + status: Annotated[str, "indexed"] + started_at: Annotated[float, "indexed"] + finished_at: Annotated[Optional[float], "indexed"] = None + duration_seconds: Optional[float] = None + duration: Optional[str] = None + target: dict + preset: dict + + @classmethod + def from_scan(cls, scan): + return cls( + id=scan.id, + name=scan.name, + status=scan.status, + started_at=scan.started_at, + ) + + +### TARGET ### + + +class Target(BBOTBaseModel): + name: str = "Default Target" + strict_dns_scope: bool = False + seeds: List = [] + whitelist: Optional[List] = None + blacklist: List = [] + hash: Annotated[str, "indexed", "unique"] + scope_hash: Annotated[str, "indexed"] + seed_hash: Annotated[str, "indexed"] + whitelist_hash: Annotated[str, "indexed"] + blacklist_hash: Annotated[str, "indexed"] diff --git a/bbot/db/sql/models.py b/bbot/models/sql.py similarity index 81% rename from bbot/db/sql/models.py rename to bbot/models/sql.py index d6e7656108..d58034ccf6 100644 --- a/bbot/db/sql/models.py +++ b/bbot/models/sql.py @@ -3,13 +3,15 @@ import json import logging +from datetime import datetime from pydantic import ConfigDict from typing import List, Optional -from datetime import datetime, timezone from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime +from bbot.models.helpers import utc_now_timestamp + log = logging.getLogger("bbot_server.models") @@ -27,14 +29,6 @@ def naive_datetime_validator(d: datetime): NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] -class CustomJSONEncoder(json.JSONEncoder): - def default(self, obj): - # handle datetime - if isinstance(obj, datetime): - return obj.isoformat() - return super().default(obj) - - class BBOTBaseModel(SQLModel): model_config = ConfigDict(extra="ignore") @@ -52,7 +46,7 @@ def validated(self): return self def to_json(self, **kwargs): - return json.dumps(self.validated.model_dump(), sort_keys=True, cls=CustomJSONEncoder, **kwargs) + return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs) @classmethod def _pk_column_names(cls): @@ -71,20 +65,13 @@ def __eq__(self, other): class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - data = self._get_data(self.data, self.type) - self.data = {self.type: data} if self.host: self.reverse_host = self.host[::-1] def get_data(self): - return self._get_data(self.data, self.type) - - @staticmethod - def _get_data(data, type): - # handle SIEM-friendly format - if isinstance(data, dict) and list(data) == [type]: - return data[type] - return data + if self.data is not None: + return self.data + return self.data_json uuid: str = Field( primary_key=True, @@ -93,11 +80,12 @@ def _get_data(data, type): ) id: str = Field(index=True) type: str = Field(index=True) - scope_description: str - data: dict = Field(sa_type=JSON) + data: Optional[str] = Field(default=None, index=True) + data_json: Optional[dict] = Field(default=None, sa_type=JSON) host: Optional[str] port: Optional[int] netloc: Optional[str] + scope_description: str # store the host in reversed form for efficient lookups by domain reverse_host: Optional[str] = Field(default="", exclude=True, index=True) resolved_hosts: List = Field(default=[], sa_type=JSON) @@ -105,7 +93,8 @@ def _get_data(data, type): 
web_spider_distance: int = 10 scope_distance: int = Field(default=10, index=True) scan: str = Field(index=True) - timestamp: NaiveUTC = Field(index=True) + timestamp: float = Field(index=True) + inserted_at: float = Field(default_factory=utc_now_timestamp) parent: str = Field(index=True) tags: List = Field(default=[], sa_type=JSON) module: str = Field(index=True) @@ -113,7 +102,6 @@ def _get_data(data, type): discovery_context: str = "" discovery_path: List[str] = Field(default=[], sa_type=JSON) parent_chain: List[str] = Field(default=[], sa_type=JSON) - inserted_at: NaiveUTC = Field(default_factory=lambda: datetime.now(timezone.utc)) ### SCAN ### @@ -136,9 +124,9 @@ class Scan(BBOTBaseModel, table=True): class Target(BBOTBaseModel, table=True): name: str = "Default Target" - strict_scope: bool = False + strict_dns_scope: bool = False seeds: List = Field(default=[], sa_type=JSON) - whitelist: List = Field(default=None, sa_type=JSON) + whitelist: Optional[List] = Field(default=None, sa_type=JSON) blacklist: List = Field(default=[], sa_type=JSON) hash: str = Field(sa_column=Column("hash", String(length=255), unique=True, primary_key=True, index=True)) scope_hash: str = Field(sa_column=Column("scope_hash", String(length=255), index=True)) diff --git a/bbot/modules/ajaxpro.py b/bbot/modules/ajaxpro.py index 1df424ebcc..c1ff413915 100644 --- a/bbot/modules/ajaxpro.py +++ b/bbot/modules/ajaxpro.py @@ -10,7 +10,7 @@ class ajaxpro(BaseModule): ajaxpro_regex = re.compile(r' 1: @@ -726,16 +726,18 @@ async def _worker(self): self.debug(f"Finished handling {event}") else: self.debug(f"Not accepting {event} because {reason}") - except asyncio.CancelledError: - # this trace was used for debugging leaked CancelledErrors from inside httpx - # self.log.trace("Worker cancelled") - raise - except BaseException as e: - if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): - self.scan.stop() - else: - self.error(f"Critical failure in module {self.name}: {e}") - self.error(traceback.format_exc()) + except asyncio.CancelledError: + # this trace was used for debugging leaked CancelledErrors from inside httpx + # self.log.trace("Worker cancelled") + raise + except RuntimeError as e: + self.trace(f"RuntimeError in module {self.name}: {e}") + except BaseException as e: + if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): + await self.scan.async_stop() + else: + self.error(f"Critical failure in module {self.name}: {e}") + self.error(traceback.format_exc()) self.log.trace("Worker stopped") @property @@ -1724,8 +1726,8 @@ class BaseInterceptModule(BaseModule): _intercept = True async def _worker(self): - async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): - try: + try: + async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): while not self.scan.stopping and not self.errored: try: if self.incoming_event_queue is not False: @@ -1784,16 +1786,19 @@ async def _worker(self): self.debug(f"Forwarding {event}") await self.forward_event(event, kwargs) - except asyncio.CancelledError: - # this trace was used for debugging leaked CancelledErrors from inside httpx - # self.log.trace("Worker cancelled") - raise - except BaseException as e: - if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): - self.scan.stop() - else: - self.critical(f"Critical failure in intercept module {self.name}: {e}") - self.critical(traceback.format_exc()) + except asyncio.CancelledError: + # this trace was used for debugging leaked CancelledErrors from inside httpx + # 
self.log.trace("Worker cancelled") + raise + except RuntimeError as e: + self.trace(f"RuntimeError in intercept module {self.name}: {e}") + except BaseException as e: + if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): + await self.scan.async_stop() + else: + self.critical(f"Critical failure in intercept module {self.name}: {e}") + self.critical(traceback.format_exc()) + self.log.trace("Worker stopped") async def get_incoming_event(self): diff --git a/bbot/modules/bypass403.py b/bbot/modules/bypass403.py index 61fb510775..0a501b3f91 100644 --- a/bbot/modules/bypass403.py +++ b/bbot/modules/bypass403.py @@ -141,6 +141,7 @@ async def handle_event(self, event): if len(results) > collapse_threshold: await self.emit_event( { + "name": "Possible 403 Bypass", "description": f"403 Bypass MULTIPLE SIGNATURES (exceeded threshold {str(collapse_threshold)})", "host": str(event.host), "url": event.data, @@ -152,7 +153,12 @@ async def handle_event(self, event): else: for description in results: await self.emit_event( - {"description": description, "host": str(event.host), "url": event.data}, + { + "name": "Possible 403 Bypass", + "description": description, + "host": str(event.host), + "url": event.data, + }, "FINDING", parent=event, context=f"{{module}} discovered potential 403 bypass ({{event.type}}) for {event.data}", diff --git a/bbot/modules/dotnetnuke.py b/bbot/modules/dotnetnuke.py index 7e8b4d3d4e..0f7301b33a 100644 --- a/bbot/modules/dotnetnuke.py +++ b/bbot/modules/dotnetnuke.py @@ -55,6 +55,8 @@ async def interactsh_callback(self, r): "host": str(event.host), "url": url, "description": description, + "cves": ["CVE-2017-0929"], + "name": "DotNetNuke Blind-SSRF", }, "VULNERABILITY", event, @@ -106,6 +108,7 @@ async def handle_event(self, event): "description": description, "host": str(event.host), "url": probe_url, + "name": "DotNetNuke Cookie Deserialization", }, "VULNERABILITY", event, @@ -124,6 +127,7 @@ async def handle_event(self, event): { "severity": "CRITICAL", "description": description, + "name": "DotNetNuke Arbitrary File Read", "host": str(event.host), "url": f"{event.data['url']}/DesktopModules/dnnUI_NewsArticlesSlider/ImageHandler.ashx", }, @@ -143,6 +147,7 @@ async def handle_event(self, event): { "severity": "CRITICAL", "description": description, + "name": "DotNetNuke Arbitrary File Read", "host": str(event.host), "url": f"{event.data['url']}/Desktopmodules/DNNArticle/GetCSS.ashx/?CP=%2fweb.config", }, @@ -164,6 +169,7 @@ async def handle_event(self, event): { "severity": "CRITICAL", "description": description, + "name": "DotNetNuke Privilege Escalation", "host": str(event.host), "url": f"{event.data['url']}/Install/InstallWizard.aspx", }, diff --git a/bbot/modules/generic_ssrf.py b/bbot/modules/generic_ssrf.py deleted file mode 100644 index 6ccde510b9..0000000000 --- a/bbot/modules/generic_ssrf.py +++ /dev/null @@ -1,262 +0,0 @@ -from bbot.errors import InteractshError -from bbot.modules.base import BaseModule - - -ssrf_params = [ - "Dest", - "Redirect", - "URI", - "Path", - "Continue", - "URL", - "Window", - "Next", - "Data", - "Reference", - "Site", - "HTML", - "Val", - "Validate", - "Domain", - "Callback", - "Return", - "Page", - "Feed", - "Host", - "Port", - "To", - "Out", - "View", - "Dir", - "Show", - "Navigation", - "Open", -] - - -class BaseSubmodule: - technique_description = "base technique description" - severity = "INFO" - paths = [] - - def __init__(self, generic_ssrf): - self.generic_ssrf = generic_ssrf - self.test_paths = self.create_paths() - - def 
set_base_url(self, event): - return f"{event.parsed_url.scheme}://{event.parsed_url.netloc}" - - def create_paths(self): - return self.paths - - async def test(self, event): - base_url = self.set_base_url(event) - for test_path_result in self.test_paths: - for lower in [True, False]: - test_path = test_path_result[0] - if lower: - test_path = test_path.lower() - subdomain_tag = test_path_result[1] - test_url = f"{base_url}{test_path}" - self.generic_ssrf.debug(f"Sending request to URL: {test_url}") - r = await self.generic_ssrf.helpers.curl(url=test_url) - if r: - self.process(event, r, subdomain_tag) - - def process(self, event, r, subdomain_tag): - response_token = self.generic_ssrf.interactsh_domain.split(".")[0][::-1] - if response_token in r: - echoed_response = True - else: - echoed_response = False - - self.generic_ssrf.interactsh_subdomain_tags[subdomain_tag] = ( - event, - self.technique_description, - self.severity, - echoed_response, - ) - - -class Generic_SSRF(BaseSubmodule): - technique_description = "Generic SSRF (GET)" - severity = "HIGH" - - def set_base_url(self, event): - return event.data - - def create_paths(self): - test_paths = [] - for param in ssrf_params: - query_string = "" - subdomain_tag = self.generic_ssrf.helpers.rand_string(4) - ssrf_canary = f"{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" - self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param - query_string += f"{param}=http://{ssrf_canary}&" - test_paths.append((f"?{query_string.rstrip('&')}", subdomain_tag)) - return test_paths - - -class Generic_SSRF_POST(BaseSubmodule): - technique_description = "Generic SSRF (POST)" - severity = "HIGH" - - def set_base_url(self, event): - return event.data - - async def test(self, event): - test_url = f"{event.data}" - - post_data = {} - for param in ssrf_params: - subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False) - self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param - post_data[param] = f"http://{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" - - subdomain_tag_lower = self.generic_ssrf.helpers.rand_string(4, digits=False) - post_data_lower = { - k.lower(): f"http://{subdomain_tag_lower}.{self.generic_ssrf.interactsh_domain}" - for k, v in post_data.items() - } - - post_data_list = [(subdomain_tag, post_data), (subdomain_tag_lower, post_data_lower)] - - for tag, pd in post_data_list: - r = await self.generic_ssrf.helpers.curl(url=test_url, method="POST", post_data=pd) - self.process(event, r, tag) - - -class Generic_XXE(BaseSubmodule): - technique_description = "Generic XXE" - severity = "HIGH" - paths = None - - async def test(self, event): - rand_entity = self.generic_ssrf.helpers.rand_string(4, digits=False) - subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False) - - post_body = f""" - - -]> -&{rand_entity};""" - test_url = event.parsed_url.geturl() - r = await self.generic_ssrf.helpers.curl( - url=test_url, method="POST", raw_body=post_body, headers={"Content-type": "application/xml"} - ) - if r: - self.process(event, r, subdomain_tag) - - -class generic_ssrf(BaseModule): - watched_events = ["URL"] - produced_events = ["VULNERABILITY"] - flags = ["active", "aggressive", "web-thorough"] - meta = {"description": "Check for generic SSRFs", "created_date": "2022-07-30", "author": "@liquidsec"} - options = { - "skip_dns_interaction": False, - } - options_desc = { - "skip_dns_interaction": "Do not report DNS interactions (only HTTP interaction)", - } - in_scope_only = True - - 
deps_apt = ["curl"] - - async def setup(self): - self.submodules = {} - self.interactsh_subdomain_tags = {} - self.parameter_subdomain_tags_map = {} - self.severity = None - self.skip_dns_interaction = self.config.get("skip_dns_interaction", False) - - if self.scan.config.get("interactsh_disable", False) is False: - try: - self.interactsh_instance = self.helpers.interactsh() - self.interactsh_domain = await self.interactsh_instance.register(callback=self.interactsh_callback) - except InteractshError as e: - self.warning(f"Interactsh failure: {e}") - return False - else: - self.warning( - "The generic_ssrf module is completely dependent on interactsh to function, but it is disabled globally. Aborting." - ) - return None - - # instantiate submodules - for m in BaseSubmodule.__subclasses__(): - if m.__name__.startswith("Generic_"): - self.verbose(f"Starting generic_ssrf submodule: {m.__name__}") - self.submodules[m.__name__] = m(self) - - return True - - async def handle_event(self, event): - for s in self.submodules.values(): - await s.test(event) - - async def interactsh_callback(self, r): - protocol = r.get("protocol").upper() - if protocol == "DNS" and self.skip_dns_interaction: - return - - full_id = r.get("full-id", None) - subdomain_tag = full_id.split(".")[0] - - if full_id: - if "." in full_id: - match = self.interactsh_subdomain_tags.get(subdomain_tag) - if not match: - return - matched_event = match[0] - matched_technique = match[1] - matched_severity = match[2] - matched_echoed_response = str(match[3]) - - triggering_param = self.parameter_subdomain_tags_map.get(subdomain_tag, None) - description = f"Out-of-band interaction: [{matched_technique}]" - if triggering_param: - self.debug(f"Found triggering parameter: {triggering_param}") - description += f" [Triggering Parameter: {triggering_param}]" - description += f" [{protocol}] Echoed Response: {matched_echoed_response}" - - self.debug(f"Emitting event with description: {description}") # Debug the final description - - event_type = "VULNERABILITY" if protocol == "HTTP" else "FINDING" - event_data = { - "host": str(matched_event.host), - "url": matched_event.data, - "description": description, - } - if protocol == "HTTP": - event_data["severity"] = matched_severity - - await self.emit_event( - event_data, - event_type, - matched_event, - context=f"{{module}} scanned {matched_event.data} and detected {{event.type}}: {matched_technique}", - ) - else: - # this is likely caused by something trying to resolve the base domain first and can be ignored - self.debug("skipping result because subdomain tag was missing") - - async def cleanup(self): - if self.scan.config.get("interactsh_disable", False) is False: - try: - await self.interactsh_instance.deregister() - self.debug( - f"successfully deregistered interactsh session with correlation_id {self.interactsh_instance.correlation_id}" - ) - except InteractshError as e: - self.warning(f"Interactsh failure: {e}") - - async def finish(self): - if self.scan.config.get("interactsh_disable", False) is False: - await self.helpers.sleep(5) - try: - for r in await self.interactsh_instance.poll(): - await self.interactsh_callback(r) - except InteractshError as e: - self.debug(f"Error in interact.sh: {e}") diff --git a/bbot/modules/git.py b/bbot/modules/git.py index 569aa0e489..0c069bd958 100644 --- a/bbot/modules/git.py +++ b/bbot/modules/git.py @@ -32,7 +32,12 @@ async def handle_event(self, event): if getattr(response, "status_code", 0) == 200 and "[core]" in text and not 
self.fp_regex.match(text): description = f"Exposed .git config at {url}" await self.emit_event( - {"host": str(event.host), "url": url, "description": description}, + { + "host": str(event.host), + "url": url, + "description": description, + "name": "Exposed .git config", + }, "FINDING", event, context="{module} detected {event.type}: {description}", diff --git a/bbot/modules/gitlab.py b/bbot/modules/gitlab.py index e1ba3850ee..f2379b303a 100644 --- a/bbot/modules/gitlab.py +++ b/bbot/modules/gitlab.py @@ -57,7 +57,11 @@ async def handle_http_response(self, event): ) description = f"GitLab server at {event.host}" await self.emit_event( - {"host": str(event.host), "description": description}, + { + "host": str(event.host), + "description": description, + "name": "GitLab Server", + }, "FINDING", parent=event, context=f"{{module}} detected {{event.type}}: {description}", diff --git a/bbot/modules/graphql_introspection.py b/bbot/modules/graphql_introspection.py index a820bfb6f3..7a6864cfd7 100644 --- a/bbot/modules/graphql_introspection.py +++ b/bbot/modules/graphql_introspection.py @@ -135,8 +135,14 @@ async def handle_event(self, event): filename = self.output_dir / filename with open(filename, "w") as f: json.dump(response_json, f) + relative_path = str(filename.relative_to(self.scan.home)) await self.emit_event( - {"url": url, "description": "GraphQL schema", "path": str(filename.relative_to(self.scan.home))}, + { + "name": "GraphQL Schema", + "url": url, + "description": f"GraphQL Schema at {url}", + "path": relative_path, + }, "FINDING", event, context=f"{{module}} found GraphQL schema at {url}", diff --git a/bbot/modules/host_header.py b/bbot/modules/host_header.py index a60967b8b4..2d664f42d0 100644 --- a/bbot/modules/host_header.py +++ b/bbot/modules/host_header.py @@ -49,6 +49,7 @@ async def interactsh_callback(self, r): { "host": str(matched_event.host), "url": matched_event.data["url"], + "name": "Host Header Spoofing", "description": f"Spoofed Host header ({matched_technique}) [{protocol}] interaction", }, "FINDING", @@ -142,6 +143,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": description, + "name": "Duplicate Host Header Tolerated", }, "FINDING", event, @@ -184,6 +186,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": description, + "name": "Possible Host Header Injection", }, "FINDING", event, diff --git a/bbot/modules/hunt.py b/bbot/modules/hunt.py index 57e064d4bf..d4a0720436 100644 --- a/bbot/modules/hunt.py +++ b/bbot/modules/hunt.py @@ -312,7 +312,11 @@ async def handle_event(self, event): f" Original Value: [{self.helpers.truncate_string(str(event.data['original_value']), 200)}]" ) - data = {"host": str(event.host), "description": description} + data = { + "host": str(event.host), + "description": description, + "name": "Potentially Interesting Parameter", + } url = event.data.get("url", "") if url: data["url"] = url diff --git a/bbot/modules/iis_shortnames.py b/bbot/modules/iis_shortnames.py index 01de9151f7..fcf2b28d1e 100644 --- a/bbot/modules/iis_shortnames.py +++ b/bbot/modules/iis_shortnames.py @@ -232,7 +232,13 @@ class safety_counter_obj: description = f"IIS Shortname Vulnerability Detected. 
Potentially Vulnerable Method/Techniques: [{','.join(technique_strings)}]" await self.emit_event( - {"severity": "LOW", "host": str(event.host), "url": normalized_url, "description": description}, + { + "severity": "LOW", + "host": str(event.host), + "url": normalized_url, + "description": description, + "name": "IIS Shortnames", + }, "VULNERABILITY", event, context="{module} detected low {event.type}: IIS shortname enumeration", @@ -335,6 +341,7 @@ class safety_counter_obj: if url_hint.lower().endswith(".zip"): await self.emit_event( { + "name": "Possible backup file (zip) in web root", "host": str(event.host), "url": event.data, "description": f"Possible backup file (zip) in web root: {normalized_url}{url_hint}", diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 5cf3ef1fb0..ceb8da05ee 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -194,7 +194,10 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte """ for results in yara_results.values(): for result in results: - event_data = {"description": f"{discovery_context} {yara_rule_settings.description}"} + event_data = { + "name": f"{discovery_context} {yara_rule_settings.description}", + "description": f"{discovery_context} {yara_rule_settings.description}", + } if yara_rule_settings.emit_match: event_data["description"] += f" [{result}]" await self.report(event_data, event, yara_rule_settings, discovery_context) @@ -261,7 +264,8 @@ async def report( # If a description is not set and is needed, provide a basic one if event_type == "FINDING" and "description" not in event_data.keys(): - event_data["description"] = f"{discovery_context} {yara_rule_settings['self.description']}" + event_data["name"] = f"{discovery_context} {yara_rule_settings.description}" + event_data["description"] = f"{discovery_context} {yara_rule_settings.description}" subject = "" if isinstance(event_data, str): subject = f" {event_data}" @@ -281,7 +285,9 @@ def __init__(self, excavate): async def process(self, yara_results, event, yara_rule_settings, discovery_context): for identifier, results in yara_results.items(): for result in results: - event_data = {} + event_data = { + "name": f"Custom Yara Rule [{self.name}]", + } description_string = ( f" with description: [{yara_rule_settings.description}]" if yara_rule_settings.description else "" ) @@ -719,7 +725,8 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte for identifier in yara_results.keys(): for findings in yara_results[identifier]: event_data = { - "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})" + "name": "Possible Verbose Error Message", + "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})", } await self.report(event_data, event, yara_rule_settings, discovery_context, event_type="FINDING") @@ -750,7 +757,8 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte for identifier in yara_results.keys(): for findings in yara_results[identifier]: event_data = { - "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})" + "name": "Possible Serialized Object", + "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})", } await self.report(event_data, event, yara_rule_settings, discovery_context, event_type="FINDING") @@ -796,7 +804,11 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte def 
abort_if(e):
             return e.scope_distance > 0
 
-        finding_data = {"host": str(host), "description": f"Non-HTTP URI: {parsed_url.geturl()}"}
+        finding_data = {
+            "host": str(host),
+            "name": "Non-HTTP URI",
+            "description": f"Non-HTTP URI: {parsed_url.geturl()}",
+        }
         await self.report(finding_data, event, yara_rule_settings, discovery_context, abort_if=abort_if)
         protocol_data = {"protocol": parsed_url.scheme, "host": str(host)}
         if port:
diff --git a/bbot/modules/lightfuzz/lightfuzz.py b/bbot/modules/lightfuzz/lightfuzz.py
index e47d0861f2..3e7f8ae460 100644
--- a/bbot/modules/lightfuzz/lightfuzz.py
+++ b/bbot/modules/lightfuzz/lightfuzz.py
@@ -77,6 +77,7 @@ async def interactsh_callback(self, r):
                     "severity": "CRITICAL",
                     "host": str(details["event"].host),
                     "url": details["event"].data["url"],
+                    "name": "Lightfuzz - OS Command Injection",
                     "description": f"OS Command Injection (OOB Interaction) Type: [{details['type']}] Parameter Name: [{details['name']}] Probe: [{details['probe']}]",
                 },
                 "VULNERABILITY",
@@ -103,7 +104,12 @@ async def run_submodule(self, submodule, event):
         await submodule_instance.fuzz()
         if len(submodule_instance.results) > 0:
             for r in submodule_instance.results:
-                event_data = {"host": str(event.host), "url": event.data["url"], "description": r["description"]}
+                event_data = {
+                    "host": str(event.host),
+                    "url": event.data["url"],
+                    "name": r["name"],
+                    "description": r["description"],
+                }
 
                 envelopes = getattr(event, "envelopes", None)
                 envelope_summary = getattr(envelopes, "summary", None)
diff --git a/bbot/modules/lightfuzz/submodules/cmdi.py b/bbot/modules/lightfuzz/submodules/cmdi.py
index 11576f1dc5..51b256c08f 100644
--- a/bbot/modules/lightfuzz/submodules/cmdi.py
+++ b/bbot/modules/lightfuzz/submodules/cmdi.py
@@ -75,6 +75,7 @@ async def fuzz(self):
                 self.results.append(
                     {
                         "type": "FINDING",
+                        "name": "Lightfuzz - Possible Command Injection",
                         "description": f"POSSIBLE OS Command Injection. {self.metadata()} Detection Method: [echo canary] CMD Probe Delimiters: [{' '.join(positive_detections)}]",
                     }
                 )
diff --git a/bbot/modules/lightfuzz/submodules/crypto.py b/bbot/modules/lightfuzz/submodules/crypto.py
index 1134d95af9..b5bc846f64 100644
--- a/bbot/modules/lightfuzz/submodules/crypto.py
+++ b/bbot/modules/lightfuzz/submodules/crypto.py
@@ -286,6 +286,7 @@ async def padding_oracle(self, probe_value, cookies):
                 {
                     "type": "VULNERABILITY",
                     "severity": "HIGH",
+                    "name": "Padding Oracle Vulnerability",
                     "description": f"Padding Oracle Vulnerability. Block size: [{str(block_size)}] {self.metadata()}",
                     "context": context,
                 }
             )
@@ -320,6 +321,7 @@ async def error_string_search(self, text_dict, baseline_text):
             self.results.append(
                 {
                     "type": "FINDING",
+                    "name": "Lightfuzz - Possible Cryptographic Error",
                     "description": f"Possible Cryptographic Error. {self.metadata()} Strings: [{','.join(unique_matches)}] Detection Technique(s): [{','.join(matching_techniques)}]",
                     "context": context,
                 }
             )
@@ -414,6 +416,7 @@ async def fuzz(self):
             self.results.append(
                 {
                     "type": "FINDING",
+                    "name": "Lightfuzz - Probable Cryptographic Parameter",
                     "description": f"Probable Cryptographic Parameter. {self.metadata()} Detection Technique(s): [{', '.join(confirmed_techniques)}]",
                     "context": context,
                 }
             )
@@ -468,6 +471,7 @@ async def fuzz(self):
             self.results.append(
                 {
                     "type": "FINDING",
+                    "name": "Lightfuzz - Possible Length Extension Attack",
                     "description": f"Possible {self.event.data['type']} parameter with {hash_instance.name.upper()} Hash as value.
{self.metadata()}, linked to additional parameter [{additional_param_name}]", "context": context, } diff --git a/bbot/modules/lightfuzz/submodules/path.py b/bbot/modules/lightfuzz/submodules/path.py index 44047e2907..39bdd9c7c3 100644 --- a/bbot/modules/lightfuzz/submodules/path.py +++ b/bbot/modules/lightfuzz/submodules/path.py @@ -122,6 +122,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Path Traversal", "description": f"POSSIBLE Path Traversal. {self.metadata()} Detection Method: [{path_technique}]", } ) @@ -149,6 +150,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Path Traversal", "description": f"POSSIBLE Path Traversal. {self.metadata()} Detection Method: [Absolute Path: {path}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/serial.py b/bbot/modules/lightfuzz/submodules/serial.py index 9a7dd90135..ae3724e87b 100644 --- a/bbot/modules/lightfuzz/submodules/serial.py +++ b/bbot/modules/lightfuzz/submodules/serial.py @@ -166,6 +166,7 @@ def get_title(text): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Unsafe Deserialization", "description": f"POSSIBLE Unsafe Deserialization. {self.metadata()} Technique: [Error Resolution (Baseline: [{payload_baseline.baseline.status_code}] {baseline_title} -> Probe: [{status_code}] {probe_title})] Serialization Payload: [{type}]", } ) @@ -183,6 +184,7 @@ def get_title(text): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Unsafe Deserialization", "description": f"POSSIBLE Unsafe Deserialization. {self.metadata()} Technique: [Differential Error Analysis] Error-String: [{serialization_error}] Payload: [{type}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/sqli.py b/bbot/modules/lightfuzz/submodules/sqli.py index a2adfd2222..e0f4385f8f 100644 --- a/bbot/modules/lightfuzz/submodules/sqli.py +++ b/bbot/modules/lightfuzz/submodules/sqli.py @@ -100,6 +100,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible SQL Injection", "description": f"Possible SQL Injection. {self.metadata()} Detection Method: [SQL Error Detection] Detected String: [{sqli_error_string}]", } ) @@ -120,6 +121,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible SQL Injection", "description": f"Possible SQL Injection. {self.metadata()} Detection Method: [Single Quote/Two Single Quote, Code Change ({http_compare.baseline.status_code}->{single_quote[3].status_code}->{double_single_quote[3].status_code})]", } ) @@ -180,6 +182,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Blind SQL Injection", "description": f"Possible Blind SQL Injection. {self.metadata()} Detection Method: [Delay Probe ({p})]", } ) diff --git a/bbot/modules/lightfuzz/submodules/ssti.py b/bbot/modules/lightfuzz/submodules/ssti.py index 544b10b103..d871ec03a1 100644 --- a/bbot/modules/lightfuzz/submodules/ssti.py +++ b/bbot/modules/lightfuzz/submodules/ssti.py @@ -33,6 +33,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Server-side Template Injection", "description": f"POSSIBLE Server-side Template Injection. 
{self.metadata()} Detection Method: [Integer Multiplication] Payload: [{probe_value}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/xss.py b/bbot/modules/lightfuzz/submodules/xss.py index ded3ca6b93..fed860191f 100644 --- a/bbot/modules/lightfuzz/submodules/xss.py +++ b/bbot/modules/lightfuzz/submodules/xss.py @@ -91,6 +91,7 @@ async def check_probe(self, cookies, probe, match, context): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Reflected XSS", "description": f"Possible Reflected XSS. Parameter: [{self.event.data['name']}] Context: [{context}] Parameter Type: [{self.event.data['type']}]", } ) diff --git a/bbot/modules/medusa.py b/bbot/modules/medusa.py index 46fbc1ae2e..f8e5d60573 100644 --- a/bbot/modules/medusa.py +++ b/bbot/modules/medusa.py @@ -143,7 +143,8 @@ async def handle_event(self, event): self.info(f"Medusa stderr: {result.stderr}") async for message in self.parse_output(result.stdout, snmp_version): - vuln_event = self.create_vuln_event("CRITICAL", message, event) + vuln_name = f"Valid SNMPV{snmp_version} Credentials Found!" + vuln_event = self.create_vuln_event("CRITICAL", vuln_name, message, event) await self.emit_event(vuln_event) # else: Medusa supports various protocols which could in theory be implemented later on. @@ -219,17 +220,12 @@ async def construct_command(self, host, port, protocol, protocol_version): return cmd - def create_vuln_event(self, severity, description, source_event): + def create_vuln_event(self, severity, name, description, source_event): host = str(source_event.host) port = str(source_event.port) return self.make_event( - { - "severity": severity, - "host": host, - "port": port, - "description": description, - }, + {"severity": severity, "host": host, "port": port, "description": description, "name": name}, "VULNERABILITY", source_event, ) diff --git a/bbot/modules/newsletters.py b/bbot/modules/newsletters.py index 114f7d66fd..3b23307952 100644 --- a/bbot/modules/newsletters.py +++ b/bbot/modules/newsletters.py @@ -51,7 +51,12 @@ async def handle_event(self, event): result = self.find_type(soup) if result: description = "Found a Newsletter Submission Form that could be used for email bombing attacks" - data = {"host": str(_event.host), "description": description, "url": _event.data["url"]} + data = { + "host": str(_event.host), + "description": description, + "url": _event.data["url"], + "name": "Newsletter Submission Form", + } await self.emit_event( data, "FINDING", diff --git a/bbot/modules/ntlm.py b/bbot/modules/ntlm.py index 67268616de..90c5a986c2 100644 --- a/bbot/modules/ntlm.py +++ b/bbot/modules/ntlm.py @@ -120,6 +120,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": f"NTLM AUTH: {ntlm_resp_decoded}", + "name": "NTLM Authentication", }, "FINDING", parent=event, diff --git a/bbot/modules/nuclei.py b/bbot/modules/nuclei.py index 62fd6e4ffb..3a0c863405 100644 --- a/bbot/modules/nuclei.py +++ b/bbot/modules/nuclei.py @@ -175,6 +175,7 @@ async def handle_batch(self, *events): "host": str(parent_event.host), "url": url, "description": description_string, + "name": f"Nuclei Vuln - {name}", }, "FINDING", parent_event, @@ -187,6 +188,7 @@ async def handle_batch(self, *events): "host": str(parent_event.host), "url": url, "description": description_string, + "name": f"Nuclei Vuln - {name}", }, "VULNERABILITY", parent_event, diff --git a/bbot/modules/oauth.py b/bbot/modules/oauth.py index 58c0507c09..559dea9313 100644 --- a/bbot/modules/oauth.py +++ 
b/bbot/modules/oauth.py
@@ -62,6 +62,7 @@ async def handle_event(self, event):
             if token_endpoint:
                 finding_event = self.make_event(
                     {
+                        "name": "OpenID Connect Endpoint",
                         "description": f"OpenID Connect Endpoint (domain: {source_domain}) found at {url}",
                         "host": event.host,
                         "url": url,
@@ -101,6 +102,7 @@ async def handle_event(self, event):
                 description = f"Potentially Sprayable OAUTH Endpoint (domain: {source_domain}) at {url}"
                 oauth_finding = self.make_event(
                     {
+                        "name": "Potentially Sprayable OAUTH Endpoint",
                         "description": description,
                         "host": event.host,
                         "url": url,
diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py
new file mode 100644
index 0000000000..064c00af7c
--- /dev/null
+++ b/bbot/modules/output/elastic.py
@@ -0,0 +1,32 @@
+from .http import HTTP
+
+
+class Elastic(HTTP):
+    """
+    docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0
+    """
+
+    watched_events = ["*"]
+    meta = {
+        "description": "Send scan results to Elasticsearch",
+        "created_date": "2022-11-21",
+        "author": "@TheTechromancer",
+    }
+    options = {
+        "url": "https://localhost:9200/bbot_events/_doc",
+        "username": "elastic",
+        "password": "bbotislife",
+        "timeout": 10,
+    }
+    options_desc = {
+        "url": "Elastic URL (e.g. https://localhost:9200/<index>/_doc)",
+        "username": "Elastic username",
+        "password": "Elastic password",
+        "timeout": "HTTP timeout",
+    }
+
+    async def cleanup(self):
+        # refresh the index
+        doc_regex = self.helpers.re.compile(r"/[^/]+$")
+        refresh_url = doc_regex.sub("/_refresh", self.url)
+        await self.helpers.request(refresh_url, auth=self.auth)
diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py
index 9d9241da0b..28fa917fc7 100644
--- a/bbot/modules/output/http.py
+++ b/bbot/modules/output/http.py
@@ -1,3 +1,6 @@
+from omegaconf import OmegaConf
+
+from bbot.models.pydantic import Event
 from bbot.modules.output.base import BaseOutputModule
 
 
@@ -14,8 +17,8 @@ class HTTP(BaseOutputModule):
         "bearer": "",
         "username": "",
         "password": "",
+        "headers": {},
         "timeout": 10,
-        "siem_friendly": False,
     }
     options_desc = {
         "url": "Web URL",
@@ -23,16 +26,15 @@ class HTTP(BaseOutputModule):
         "bearer": "Authorization Bearer token",
         "username": "Username (basic auth)",
         "password": "Password (basic auth)",
+        "headers": "Additional headers to send with the request",
         "timeout": "HTTP timeout",
-        "siem_friendly": "Format JSON in a SIEM-friendly way for ingestion into Elastic, Splunk, etc.",
     }
 
     async def setup(self):
         self.url = self.config.get("url", "")
         self.method = self.config.get("method", "POST")
         self.timeout = self.config.get("timeout", 10)
-        self.siem_friendly = self.config.get("siem_friendly", False)
-        self.headers = {}
+        self.headers = OmegaConf.to_object(self.config.get("headers", OmegaConf.create()))
         bearer = self.config.get("bearer", "")
         if bearer:
             self.headers["Authorization"] = f"Bearer {bearer}"
@@ -51,12 +53,15 @@ async def setup(self):
 
     async def handle_event(self, event):
         while 1:
+            event_json = event.json()
+            event_pydantic = Event(**event_json)
+            event_json = event_pydantic.model_dump(exclude_none=True)
             response = await self.helpers.request(
                 url=self.url,
                 method=self.method,
                 auth=self.auth,
                 headers=self.headers,
-                json=event.json(siem_friendly=self.siem_friendly),
+                json=event_json,
             )
             is_success = False if response is None else response.is_success
             if not is_success:
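With siem_friendly gone, events always carry a string "data" or a dict "data_json"; the new "headers" option lets a preset attach custom request headers, e.g. (a hypothetical preset sketch, header names and values are placeholders):

config:
  modules:
    http:
      url: https://ingest.example.com/events
      bearer: "<token>"
      headers:
        X-Environment: staging

diff --git a/bbot/modules/output/json.py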
b/bbot/modules/output/json.py index a35fa6aed7..b93d1e4e3f 100644 --- a/bbot/modules/output/json.py +++ b/bbot/modules/output/json.py @@ -11,20 +11,18 @@ class JSON(BaseOutputModule): "created_date": "2022-04-07", "author": "@TheTechromancer", } - options = {"output_file": "", "siem_friendly": False} + options = {"output_file": ""} options_desc = { "output_file": "Output to file", - "siem_friendly": "Output JSON in a SIEM-friendly format for ingestion into Elastic, Splunk, etc.", } _preserve_graph = True async def setup(self): self._prep_output_dir("output.json") - self.siem_friendly = self.config.get("siem_friendly", False) return True async def handle_event(self, event): - event_json = event.json(siem_friendly=self.siem_friendly) + event_json = event.json() event_str = json.dumps(event_json) if self.file is not None: self.file.write(event_str + "\n") diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py new file mode 100644 index 0000000000..01eeeb2fd6 --- /dev/null +++ b/bbot/modules/output/kafka.py @@ -0,0 +1,48 @@ +import json +from aiokafka import AIOKafkaProducer + +from bbot.modules.output.base import BaseOutputModule + + +class Kafka(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a Kafka topic", + "created_date": "2024-11-22", + "author": "@TheTechromancer", + } + options = { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + options_desc = { + "bootstrap_servers": "A comma-separated list of Kafka server addresses", + "topic": "The Kafka topic to publish events to", + } + deps_pip = ["aiokafka~=0.12.0"] + + async def setup(self): + self.bootstrap_servers = self.config.get("bootstrap_servers", "localhost:9092") + self.topic = self.config.get("topic", "bbot_events") + self.producer = AIOKafkaProducer(bootstrap_servers=self.bootstrap_servers) + + # Start the producer + await self.producer.start() + self.verbose("Kafka producer started successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + while 1: + try: + await self.producer.send_and_wait(self.topic, event_data) + break + except Exception as e: + self.warning(f"Error sending event to Kafka: {e}, retrying...") + await self.helpers.sleep(1) + + async def cleanup(self): + # Stop the producer + await self.producer.stop() + self.verbose("Kafka producer stopped successfully") diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py new file mode 100644 index 0000000000..f90c4aad53 --- /dev/null +++ b/bbot/modules/output/mongo.py @@ -0,0 +1,92 @@ +from motor.motor_asyncio import AsyncIOMotorClient + +from bbot.models.pydantic import Event, Scan, Target +from bbot.modules.output.base import BaseOutputModule + + +class Mongo(BaseOutputModule): + """ + docker run --rm -p 27017:27017 mongo + """ + + watched_events = ["*"] + meta = { + "description": "Output scan data to a MongoDB database", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "uri": "mongodb://localhost:27017", + "database": "bbot", + "username": "", + "password": "", + "collection_prefix": "", + } + options_desc = { + "uri": "The URI of the MongoDB server", + "database": "The name of the database to use", + "username": "The username to use to connect to the database", + "password": "The password to use to connect to the database", + "collection_prefix": "Prefix the name of each collection with this string", + } + deps_pip = ["motor~=3.6.0"] + + 
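+    # one collection per object type (events / scans / targets); the "indexed" /
+    # "unique" metadata on the pydantic Event model drives index creation in setup()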
async def setup(self): + self.uri = self.config.get("uri", "mongodb://localhost:27017") + self.username = self.config.get("username", "") + self.password = self.config.get("password", "") + self.db_client = AsyncIOMotorClient(self.uri, username=self.username, password=self.password) + + # Ping the server to confirm a successful connection + try: + await self.db_client.admin.command("ping") + self.verbose("MongoDB connection successful") + except Exception as e: + return False, f"Failed to connect to MongoDB: {e}" + + self.db_name = self.config.get("database", "bbot") + self.db = self.db_client[self.db_name] + self.collection_prefix = self.config.get("collection_prefix", "") + self.events_collection = self.db[f"{self.collection_prefix}events"] + self.scans_collection = self.db[f"{self.collection_prefix}scans"] + self.targets_collection = self.db[f"{self.collection_prefix}targets"] + + # Build an index for each field in reverse_host and host + for fieldname, metadata in Event.indexed_fields().items(): + if "indexed" in metadata: + unique = "unique" in metadata + await self.events_collection.create_index([(fieldname, 1)], unique=unique) + self.verbose(f"Index created for field: {fieldname} (unique={unique})") + + return True + + async def handle_event(self, event): + event_json = event.json() + event_pydantic = Event(**event_json) + while 1: + try: + await self.events_collection.insert_one(event_pydantic.model_dump()) + break + except Exception as e: + self.warning(f"Error inserting event into MongoDB: {e}, retrying...") + self.trace() + await self.helpers.sleep(1) + + if event.type == "SCAN": + scan_json = Scan(**event.data_json).model_dump() + existing_scan = await self.scans_collection.find_one({"id": event_pydantic.id}) + if existing_scan: + await self.scans_collection.replace_one({"id": event_pydantic.id}, scan_json) + self.verbose(f"Updated scan event with ID: {event_pydantic.id}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with ID: {event_pydantic.id}") + + target_data = scan_json.get("target", {}) + target = Target(**target_data) + existing_target = await self.targets_collection.find_one({"hash": target.hash}) + if existing_target: + await self.targets_collection.replace_one({"hash": target.hash}, target.model_dump()) + else: + await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/modules/output/nats.py b/bbot/modules/output/nats.py new file mode 100644 index 0000000000..569645cc3b --- /dev/null +++ b/bbot/modules/output/nats.py @@ -0,0 +1,53 @@ +import json +import nats +from bbot.modules.output.base import BaseOutputModule + + +class NATS(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a NATS subject", + "created_date": "2024-11-22", + "author": "@TheTechromancer", + } + options = { + "servers": [], + "subject": "bbot_events", + } + options_desc = { + "servers": "A list of NATS server addresses", + "subject": "The NATS subject to publish events to", + } + deps_pip = ["nats-py"] + + async def setup(self): + self.servers = list(self.config.get("servers", [])) + if not self.servers: + return False, "NATS servers are required" + self.subject = self.config.get("subject", "bbot_events") + + # Connect to the NATS server + try: + self.nc = await nats.connect(self.servers) + except Exception as e: + import traceback + + return False, f"Error connecting to NATS: {e}\n{traceback.format_exc()}" + 
self.verbose("NATS client connected successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + while 1: + try: + await self.nc.publish(self.subject, event_data) + break + except Exception as e: + self.warning(f"Error sending event to NATS: {e}, retrying...") + await self.helpers.sleep(1) + + async def cleanup(self): + # Close the NATS connection + await self.nc.close() + self.verbose("NATS client disconnected successfully") diff --git a/bbot/modules/output/nmap_xml.py b/bbot/modules/output/nmap_xml.py index 52698e0de8..9a0cee27eb 100644 --- a/bbot/modules/output/nmap_xml.py +++ b/bbot/modules/output/nmap_xml.py @@ -1,6 +1,7 @@ import sys from xml.dom import minidom from datetime import datetime +from zoneinfo import ZoneInfo from xml.etree.ElementTree import Element, SubElement, tostring from bbot import __version__ @@ -76,7 +77,7 @@ async def handle_event(self, event): async def report(self): scan_start_time = str(int(self.scan.start_time.timestamp())) scan_start_time_str = self.scan.start_time.strftime("%a %b %d %H:%M:%S %Y") - scan_end_time = datetime.now() + scan_end_time = datetime.now(ZoneInfo("UTC")) scan_end_time_str = scan_end_time.strftime("%a %b %d %H:%M:%S %Y") scan_end_time_timestamp = str(scan_end_time.timestamp()) scan_duration = scan_end_time - self.scan.start_time diff --git a/bbot/modules/output/rabbitmq.py b/bbot/modules/output/rabbitmq.py new file mode 100644 index 0000000000..ba4205940d --- /dev/null +++ b/bbot/modules/output/rabbitmq.py @@ -0,0 +1,56 @@ +import json +import aio_pika + +from bbot.modules.output.base import BaseOutputModule + + +class RabbitMQ(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a RabbitMQ queue", + "created_date": "2024-11-22", + "author": "@TheTechromancer", + } + options = { + "url": "amqp://guest:guest@localhost/", + "queue": "bbot_events", + } + options_desc = { + "url": "The RabbitMQ connection URL", + "queue": "The RabbitMQ queue to publish events to", + } + deps_pip = ["aio_pika~=9.5.0"] + + async def setup(self): + self.rabbitmq_url = self.config.get("url", "amqp://guest:guest@localhost/") + self.queue_name = self.config.get("queue", "bbot_events") + + # Connect to RabbitMQ + self.connection = await aio_pika.connect_robust(self.rabbitmq_url) + self.channel = await self.connection.channel() + + # Declare the queue + self.queue = await self.channel.declare_queue(self.queue_name, durable=True) + self.verbose("RabbitMQ connection and queue setup successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + + # Publish the message to the queue + while 1: + try: + await self.channel.default_exchange.publish( + aio_pika.Message(body=event_data), + routing_key=self.queue_name, + ) + break + except Exception as e: + self.error(f"Error publishing message to RabbitMQ: {e}, rerying...") + await self.helpers.sleep(1) + + async def cleanup(self): + # Close the connection + await self.connection.close() + self.verbose("RabbitMQ connection closed successfully") diff --git a/bbot/modules/output/zeromq.py b/bbot/modules/output/zeromq.py new file mode 100644 index 0000000000..938f234545 --- /dev/null +++ b/bbot/modules/output/zeromq.py @@ -0,0 +1,46 @@ +import zmq +import json + +from bbot.modules.output.base import BaseOutputModule + + +class ZeroMQ(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": 
"Output scan data to a ZeroMQ socket (PUB)", + "created_date": "2024-11-22", + "author": "@TheTechromancer", + } + options = { + "zmq_address": "", + } + options_desc = { + "zmq_address": "The ZeroMQ socket address to publish events to (e.g. tcp://localhost:5555)", + } + + async def setup(self): + self.zmq_address = self.config.get("zmq_address", "") + if not self.zmq_address: + return False, "ZeroMQ address is required" + self.context = zmq.asyncio.Context() + self.socket = self.context.socket(zmq.PUB) + self.socket.bind(self.zmq_address) + self.verbose("ZeroMQ publisher socket bound successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + while 1: + try: + await self.socket.send(event_data) + break + except Exception as e: + self.warning(f"Error sending event to ZeroMQ: {e}, retrying...") + await self.helpers.sleep(1) + + async def cleanup(self): + # Close the socket + self.socket.close() + self.context.term() + self.verbose("ZeroMQ publisher socket closed successfully") diff --git a/bbot/modules/reflected_parameters.py b/bbot/modules/reflected_parameters.py index f7e17e57e6..a942edd473 100644 --- a/bbot/modules/reflected_parameters.py +++ b/bbot/modules/reflected_parameters.py @@ -25,7 +25,12 @@ async def handle_event(self, event): description += ( f" Original Value: [{self.helpers.truncate_string(str(event.data['original_value']), 200)}]" ) - data = {"host": str(event.host), "description": description, "url": url} + data = { + "host": str(event.host), + "description": description, + "url": url, + "name": "Reflected Parameter", + } await self.emit_event(data, "FINDING", event) async def detect_reflection(self, event, url): @@ -56,17 +61,18 @@ async def send_probe_with_canary(self, event, parameter_name, parameter_value, c data = None json_data = None params = {parameter_name: parameter_value, "c4n4ry": canary_value} + param_type = event.data["type"] - if event.data["type"] == "GETPARAM": + if param_type == "GETPARAM": url = f"{url}?{parameter_name}={parameter_value}&c4n4ry={canary_value}" - elif event.data["type"] == "COOKIE": + elif param_type == "COOKIE": cookies.update(params) - elif event.data["type"] == "HEADER": + elif param_type == "HEADER": headers.update(params) - elif event.data["type"] == "POSTPARAM": + elif param_type == "POSTPARAM": method = "POST" data = params - elif event.data["type"] == "BODYJSON": + elif param_type == "BODYJSON": method = "POST" json_data = params diff --git a/bbot/modules/retirejs.py b/bbot/modules/retirejs.py index 27e8fec407..4b58c1b23d 100644 --- a/bbot/modules/retirejs.py +++ b/bbot/modules/retirejs.py @@ -183,6 +183,7 @@ async def handle_event(self, event): description_parts.append(f"Affected versions: [>= {at_or_above}]") description = " ".join(description_parts) data = { + "name": "Vulnerable JavaScript Library", "description": description, "severity": severity, "component": component, diff --git a/bbot/modules/shodan_idb.py b/bbot/modules/shodan_idb.py index 4a3e2b214a..72fadfaf08 100644 --- a/bbot/modules/shodan_idb.py +++ b/bbot/modules/shodan_idb.py @@ -40,7 +40,7 @@ class shodan_idb(BaseModule): """ watched_events = ["IP_ADDRESS", "DNS_NAME"] - produced_events = ["TECHNOLOGY", "VULNERABILITY", "FINDING", "OPEN_TCP_PORT", "DNS_NAME"] + produced_events = ["TECHNOLOGY", "FINDING", "OPEN_TCP_PORT", "DNS_NAME"] flags = ["passive", "safe", "portscan", "subdomain-enum"] meta = { "description": "Query Shodan's InternetDB for open ports, hostnames, 
technologies, and vulnerabilities", @@ -143,7 +143,12 @@ async def _parse_response(self, data: dict, event, ip): if vulns: vulns_str = ", ".join([str(v) for v in vulns]) await self.emit_event( - {"description": f"Shodan reported possible vulnerabilities: {vulns_str}", "host": str(event.host)}, + { + "description": f"Shodan reported possible vulnerabilities: {vulns_str}", + "host": str(event.host), + "cves": vulns, + "name": "Shodan - Possible Vulnerabilities", + }, "FINDING", parent=event, context=f'{{module}} queried Shodan\'s InternetDB API for "{query_host}" and found potential {{event.type}}: {vulns_str}', diff --git a/bbot/modules/smuggler.py b/bbot/modules/smuggler.py index 357fec1885..4c7a78cf9e 100644 --- a/bbot/modules/smuggler.py +++ b/bbot/modules/smuggler.py @@ -40,7 +40,12 @@ async def handle_event(self, event): text = f.split(":")[1].split("-")[0].strip() description = f"[HTTP SMUGGLER] [{text}] Technique: {technique}" await self.emit_event( - {"host": str(event.host), "url": event.data, "description": description}, + { + "host": str(event.host), + "url": event.data, + "description": description, + "name": "Possible HTTP Smuggling", + }, "FINDING", parent=event, context=f"{{module}} scanned {event.data} and found HTTP smuggling ({{event.type}}): {text}", diff --git a/bbot/modules/telerik.py b/bbot/modules/telerik.py index 3cd0c8eed9..0e4def5b5e 100644 --- a/bbot/modules/telerik.py +++ b/bbot/modules/telerik.py @@ -242,7 +242,12 @@ async def handle_event(self, event): description = f"Telerik RAU AXD Handler detected. Verbose Errors Enabled: [{str(verbose_errors)}] Version Guess: [{version}]" await self.emit_event( - {"host": str(event.host), "url": f"{base_url}{webresource}", "description": description}, + { + "host": str(event.host), + "url": f"{base_url}{webresource}", + "description": description, + "name": "Telerik Handler", + }, "FINDING", event, context=f"{{module}} scanned {base_url} and identified {{event.type}}: Telerik RAU AXD Handler", @@ -278,6 +283,7 @@ async def handle_event(self, event): "description": description, "host": str(event.host), "url": f"{base_url}{webresource}", + "name": "Telerik RCE", }, "VULNERABILITY", event, @@ -307,7 +313,12 @@ async def handle_event(self, event): self.debug(f"Detected Telerik UI instance ({dh})") description = "Telerik DialogHandler detected" await self.emit_event( - {"host": str(event.host), "url": f"{base_url}{dh}", "description": description}, + { + "host": str(event.host), + "url": f"{base_url}{dh}", + "description": description, + "name": "Telerik Handler", + }, "FINDING", event, ) @@ -331,6 +342,7 @@ async def handle_event(self, event): "host": str(event.host), "url": f"{base_url}{spellcheckhandler}", "description": description, + "name": "Telerik Handler", }, "FINDING", event, @@ -350,6 +362,7 @@ async def handle_event(self, event): "host": str(event.host), "url": f"{base_url}{chartimagehandler}", "description": "Telerik ChartImage AXD Handler Detected", + "name": "Telerik Handler", }, "FINDING", event, @@ -366,6 +379,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": "Telerik DialogHandler [SerializedParameters] Detected in HTTP Response", + "name": "Telerik Handler", }, "FINDING", event, @@ -377,6 +391,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": "Telerik AsyncUpload [serializedConfiguration] Detected in HTTP Response", + "name": "Telerik AsyncUpload", }, "FINDING", event, diff --git a/bbot/modules/templates/bucket.py 
b/bbot/modules/templates/bucket.py index 3cd899d71c..537910dbcc 100644 --- a/bbot/modules/templates/bucket.py +++ b/bbot/modules/templates/bucket.py @@ -67,7 +67,12 @@ async def handle_storage_bucket(self, event): if self.supports_open_check: description, tags = await self._check_bucket_open(bucket_name, url) if description: - event_data = {"host": event.host, "url": url, "description": description} + event_data = { + "host": event.host, + "url": url, + "description": description, + "name": "Open Storage Bucket", + } await self.emit_event( event_data, "FINDING", diff --git a/bbot/modules/templates/sql.py b/bbot/modules/templates/sql.py index 39b4e6f00e..42f5494555 100644 --- a/bbot/modules/templates/sql.py +++ b/bbot/modules/templates/sql.py @@ -3,7 +3,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession -from bbot.db.sql.models import Event, Scan, Target +from bbot.models.sql import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule diff --git a/bbot/modules/trufflehog.py b/bbot/modules/trufflehog.py index c60a19cf97..e990fe6e36 100644 --- a/bbot/modules/trufflehog.py +++ b/bbot/modules/trufflehog.py @@ -122,6 +122,7 @@ async def handle_event(self, event): verified_str = "Verified" if verified else "Possible" finding_type = "VULNERABILITY" if verified else "FINDING" data = { + "name": f"TruffleHog - {detector_name}", "description": f"{verified_str} Secret Found. Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", } if host: diff --git a/bbot/modules/url_manipulation.py b/bbot/modules/url_manipulation.py index c36b7c39d5..e7863f2308 100644 --- a/bbot/modules/url_manipulation.py +++ b/bbot/modules/url_manipulation.py @@ -77,9 +77,14 @@ async def handle_event(self, event): if str(subject_response.status_code).startswith("2"): if "body" in reasons: reported_signature = f"Modified URL: {sig[1]}" - description = f"Url Manipulation: [{','.join(reasons)}] Sig: [{reported_signature}]" + description = f"URL Manipulation: [{','.join(reasons)}] Sig: [{reported_signature}]" await self.emit_event( - {"description": description, "host": str(event.host), "url": event.data}, + { + "description": description, + "host": str(event.host), + "url": event.data, + "name": "URL Manipulation", + }, "FINDING", parent=event, context=f"{{module}} probed {event.data} and identified {{event.type}}: {description}", diff --git a/bbot/modules/wpscan.py b/bbot/modules/wpscan.py index 4f1a63a1b5..6b128e77c8 100644 --- a/bbot/modules/wpscan.py +++ b/bbot/modules/wpscan.py @@ -174,7 +174,12 @@ def parse_wp_misc(self, interesting_json, base_url, source_event): if url_event: yield url_event yield self.make_event( - {"description": description_string, "url": url, "host": str(source_event.host)}, + { + "description": description_string, + "url": url, + "host": str(source_event.host), + "name": "WPScan - Possible Vulnerability", + }, "FINDING", source_event, ) @@ -197,6 +202,7 @@ def parse_wp_version(self, version_json, url, source_event): "host": str(source_event.host), "url": url, "description": self.vulnerability_to_s(wp_vuln), + "name": "WPScan - Possible Vulnerability", }, "VULNERABILITY", source_event, @@ -222,6 +228,7 @@ def parse_wp_themes(self, theme_json, url, source_event): "host": str(source_event.host), "url": url, "description": self.vulnerability_to_s(theme_vuln), + "name": "WPScan - Possible Vulnerability", }, "VULNERABILITY", source_event, @@ -251,6 +258,7 @@ def parse_wp_plugins(self, 
plugins_json, base_url, source_event): "host": str(source_event.host), "url": url, "description": self.vulnerability_to_s(vuln), + "name": "WPScan - Possible Vulnerability", }, "VULNERABILITY", source_event, diff --git a/bbot/scanner/dispatcher.py b/bbot/scanner/dispatcher.py index a9c56c2b72..efd3270903 100644 --- a/bbot/scanner/dispatcher.py +++ b/bbot/scanner/dispatcher.py @@ -1,7 +1,6 @@ import logging import traceback - -log = logging.getLogger("bbot.scanner.dispatcher") +import contextlib class Dispatcher: @@ -11,6 +10,7 @@ class Dispatcher: def set_scan(self, scan): self.scan = scan + self.log = logging.getLogger("bbot.scanner.dispatcher") async def on_start(self, scan): return @@ -24,9 +24,10 @@ async def on_status(self, status, scan_id): """ self.scan.debug(f"Setting scan status to {status}") - async def catch(self, callback, *args, **kwargs): + @contextlib.contextmanager + def catch(self): try: - return await callback(*args, **kwargs) + yield except Exception as e: - log.error(f"Error in {callback.__qualname__}(): {e}") - log.trace(traceback.format_exc()) + self.log.error(f"Error in dispatcher: {e}") + self.log.trace(traceback.format_exc()) diff --git a/bbot/scanner/preset/args.py b/bbot/scanner/preset/args.py index 18aa090424..137cd26d43 100644 --- a/bbot/scanner/preset/args.py +++ b/bbot/scanner/preset/args.py @@ -239,7 +239,7 @@ def create_parser(self, *args, **kwargs): target.add_argument( "--strict-scope", action="store_true", - help="Don't consider subdomains of target/whitelist to be in-scope", + help="Don't consider subdomains of target/whitelist to be in-scope - exact matches only", ) presets = p.add_argument_group(title="Presets") presets.add_argument( diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index b81bfda65c..623dbcec95 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -115,7 +115,7 @@ class Preset(metaclass=BasePreset): def __init__( self, - *targets, + *target, whitelist=None, blacklist=None, modules=None, @@ -142,7 +142,7 @@ def __init__( Initializes the Preset class. Args: - *targets (str): Target(s) to scan. Types supported: hostnames, IPs, CIDRs, emails, open ports. + *target (str): Target(s) to scan. Types supported: hostnames, IPs, CIDRs, emails, open ports. whitelist (list, optional): Whitelisted target(s) to scan. Defaults to the same as `targets`. blacklist (list, optional): Blacklisted target(s). Takes ultimate precedence. Defaults to empty. modules (list[str], optional): List of scan modules to enable for the scan. Defaults to empty list. @@ -262,7 +262,7 @@ def __init__( # target / whitelist / blacklist # these are temporary receptacles until they all get .baked() together - self._seeds = set(targets if targets else []) + self._seeds = set(target if target else []) self._whitelist = set(whitelist) if whitelist else whitelist self._blacklist = set(blacklist if blacklist else []) @@ -288,7 +288,7 @@ def target(self): @property def seeds(self): - if self._seeds is None: + if self._target is None: raise ValueError("Cannot access target before preset is baked (use ._seeds instead)") return self.target.seeds @@ -402,6 +402,8 @@ def bake(self, scan=None): """ Return a "baked" copy of this preset, ready for use by a BBOT scan. + Presets can be merged and modified before baking, but once baked, they are immutable. + Baking a preset finalizes it by populating `preset.modules` based on flags, performing final validations, and substituting environment variables in preloaded modules. 
It also evaluates custom `conditions` as specified in the preset. @@ -486,7 +488,7 @@ def bake(self, scan=None): *list(self._seeds), whitelist=self._whitelist, blacklist=self._blacklist, - strict_scope=self.strict_scope, + strict_dns_scope=self.strict_scope, ) if scan is not None: @@ -655,8 +657,11 @@ def from_dict(cls, preset_dict, name=None, _exclude=None, _log=False): Examples: >>> preset = Preset.from_dict({"target": ["evilcorp.com"], "modules": ["portscan"]}) """ + # tolerate both "target" and "targets", since this is a common oopsie + targets = preset_dict.get("target", []) + targets += preset_dict.get("targets", []) new_preset = cls( - *preset_dict.get("target", []), + *targets, whitelist=preset_dict.get("whitelist"), blacklist=preset_dict.get("blacklist"), modules=preset_dict.get("modules"), @@ -824,7 +829,7 @@ def to_dict(self, include_target=False, full_config=False, redact_secrets=False) if self.scan_name: preset_dict["scan_name"] = self.scan_name if self.scan_name and self.output_dir is not None: - preset_dict["output_dir"] = self.output_dir + preset_dict["output_dir"] = str(self.output_dir) # conditions if self.conditions: diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index b5269bf753..6447f09518 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -7,6 +7,7 @@ from pathlib import Path from sys import exc_info from datetime import datetime +from zoneinfo import ZoneInfo from collections import OrderedDict from bbot import __version__ @@ -18,6 +19,18 @@ from bbot.core.multiprocess import SHARED_INTERPRETER_STATE from bbot.core.helpers.async_helpers import async_to_sync_gen from bbot.errors import BBOTError, ScanError, ValidationError +from bbot.constants import ( + get_scan_status_code, + get_scan_status_name, + SCAN_STATUS_NOT_STARTED, + SCAN_STATUS_STARTING, + SCAN_STATUS_RUNNING, + SCAN_STATUS_FINISHING, + SCAN_STATUS_ABORTING, + SCAN_STATUS_ABORTED, + SCAN_STATUS_FAILED, + SCAN_STATUS_FINISHED, +) log = logging.getLogger("bbot.scanner") @@ -54,7 +67,6 @@ class Scanner: - "STARTING" (2): Status when the scan is initializing. - "RUNNING" (3): Status when the scan is in progress. - "FINISHING" (4): Status when the scan is in the process of finalizing. - - "CLEANING_UP" (4): Status when the scan is cleaning up resources. - "ABORTING" (5): Status when the scan is in the process of being aborted. - "ABORTED" (8): Status when the scan has been aborted. - "FAILED" (7): Status when the scan has encountered a failure. @@ -84,18 +96,6 @@ class Scanner: - Setting a status will trigger the `on_status` event in the dispatcher.
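To make the renumbering concrete, a quick sanity sketch against the helpers in `bbot/constants.py` (values as defined in this PR):

```python
from bbot.constants import (
    SCAN_STATUS_RUNNING,
    SCAN_STATUS_FINISHED,
    SCAN_STATUS_ABORTED,
    get_scan_status_code,
    get_scan_status_name,
)

# names and numeric codes round-trip through the helpers
assert get_scan_status_code("RUNNING") == SCAN_STATUS_RUNNING == 3
assert get_scan_status_name(SCAN_STATUS_ABORTED) == "ABORTED"
# terminal statuses sort above active ones, which is what lets _set_status()
# reject any transition that doesn't move the code forward
assert SCAN_STATUS_RUNNING < SCAN_STATUS_FINISHED < SCAN_STATUS_ABORTED
```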
""" - _status_codes = { - "NOT_STARTED": 0, - "STARTING": 1, - "RUNNING": 2, - "FINISHING": 3, - "CLEANING_UP": 4, - "ABORTING": 5, - "ABORTED": 6, - "FAILED": 7, - "FINISHED": 8, - } - def __init__( self, *targets, @@ -127,6 +127,7 @@ def __init__( self._success = False self._scan_finish_status_message = None + self._marked_finished = False if scan_id is not None: self.id = str(scan_id) @@ -179,13 +180,12 @@ def __init__( else: self.home = self.preset.bbot_home / "scans" / self.name + self._status_code = SCAN_STATUS_NOT_STARTED + # scan temp dir self.temp_dir = self.home / "temp" self.helpers.mkdir(self.temp_dir) - self._status = "NOT_STARTED" - self._status_code = 0 - self.modules = OrderedDict({}) self._modules_loaded = False self.dummy_modules = {} @@ -323,7 +323,7 @@ async def _prep(self): self._fail_setup(msg) total_modules = total_failed + len(self.modules) - success_msg = f"Setup succeeded for {len(self.modules):,}/{total_modules:,} modules." + success_msg = f"Setup succeeded for {len(self.modules) - 2:,}/{total_modules - 2:,} modules." self.success(success_msg) self._prepped = True @@ -341,9 +341,9 @@ async def async_start_without_generator(self): pass async def async_start(self): - """ """ - self.start_time = datetime.now() - self.root_event.data["started_at"] = self.start_time.isoformat() + self.start_time = datetime.now(ZoneInfo("UTC")) + self.root_event.data["started_at"] = self.start_time.timestamp() + await self._set_status(SCAN_STATUS_STARTING) try: await self._prep() @@ -360,18 +360,16 @@ async def async_start(self): self._status_ticker(self.status_frequency), name=f"{self.name}._status_ticker()" ) - self.status = "STARTING" - if not self.modules: self.error("No modules loaded") - self.status = "FAILED" + await self._set_status(SCAN_STATUS_FAILED) return else: self.hugesuccess(f"Starting scan {self.name}") await self.dispatcher.on_start(self) - self.status = "RUNNING" + await self._set_status(SCAN_STATUS_RUNNING) self._start_modules() self.verbose(f"{len(self.modules):,} modules started") @@ -401,8 +399,6 @@ async def async_start(self): new_activity = await self.finish() if not new_activity: self._success = True - scan_finish_event = await self._mark_finished() - yield scan_finish_event break await asyncio.sleep(0.1) @@ -411,7 +407,7 @@ async def async_start(self): except BaseException as e: if self.helpers.in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)): - self.stop() + await self.async_stop() self._success = True else: try: @@ -426,6 +422,8 @@ async def async_start(self): self.critical(f"Unexpected error during scan:\n{traceback.format_exc()}") finally: + scan_finish_event = await self._mark_finished() + yield scan_finish_event tasks = self._cancel_tasks() self.debug(f"Awaiting {len(tasks):,} tasks") for task in tasks: @@ -435,7 +433,7 @@ async def async_start(self): self.debug(f"Awaited {len(tasks):,} tasks") await self._report() await self._cleanup() - + # report on final scan status await self.dispatcher.on_finish(self) self._stop_log_handlers() @@ -449,34 +447,44 @@ async def async_start(self): log_fn(self._scan_finish_status_message) async def _mark_finished(self): - if self.status == "ABORTING": - status = "ABORTED" + if self._marked_finished: + return + + self._marked_finished = True + + if self._status_code == SCAN_STATUS_ABORTING: + status_code = SCAN_STATUS_ABORTED elif not self._success: - status = "FAILED" + status_code = SCAN_STATUS_FAILED else: - status = "FINISHED" + status_code = SCAN_STATUS_FINISHED - self.end_time = datetime.now() + 
status = get_scan_status_name(status_code) + + self.end_time = datetime.now(ZoneInfo("UTC")) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) - self._scan_finish_status_message = f"Scan {self.name} completed in {self.duration_human} with status {status}" + self._scan_finish_status_message = ( + f"Scan {self.name} completed in {self.duration_human} with status {self.status}" + ) scan_finish_event = self.finish_event(self._scan_finish_status_message, status) - # queue final scan event with output modules - output_modules = [m for m in self.modules.values() if m._type == "output" and m.name != "python"] - for m in output_modules: - await m.queue_event(scan_finish_event) - # wait until output modules are flushed - while 1: - modules_finished = all(m.finished for m in output_modules) - if modules_finished: - break - await asyncio.sleep(0.05) - - self.status = status + if not self._stopping: + # queue final scan event with output modules + output_modules = [m for m in self.modules.values() if m._type == "output" and m.name != "python"] + for m in output_modules: + await m.queue_event(scan_finish_event) + # wait until output modules are flushed + while 1: + modules_finished = all([m.finished for m in output_modules]) + if modules_finished: + break + await asyncio.sleep(0.05) + + await self._set_status(status) return scan_finish_event def _start_modules(self): @@ -519,7 +527,8 @@ async def setup_modules(self, remove_failed=True): self.modules[module.name].set_error_state() hard_failed.append(module.name) else: - self.info(f"Setup soft-failed for {module.name}: {msg}") + log_fn = self.warning if module._type == "output" else self.info + log_fn(f"Setup soft-failed for {module.name}: {msg}") soft_failed.append(module.name) if (not status) and (module._intercept or remove_failed): # if a intercept module fails setup, we always remove it @@ -647,7 +656,7 @@ def num_queued_events(self): total += len(q._queue) return total - def modules_status(self, _log=False): + def modules_status(self, _log=False, detailed=False): finished = True status = {"modules": {}} @@ -717,7 +726,7 @@ def modules_status(self, _log=False): f"{self.name}: No events in queue ({self.stats.speedometer.speed:,} processed in the past {self.status_frequency} seconds)" ) - if self.log_level <= logging.DEBUG: + if detailed or self.log_level <= logging.DEBUG: # status debugging scan_active_status = [] scan_active_status.append(f"scan._finished_init: {self._finished_init}") @@ -750,7 +759,7 @@ def modules_status(self, _log=False): return status - def stop(self): + async def async_stop(self): """Stops the in-progress scan and performs necessary cleanup. This method sets the scan's status to "ABORTING," cancels any pending tasks, and drains event queues. It also kills child processes spawned during the scan. @@ -760,7 +769,7 @@ def stop(self): """ if not self._stopping: self._stopping = True - self.status = "ABORTING" + await self._set_status(SCAN_STATUS_ABORTING) self.hugewarning("Aborting scan") self.trace() self._cancel_tasks() @@ -769,6 +778,10 @@ def stop(self): self._drain_queues() self.helpers.kill_children() self.debug("Finished aborting scan") + await self._set_status(SCAN_STATUS_ABORTED) + + def stop(self): + asyncio.create_task(self.async_stop()) async def finish(self): """Finalizes the scan by invoking the `finished()` method on all active modules if new activity is detected. 
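Since `stop()` is now just a thin synchronous wrapper that schedules `async_stop()` on the running loop, async callers should await the coroutine directly. A hedged usage sketch (target and timing are illustrative):

```python
import asyncio

from bbot.scanner import Scanner


async def main():
    scan = Scanner("evilcorp.com")

    async def abort_after(seconds):
        await asyncio.sleep(seconds)
        # walks the scan through ABORTING -> ABORTED and drains its queues
        await scan.async_stop()

    asyncio.create_task(abort_after(10))
    async for event in scan.async_start():
        print(event)


asyncio.run(main())
```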
@@ -785,7 +798,7 @@ async def finish(self): # if new events were generated since last time we were here if self._new_activity: self._new_activity = False - self.status = "FINISHING" + await self._set_status(SCAN_STATUS_FINISHING) # Trigger .finished() on every module and start over log.info("Finishing scan") for module in self.modules.values(): @@ -839,8 +852,7 @@ def _cancel_tasks(self): # ticker if self.ticker_task: tasks.append(self.ticker_task) - # dispatcher - tasks += self.dispatcher_tasks + self.helpers.cancel_tasks_sync(tasks) # process pool self.helpers.process_pool.shutdown(cancel_futures=True) @@ -869,7 +881,8 @@ async def _cleanup(self): This method is called once at the end of the scan to perform resource cleanup tasks. It is executed regardless of whether the scan was aborted or completed - successfully. The scan status is set to "CLEANING_UP" during the execution. + successfully. + After calling the `cleanup()` method for each module, it performs additional cleanup tasks such as removing the scan's home directory if empty and cleaning old scans. @@ -880,16 +893,15 @@ async def _cleanup(self): # clean up self if not self._cleanedup: self._cleanedup = True - self.status = "CLEANING_UP" + # clean up modules + for mod in self.modules.values(): + await mod._cleanup() # clean up dns engine if self.helpers._dns is not None: await self.helpers.dns.shutdown() # clean up web engine if self.helpers._web is not None: await self.helpers.web.shutdown() - # clean up modules - for mod in self.modules.values(): - await mod._cleanup() with contextlib.suppress(Exception): self.home.rmdir() self.helpers.rm_rf(self.temp_dir, ignore_errors=True) @@ -946,19 +958,19 @@ def stopping(self): @property def stopped(self): - return self._status_code > 5 + return self._status_code >= SCAN_STATUS_FINISHED @property def running(self): - return 0 < self._status_code < 4 + return SCAN_STATUS_STARTING <= self._status_code <= SCAN_STATUS_FINISHING @property def aborting(self): - return 5 <= self._status_code <= 6 + return self._status_code in (SCAN_STATUS_ABORTING, SCAN_STATUS_ABORTED) @property def status(self): - return self._status + return get_scan_status_name(self._status_code) @property def omitted_event_types(self): @@ -966,29 +978,22 @@ def omitted_event_types(self): self._omitted_event_types = self.config.get("omit_event_types", []) return self._omitted_event_types - @status.setter - def status(self, status): - """ - Block setting after status has been aborted - """ - status = str(status).strip().upper() - if status in self._status_codes: - if self.status == "ABORTING" and not status == "ABORTED": - self.debug(f'Attempt to set invalid status "{status}" on aborted scan') - else: - if status != self._status: - self._status = status - self._status_code = self._status_codes[status] - self.dispatcher_tasks.append( - asyncio.create_task( - self.dispatcher.catch(self.dispatcher.on_status, self._status, self.id), - name=f"{self.name}.dispatcher.on_status({status})", - ) - ) - else: - self.debug(f'Scan status is already "{status}"') - else: - self.debug(f'Attempt to set invalid status "{status}" on scan') + async def _set_status(self, status): + try: + status_code = get_scan_status_code(status) + status = get_scan_status_name(status_code) + except ValueError: + self.warning(f'Attempt to set invalid status "{status}" on scan') + return + + self.debug(f"Setting scan status from {self.status} to {status}") + # if the status isn't progressing forward, skip setting it + if status_code <= self._status_code: +
self.debug(f'Attempt to set invalid status "{status}" on scan with status "{self.status}"') + return + + self._status_code = status_code + with self.dispatcher.catch(): + await self.dispatcher.on_status(self.status, self.id) def make_event(self, *args, **kwargs): kwargs["scan"] = self @@ -1019,14 +1024,18 @@ def root_event(self): if self._root_event is None: self._root_event = self.make_root_event(f"Scan {self.name} started at {self.start_time}") self._root_event.data["status"] = self.status + self._root_event.data["status_code"] = self._status_code return self._root_event - def finish_event(self, context=None, status=None): + def finish_event(self, context=None, status_code=None): if self._finish_event is None: - if context is None or status is None: - raise ValueError("Must specify context and status") + if context is None or status_code is None: + raise ValueError("Must specify context and status_code") self._finish_event = self.make_root_event(context) + status_code = get_scan_status_code(status_code) + status = get_scan_status_name(status_code) self._finish_event.data["status"] = status + self._finish_event.data["status_code"] = status_code return self._finish_event def make_root_event(self, context): @@ -1159,9 +1168,9 @@ def json(self): j["target"] = self.preset.target.json j["preset"] = self.preset.to_dict(redact_secrets=True) if self.start_time is not None: - j["started_at"] = self.start_time.isoformat() + j["started_at"] = self.start_time.timestamp() if self.end_time is not None: - j["finished_at"] = self.end_time.isoformat() + j["finished_at"] = self.end_time.timestamp() if self.duration is not None: j["duration_seconds"] = self.duration_seconds if self.duration_human is not None: diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index d894973c03..f0aa1315c3 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -216,12 +216,13 @@ class BBOTTarget: Provides high-level functions like in_scope(), which includes both whitelist and blacklist checks. 
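A sketch of the renamed `strict_dns_scope` flag and the new whitelist round-trip behavior (assuming `.json` is a property, as in the surrounding context; hosts are illustrative):

```python
from bbot.scanner.target import BBOTTarget

target = BBOTTarget("evilcorp.com", "1.2.3.0/24", strict_dns_scope=True)
j = target.json
assert j["strict_dns_scope"] is True
# no explicit whitelist was passed, so it serializes as None...
assert j["whitelist"] is None
# ...even though, functionally, it was copied from the seeds
assert "evilcorp.com" in target.whitelist.hosts
```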
""" - def __init__(self, *seeds, whitelist=None, blacklist=None, strict_scope=False): - self.strict_scope = strict_scope - self.seeds = ScanSeeds(*seeds, strict_dns_scope=strict_scope) + def __init__(self, *seeds, whitelist=None, blacklist=None, strict_dns_scope=False): + self.strict_dns_scope = strict_dns_scope + self.seeds = ScanSeeds(*seeds, strict_dns_scope=strict_dns_scope) + self._orig_whitelist = whitelist if whitelist is None: whitelist = self.seeds.hosts - self.whitelist = ScanWhitelist(*whitelist, strict_dns_scope=strict_scope) + self.whitelist = ScanWhitelist(*whitelist, strict_dns_scope=strict_dns_scope) if blacklist is None: blacklist = [] self.blacklist = ScanBlacklist(*blacklist) @@ -230,9 +231,9 @@ def __init__(self, *seeds, whitelist=None, blacklist=None, strict_scope=False): def json(self): return { "seeds": sorted(self.seeds.inputs), - "whitelist": sorted(self.whitelist.inputs), + "whitelist": (None if not self._orig_whitelist else sorted(self.whitelist.inputs)), "blacklist": sorted(self.blacklist.inputs), - "strict_scope": self.strict_scope, + "strict_dns_scope": self.strict_dns_scope, "hash": self.hash.hex(), "seed_hash": self.seeds.hash.hex(), "whitelist_hash": self.whitelist.hash.hex(), diff --git a/bbot/scripts/docs.py b/bbot/scripts/docs.py index 354885cf52..9b244d0ae3 100755 --- a/bbot/scripts/docs.py +++ b/bbot/scripts/docs.py @@ -6,7 +6,7 @@ import yaml from pathlib import Path -from bbot import Preset +from bbot.scanner import Preset from bbot.core.modules import MODULE_LOADER diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 9ad2d932fa..48c5a91bbd 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -146,48 +146,80 @@ def helpers(scan): @pytest.fixture def events(scan): + dummy_module = scan._make_dummy_module("dummy_module") + class bbot_events: - localhost = scan.make_event("127.0.0.1", parent=scan.root_event) - ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event) - netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event) - ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event) - netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event) - domain = scan.make_event("publicAPIs.org", parent=scan.root_event) - subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event) - email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event) - open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event) + localhost = scan.make_event("127.0.0.1", parent=scan.root_event, module=dummy_module) + ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event, module=dummy_module) + netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event, module=dummy_module) + ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event, module=dummy_module) + netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event, module=dummy_module) + domain = scan.make_event("publicAPIs.org", parent=scan.root_event, module=dummy_module) + subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event, module=dummy_module) + email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event, module=dummy_module) + open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event, module=dummy_module) protocol = scan.make_event( - {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, "PROTOCOL", parent=scan.root_event + {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, + 
"PROTOCOL", + parent=scan.root_event, + module=dummy_module, + ) + ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event, module=dummy_module) + ipv6_open_port = scan.make_event( + "[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event, module=dummy_module + ) + url_unverified = scan.make_event( + "https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv4_url_unverified = scan.make_event( + "https://8.8.8.8:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv6_url_unverified = scan.make_event( + "https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event, module=dummy_module ) - ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event) - ipv6_open_port = scan.make_event("[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event) - url_unverified = scan.make_event("https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event) - ipv4_url_unverified = scan.make_event("https://8.8.8.8:443/hellofriend", parent=scan.root_event) - ipv6_url_unverified = scan.make_event("https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event) url = scan.make_event( - "https://api.publicAPIs.org:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://api.publicAPIs.org:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, ) ipv4_url = scan.make_event( - "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event, module=dummy_module ) ipv6_url = scan.make_event( - "https://[2001:4860:4860::8888]:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://[2001:4860:4860::8888]:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, + ) + url_hint = scan.make_event( + "https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url, module=dummy_module ) - url_hint = scan.make_event("https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url) vulnerability = scan.make_event( - {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, + {"host": "evilcorp.com", "severity": "INFO", "description": "asdf", "name": "Vulnerability"}, "VULNERABILITY", parent=scan.root_event, + module=dummy_module, + ) + finding = scan.make_event( + {"host": "evilcorp.com", "description": "asdf", "name": "Finding"}, + "FINDING", + parent=scan.root_event, + module=dummy_module, + ) + vhost = scan.make_event( + {"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event, module=dummy_module ) - finding = scan.make_event({"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event) - vhost = scan.make_event({"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event) - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) + http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module) storage_bucket = scan.make_event( {"name": "storage", "url": "https://storage.blob.core.windows.net"}, "STORAGE_BUCKET", parent=scan.root_event, + module=dummy_module, ) - emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event) + emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event, module=dummy_module) bbot_events.all = [ # noqa: F841 
bbot_events.localhost, diff --git a/bbot/test/fastapi_test.py b/bbot/test/fastapi_test.py index f0c7b2d789..a4a1d57107 100644 --- a/bbot/test/fastapi_test.py +++ b/bbot/test/fastapi_test.py @@ -1,5 +1,5 @@ from typing import List -from bbot import Scanner +from bbot.scanner import Scanner from fastapi import FastAPI, Query app = FastAPI() diff --git a/bbot/test/test_step_1/test__module__tests.py b/bbot/test/test_step_1/test__module__tests.py index 6221b61490..b68ad50a5d 100644 --- a/bbot/test/test_step_1/test__module__tests.py +++ b/bbot/test/test_step_1/test__module__tests.py @@ -2,7 +2,7 @@ import importlib from pathlib import Path -from bbot import Preset +from bbot.scanner import Preset from ..test_step_2.module_tests.base import ModuleTestBase log = logging.getLogger("bbot.test.modules") diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index 669ca827d9..3dca8aeded 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -9,7 +9,7 @@ def run_bbot_multiprocess(queue): - from bbot import Scanner + from bbot.scanner import Scanner scan = Scanner("http://127.0.0.1:8888", "blacklanternsecurity.com", modules=["httpx"]) events = [e.json() for e in scan.start()] @@ -27,7 +27,7 @@ def test_bbot_multiprocess(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any(e["data"] == "test@blacklanternsecurity.com" for e in events) + assert any(e.get("data", "") == "test@blacklanternsecurity.com" for e in events) def test_bbot_fastapi(bbot_httpserver): @@ -58,7 +58,7 @@ def test_bbot_fastapi(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any(e["data"] == "test@blacklanternsecurity.com" for e in events) + assert any(e.get("data", "") == "test@blacklanternsecurity.com" for e in events) finally: with suppress(Exception): diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py new file mode 100644 index 0000000000..d453fa81e1 --- /dev/null +++ b/bbot/test/test_step_1/test_db_models.py @@ -0,0 +1,93 @@ +from datetime import datetime +from zoneinfo import ZoneInfo + +from bbot.models.pydantic import Event +from bbot.core.event.base import BaseEvent +from bbot.models.helpers import utc_datetime_validator +from ..bbot_fixtures import * # noqa + + +def test_pydantic_models(events, bbot_scanner): + # test datetime helpers + now = datetime.now(ZoneInfo("America/New_York")) + utc_now = utc_datetime_validator(now) + assert now.timestamp() == utc_now.timestamp() + now2 = datetime.fromtimestamp(utc_now.timestamp(), ZoneInfo("UTC")) + assert now2.timestamp() == utc_now.timestamp() + utc_now2 = utc_datetime_validator(now2) + assert utc_now2.timestamp() == utc_now.timestamp() + + test_event = Event(**events.ipv4.json()) + assert sorted(test_event.indexed_fields()) == [ + "data", + "host", + "id", + "inserted_at", + "module", + "parent", + "parent_uuid", + "reverse_host", + "scan", + "timestamp", + "type", + "uuid", + ] + + # convert events to pydantic and back, making sure they're exactly the same + for event in ("ipv4", "http_response", "finding", "vulnerability", "storage_bucket"): + e = getattr(events, event) + event_json = e.json() + event_pydantic = Event(**event_json) + event_pydantic_dict = event_pydantic.model_dump() + event_reconstituted = 
BaseEvent.from_json(event_pydantic.model_dump(exclude_none=True)) + assert isinstance(event_json["timestamp"], float) + assert isinstance(e.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, float) + assert not "inserted_at" in event_json + assert isinstance(event_pydantic_dict["timestamp"], float) + assert isinstance(event_pydantic_dict["inserted_at"], float) + + event_pydantic_dict = event_pydantic.model_dump( + exclude_none=True, exclude=["reverse_host", "inserted_at", "archived"] + ) + assert event_pydantic_dict == event_json + event_pydantic_dict.pop("scan") + event_pydantic_dict.pop("module") + event_pydantic_dict.pop("module_sequence") + assert event_reconstituted.json() == event_pydantic_dict + + # make sure we can dedupe events by their id + scan = bbot_scanner() + event1 = scan.make_event("1.2.3.4", parent=scan.root_event) + event2 = scan.make_event("1.2.3.4", parent=scan.root_event) + event3 = scan.make_event("evilcorp.com", parent=scan.root_event) + event4 = scan.make_event("evilcorp.com", parent=scan.root_event) + # first two events are IPS + assert event1.uuid != event2.uuid + assert event1.id == event2.id + # second two are DNS + assert event2.uuid != event3.uuid + assert event2.id != event3.id + assert event3.uuid != event4.uuid + assert event3.id == event4.id + + event_set_bbot = { + event1, + event2, + event3, + event4, + } + assert len(event_set_bbot) == 2 + assert set([e.type for e in event_set_bbot]) == {"IP_ADDRESS", "DNS_NAME"} + + event_set_pydantic = { + Event(**event1.json()), + Event(**event2.json()), + Event(**event3.json()), + Event(**event4.json()), + } + assert len(event_set_pydantic) == 2 + assert set([e.type for e in event_set_pydantic]) == {"IP_ADDRESS", "DNS_NAME"} + + +# TODO: SQL diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 64bd060bf8..051b561b6c 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -334,26 +334,71 @@ async def test_events(events, helpers): assert "affiliate" in corrected_event4.tags test_vuln = scan.make_event( - {"host": "EVILcorp.com", "severity": "iNfo ", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "EVILcorp.com", "severity": "iNfo ", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, ) assert test_vuln.data["host"] == "evilcorp.com" assert test_vuln.data["severity"] == "INFO" test_vuln2 = scan.make_event( - {"host": "192.168.1.1", "severity": "iNfo ", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "192.168.1.1", "severity": "iNfo ", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, ) assert json.loads(test_vuln2.data_human)["severity"] == "INFO" assert test_vuln2.host.is_private + # must have severity with pytest.raises(ValidationError, match=".*validation error.*\nseverity\n.*Field required.*"): - test_vuln = scan.make_event({"host": "evilcorp.com", "description": "asdf"}, "VULNERABILITY", dummy=True) + test_vuln = scan.make_event( + {"host": "evilcorp.com", "description": "asdf", "name": "Vulnerability"}, "VULNERABILITY", dummy=True + ) + # invalid host with pytest.raises(ValidationError, match=".*host.*\n.*Invalid host.*"): test_vuln = scan.make_event( - {"host": "!@#$", "severity": "INFO", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "!@#$", "severity": "INFO", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, ) + # invalid severity with pytest.raises(ValidationError, 
match=".*severity.*\n.*Invalid severity.*"): test_vuln = scan.make_event( - {"host": "evilcorp.com", "severity": "WACK", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "evilcorp.com", "severity": "WACK", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, + ) + # must have name + with pytest.raises(ValidationError, match=".*name.*\n.*Field required.*"): + test_vuln = scan.make_event( + {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, + "VULNERABILITY", + dummy=True, ) + # port and netloc should be derived from URL + test_vuln = scan.make_event( + { + "host": "evilcorp.com", + "name": "test", + "severity": "INFO", + "description": "asdf", + "url": "http://evilcorp.com/test", + }, + "VULNERABILITY", + dummy=True, + ) + assert test_vuln.host == "evilcorp.com" + assert test_vuln.port == 80 + assert test_vuln.netloc == "evilcorp.com:80" + + # technology should be lowercased + tech_event = scan.make_event( + {"host": "evilcorp.com", "technology": "HTTP", "url": "http://evilcorp.com/test"}, + "TECHNOLOGY", + dummy=True, + ) + assert tech_event.data["technology"] == "http" + assert tech_event.port == 80 + # test tagging ip_event_1 = scan.make_event("8.8.8.8", dummy=True) assert "private-ip" not in ip_event_1.tags @@ -499,7 +544,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.isoformat() + timestamp = db_event.timestamp.timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) @@ -520,7 +565,7 @@ async def test_events(events, helpers): assert reconstituted_event.uuid == db_event.uuid assert reconstituted_event.parent_uuid == scan.root_event.uuid assert reconstituted_event.scope_distance == 1 - assert reconstituted_event.timestamp.isoformat() == timestamp + assert reconstituted_event.timestamp.timestamp() == timestamp assert reconstituted_event.data == "evilcorp.com:80" assert reconstituted_event.type == "OPEN_TCP_PORT" assert reconstituted_event.host == "evilcorp.com" @@ -534,21 +579,6 @@ async def test_events(events, helpers): assert hostless_event_json["data"] == "asdf" assert "host" not in hostless_event_json - # SIEM-friendly serialize/deserialize - json_event_siemfriendly = db_event.json(siem_friendly=True) - assert json_event_siemfriendly["scope_distance"] == 1 - assert json_event_siemfriendly["data"] == {"OPEN_TCP_PORT": "evilcorp.com:80"} - assert json_event_siemfriendly["type"] == "OPEN_TCP_PORT" - assert json_event_siemfriendly["host"] == "evilcorp.com" - assert json_event_siemfriendly["timestamp"] == timestamp - reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) - assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.isoformat() == timestamp - assert reconstituted_event2.data == "evilcorp.com:80" - assert reconstituted_event2.type == "OPEN_TCP_PORT" - assert reconstituted_event2.host == "evilcorp.com" - assert "127.0.0.1" in reconstituted_event2.resolved_hosts - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" @@ -557,9 +587,13 @@ async def test_events(events, helpers): == 'HTTP/1.1 200 OK\r\nConnection: close\r\nAge: 526111\r\nCache-Control: 
max-age=604800\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Mon, 14 Nov 2022 17:14:27 GMT\r\nEtag: "3147526947+ident+gzip"\r\nExpires: Mon, 21 Nov 2022 17:14:27 GMT\r\nLast-Modified: Thu, 17 Oct 2019 07:18:26 GMT\r\nServer: ECS (agb/A445)\r\nVary: Accept-Encoding\r\nX-Cache: HIT\r\n\r\n\n\n\n Example Domain\n\n \n \n \n \n\n\n\n
\n Example Domain \n This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission. \n More information... \n
\n\n\n' ) json_event = http_response.json(mode="graph") + assert "data" in json_event + assert "data_json" not in json_event assert isinstance(json_event["data"], str) json_event = http_response.json() - assert isinstance(json_event["data"], dict) + assert "data" not in json_event + assert "data_json" in json_event + assert isinstance(json_event["data_json"], dict) assert json_event["type"] == "HTTP_RESPONSE" assert json_event["host"] == "example.com" assert json_event["parent"] == scan.root_event.id @@ -951,13 +985,15 @@ def test_event_closest_host(): event3 = scan.make_event({"path": "/tmp/asdf.txt"}, "FILESYSTEM", parent=event2) assert not event3.host # finding automatically uses the host from the second event - finding = scan.make_event({"description": "test"}, "FINDING", parent=event3) + finding = scan.make_event({"description": "test", "name": "Finding"}, "FINDING", parent=event3) assert finding.data["host"] == "www.evilcorp.com" assert finding.data["url"] == "http://www.evilcorp.com/asdf" assert finding.data["path"] == "/tmp/asdf.txt" assert finding.host == "www.evilcorp.com" # same with vuln - vuln = scan.make_event({"description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3) + vuln = scan.make_event( + {"description": "test", "severity": "HIGH", "name": "Vulnerability"}, "VULNERABILITY", parent=event3 + ) assert vuln.data["host"] == "www.evilcorp.com" assert vuln.data["url"] == "http://www.evilcorp.com/asdf" assert vuln.data["path"] == "/tmp/asdf.txt" @@ -967,19 +1003,29 @@ def test_event_closest_host(): event3 = scan.make_event("wat", "ASDF", parent=scan.root_event) assert not event3.host with pytest.raises(ValueError): - finding = scan.make_event({"description": "test"}, "FINDING", parent=event3) - finding = scan.make_event({"path": "/tmp/asdf.txt", "description": "test"}, "FINDING", parent=event3) + finding = scan.make_event({"description": "test", "name": "Finding"}, "FINDING", parent=event3) + finding = scan.make_event( + {"path": "/tmp/asdf.txt", "description": "test", "name": "Finding"}, "FINDING", parent=event3 + ) assert finding is not None - finding = scan.make_event({"host": "evilcorp.com", "description": "test"}, "FINDING", parent=event3) + finding = scan.make_event( + {"host": "evilcorp.com", "description": "test", "name": "Finding"}, "FINDING", parent=event3 + ) assert finding is not None with pytest.raises(ValueError): - vuln = scan.make_event({"description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3) + vuln = scan.make_event( + {"description": "test", "severity": "HIGH", "name": "Vulnerability"}, "VULNERABILITY", parent=event3 + ) vuln = scan.make_event( - {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3 + {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH", "name": "Vulnerability"}, + "VULNERABILITY", + parent=event3, ) assert vuln is not None vuln = scan.make_event( - {"host": "evilcorp.com", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3 + {"host": "evilcorp.com", "description": "test", "severity": "HIGH", "name": "Vulnerability"}, + "VULNERABILITY", + parent=event3, ) assert vuln is not None @@ -1069,21 +1115,24 @@ def test_event_hashing(): url_event = scan.make_event("https://api.example.com/", "URL_UNVERIFIED", parent=scan.root_event) host_event_1 = scan.make_event("www.example.com", "DNS_NAME", parent=url_event) host_event_2 = scan.make_event("test.example.com", "DNS_NAME", parent=url_event) - finding_data = {"description": 
"Custom Yara Rule [find_string] Matched via identifier [str1]"} + finding_data = {"description": "Custom Yara Rule [find_string] Matched via identifier [str1]", "name": "Finding"} finding1 = scan.make_event(finding_data, "FINDING", parent=host_event_1) finding2 = scan.make_event(finding_data, "FINDING", parent=host_event_2) finding3 = scan.make_event(finding_data, "FINDING", parent=host_event_2) assert finding1.data == { "description": "Custom Yara Rule [find_string] Matched via identifier [str1]", + "name": "Finding", "host": "www.example.com", } assert finding2.data == { "description": "Custom Yara Rule [find_string] Matched via identifier [str1]", + "name": "Finding", "host": "test.example.com", } assert finding3.data == { "description": "Custom Yara Rule [find_string] Matched via identifier [str1]", + "name": "Finding", "host": "test.example.com", } assert finding1.id != finding2.id diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index f012b0e3e0..a976498f15 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -115,6 +115,8 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) if scan_callback is not None: scan_callback(scan) output_events = [e async for e in scan.async_start()] + # let modules initialize + await asyncio.sleep(0.5) return ( output_events, dummy_module.events, @@ -268,7 +270,9 @@ async def filter_event(self, event): async def handle_event(self, event): await self.emit_event( - {"host": str(event.host), "description": "yep", "severity": "CRITICAL"}, "VULNERABILITY", parent=event + {"host": str(event.host), "description": "yep", "severity": "CRITICAL", "name": "Vulnerability"}, + "VULNERABILITY", + parent=event, ) def custom_setup(scan): diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 07b4f6692d..9e2e21e12e 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -242,7 +242,7 @@ class mod_domain_only(BaseModule): scan.modules["mod_host_only"] = mod_host_only(scan) scan.modules["mod_hostport_only"] = mod_hostport_only(scan) scan.modules["mod_domain_only"] = mod_domain_only(scan) - scan.status = "RUNNING" + await scan._set_status("RUNNING") url_1 = scan.make_event("http://evilcorp.com/1", event_type="URL", parent=scan.root_event, tags=["status-200"]) url_2 = scan.make_event("http://evilcorp.com/2", event_type="URL", parent=scan.root_event, tags=["status-200"]) @@ -310,7 +310,7 @@ async def test_modules_basic_perdomainonly(bbot_scanner, monkeypatch): await per_domain_scan.load_modules() await per_domain_scan.setup_modules() - per_domain_scan.status = "RUNNING" + await per_domain_scan._set_status("RUNNING") # ensure that multiple events to the same "host" (schema + host) are blocked and check the per host tracker @@ -354,7 +354,14 @@ async def handle_event(self, event): # quick emit events like FINDINGS behave differently than normal ones # hosts are not speculated from them await self.emit_event( - {"host": "www.evilcorp.com", "url": "http://www.evilcorp.com", "description": "asdf"}, "FINDING", event + { + "host": "www.evilcorp.com", + "url": "http://www.evilcorp.com", + "description": "asdf", + "name": "Finding", + }, + "FINDING", + event, ) await self.emit_event("https://asdf.evilcorp.com", "URL", event, tags=["status-200"]) @@ -449,7 +456,7 @@ async def 
test_module_loading(bbot_scanner): force_start=True, ) await scan2.load_modules() - scan2.status = "RUNNING" + await scan2._set_status("RUNNING") # attributes, descriptions, etc. for module_name, module in sorted(scan2.modules.items()): diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 744b108dd3..8fb5b75858 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -568,6 +568,75 @@ def test_preset_module_resolution(clean_default_config): } +@pytest.mark.asyncio +async def test_custom_module_dir(): + custom_module_dir = bbot_test_dir / "custom_modules" + custom_module_dir.mkdir(parents=True, exist_ok=True) + + custom_module = custom_module_dir / "testmodule.py" + with open(custom_module, "w") as f: + f.write( + """ +from bbot.modules.base import BaseModule + +class TestModule(BaseModule): + watched_events = ["SCAN"] + + async def handle_event(self, event): + await self.emit_event("127.0.0.2", parent=event) +""" + ) + + preset = { + "module_dirs": [str(custom_module_dir)], + "modules": ["testmodule"], + } + preset = Preset.from_dict(preset) + + scan = Scanner("127.0.0.0/24", preset=preset) + events = [e async for e in scan.async_start()] + event_data = [(str(e.data), str(e.module)) for e in events] + assert ("127.0.0.2", "testmodule") in event_data + + shutil.rmtree(custom_module_dir) + + +def test_preset_scope_round_trip(): + preset_dict = { + "target": ["127.0.0.1"], + "whitelist": ["127.0.0.2"], + "blacklist": ["127.0.0.3"], + "config": {"scope": {"strict": True}}, + } + preset = Preset.from_dict(preset_dict) + baked = preset.bake() + assert list(baked.seeds) == ["127.0.0.1"] + assert list(baked.whitelist) == ["127.0.0.2"] + assert list(baked.blacklist) == ["127.0.0.3"] + assert baked.config.scope.strict is True + assert baked.to_dict(include_target=True) == preset_dict + + +def test_preset_target_tolerance(): + # tolerate both "target" and "targets", since this is a common oopsie + preset_dict = { + "target": ["127.0.0.1"], + "targets": ["127.0.0.2"], + } + preset = Preset.from_dict(preset_dict) + baked = preset.bake() + assert set(baked.seeds) == {"127.0.0.1", "127.0.0.2"} + + preset = Preset.from_yaml_string(""" +target: + - 127.0.0.1 +targets: + - 127.0.0.2 +""") + baked = preset.bake() + assert set(baked.seeds) == {"127.0.0.1", "127.0.0.2"} + + @pytest.mark.asyncio async def test_preset_module_loader(): custom_module_dir = bbot_test_dir / "custom_module_dir" diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index 1282110400..9cd7e1c974 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -3,7 +3,7 @@ @pytest.mark.asyncio async def test_python_api(): - from bbot import Scanner + from bbot.scanner import Scanner # make sure events are properly yielded scan1 = Scanner("127.0.0.1") @@ -95,7 +95,7 @@ def test_python_api_validation(): # invalid output module with pytest.raises(ValidationError) as error: Scanner(output_modules=["asdf"]) - assert str(error.value) == 'Could not find output module "asdf". Did you mean "teams"?' + assert str(error.value) == 'Could not find output module "asdf". Did you mean "nats"?' 
# invalid excluded module with pytest.raises(ValidationError) as error: Scanner(exclude_modules=["asdf"]) @@ -119,7 +119,7 @@ def test_python_api_validation(): # normal module as output module with pytest.raises(ValidationError) as error: Scanner(output_modules=["robots"]) - assert str(error.value) == 'Could not find output module "robots". Did you mean "web_report"?' + assert str(error.value) == 'Could not find output module "robots". Did you mean "rabbitmq"?' # invalid preset type with pytest.raises(ValidationError) as error: Scanner(preset="asdf") diff --git a/bbot/test/test_step_1/test_regexes.py b/bbot/test/test_step_1/test_regexes.py index 94860fd4c0..ffefbf1a12 100644 --- a/bbot/test/test_step_1/test_regexes.py +++ b/bbot/test/test_step_1/test_regexes.py @@ -351,7 +351,7 @@ def test_url_regexes(): @pytest.mark.asyncio async def test_regex_helper(): - from bbot import Scanner + from bbot.scanner import Scanner scan = Scanner("evilcorp.com", "evilcorp.org", "evilcorp.net", "evilcorp.co.uk") diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index c5222d9591..66738796ed 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -35,9 +35,9 @@ async def test_scan( assert not scan0.in_scope("www.evilcorp.co.uk") j = scan0.json assert set(j["target"]["seeds"]) == {"1.1.1.0", "1.1.1.0/31", "evilcorp.com", "test.evilcorp.com"} - # we preserve the original whitelist inputs - assert set(j["target"]["whitelist"]) == {"1.1.1.0/32", "1.1.1.0/31", "evilcorp.com", "test.evilcorp.com"} - # but in the background they are collapsed + # no whitelist was set + assert j["target"]["whitelist"] is None + # but functionally it was copied from the seeds, and collapsed assert scan0.target.whitelist.hosts == {ip_network("1.1.1.0/31"), "evilcorp.com"} assert set(j["target"]["blacklist"]) == {"1.1.1.0/28", "www.evilcorp.com"} assert "ipneighbor" in j["preset"]["modules"] @@ -194,7 +194,7 @@ async def test_python_output_matches_json(bbot_scanner): assert len(events) == 5 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert all(isinstance(e["data"]["status"], str) for e in scan_events) + assert all(isinstance(e["data_json"]["status"], str) for e in scan_events) assert len([e for e in events if e["type"] == "DNS_NAME"]) == 1 assert len([e for e in events if e["type"] == "ORG_STUB"]) == 1 assert len([e for e in events if e["type"] == "IP_ADDRESS"]) == 1 @@ -223,7 +223,7 @@ async def test_huge_target_list(bbot_scanner, monkeypatch): async def test_exclude_cdn(bbot_scanner, monkeypatch): # test that CDN exclusion works - from bbot import Preset + from bbot.scanner import Preset dns_mock = { "evilcorp.com": {"A": ["127.0.0.1"]}, diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index a368718048..e2de5fb6f3 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -172,7 +172,7 @@ async def test_target_basic(bbot_scanner): bbottarget3 = BBOTTarget("evilcorp.com", whitelist=["1.2.3.4/24"], blacklist=["1.2.3.4"]) bbottarget5 = BBOTTarget("evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"]) bbottarget6 = BBOTTarget( - "evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"], strict_scope=True + "evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"], strict_dns_scope=True ) bbottarget8 = BBOTTarget("1.2.3.0/24", whitelist=["evilcorp.com", "evilcorp.net"], 
blacklist=["1.2.3.4"]) bbottarget9 = BBOTTarget("evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"]) @@ -203,7 +203,7 @@ async def test_target_basic(bbot_scanner): assert bbottarget1 != bbottarget2 assert bbottarget2 != bbottarget1 - # make sure strict_scope is considered in hash + # make sure strict_dns_scope is considered in hash assert bbottarget5 != bbottarget6 assert bbottarget6 != bbottarget5 @@ -304,7 +304,7 @@ async def test_target_basic(bbot_scanner): assert target_dict["seeds"] == ["1.2.3.0/24", "bob@fdsa.evilcorp.net", "http://www.evilcorp.net/"] assert target_dict["whitelist"] == ["bob@www.evilcorp.com", "evilcorp.com", "evilcorp.net"] assert target_dict["blacklist"] == ["1.2.3.4", "4.3.2.0/24", "bob@asdf.evilcorp.net", "http://1.2.3.4/"] - assert target_dict["strict_scope"] is False + assert target_dict["strict_dns_scope"] is False assert target_dict["hash"] == "b36955a8238a71842fc5f23b11110c26ea07d451" assert target_dict["seed_hash"] == "560af51d1f3d69bc5c156fc270b28497fe52dec1" assert target_dict["whitelist_hash"] == "8ed0a7368e6d34630e1cfd419d2a73767debc4c4" @@ -327,7 +327,7 @@ async def test_target_basic(bbot_scanner): target = RadixTarget("www.evilcorp.com", "evilcorp.com", acl_mode=True) assert set(target) == {"evilcorp.com"} - # make sure strict_scope doesn't mess us up + # make sure strict_dns_scope doesn't mess us up target = RadixTarget("evilcorp.co.uk", "www.evilcorp.co.uk", acl_mode=True, strict_dns_scope=True) assert set(target.hosts) == {"evilcorp.co.uk", "www.evilcorp.co.uk"} assert "evilcorp.co.uk" in target diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py index 96079b5f04..235d498e2f 100644 --- a/bbot/test/test_step_1/test_web.py +++ b/bbot/test/test_step_1/test_web.py @@ -289,7 +289,7 @@ async def test_web_interactsh(bbot_scanner, bbot_httpserver): async_correct_url = False scan1 = bbot_scanner("8.8.8.8") - scan1.status = "RUNNING" + await scan1._set_status("RUNNING") interactsh_client = scan1.helpers.interactsh(poll_interval=3) interactsh_client2 = scan1.helpers.interactsh(poll_interval=3) diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index 26bd0b7995..7d70c80271 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ b/bbot/test/test_step_2/module_tests/base.py @@ -158,3 +158,19 @@ async def setup_before_prep(self, module_test): async def setup_after_prep(self, module_test): pass + + async def wait_for_port_open(self, port): + while not await self.is_port_open("localhost", port): + self.log.verbose(f"Waiting for port {port} to be open...") + await asyncio.sleep(0.5) + # allow an extra second for things to settle + await asyncio.sleep(1) + + async def is_port_open(self, host, port): + try: + reader, writer = await asyncio.open_connection(host, port) + writer.close() + await writer.wait_closed() + return True + except (ConnectionRefusedError, OSError): + return False diff --git a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py index 8accc7c300..6d6fbeb1c6 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py +++ b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py @@ -92,7 +92,7 @@ def check(self, module_test, events): dnn_installwizard_privesc_detection = False for e in events: - if e.type == "TECHNOLOGY" and "DotNetNuke" in e.data["technology"]: + if e.type == "TECHNOLOGY" and "dotnetnuke" in e.data["technology"]: 
dnn_technology_detection = True if ( @@ -165,7 +165,7 @@ def check(self, module_test, events): dnn_dnnimagehandler_blindssrf = False for e in events: - if e.type == "TECHNOLOGY" and "DotNetNuke" in e.data["technology"]: + if e.type == "TECHNOLOGY" and "dotnetnuke" in e.data["technology"]: dnn_technology_detection = True if e.type == "VULNERABILITY" and "DotNetNuke Blind-SSRF (CVE 2017-0929)" in e.data["description"]: diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py new file mode 100644 index 0000000000..9846f02246 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -0,0 +1,123 @@ +import httpx +import asyncio + +from .base import ModuleTestBase + + +class TestElastic(ModuleTestBase): + config_overrides = { + "modules": { + "elastic": { + "url": "https://localhost:9200/bbot_test_events/_doc", + "username": "elastic", + "password": "bbotislife", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Elasticsearch container + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-elastic", + "--rm", + "-e", + "ELASTIC_PASSWORD=bbotislife", + "-e", + "cluster.routing.allocation.disk.watermark.low=96%", + "-e", + "cluster.routing.allocation.disk.watermark.high=97%", + "-e", + "cluster.routing.allocation.disk.watermark.flood_stage=98%", + "-p", + "9200:9200", + "-d", + "docker.elastic.co/elasticsearch/elasticsearch:8.16.0", + ) + + # Connect to Elasticsearch with retry logic + async with httpx.AsyncClient(verify=False) as client: + while True: + try: + # Attempt a simple operation to confirm the connection + response = await client.get("https://localhost:9200/_cat/health", auth=("elastic", "bbotislife")) + response.raise_for_status() + break + except Exception as e: + self.log.verbose(f"Connection failed: {e}. Retrying...") + # non-blocking sleep so we don't stall the event loop + await asyncio.sleep(0.5) + + # Ensure the index is empty + await client.delete("https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to Elasticsearch + async with httpx.AsyncClient(verify=False) as client: + # Fetch all events from the index + response = await client.get( + "https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") + ) + response_json = response.json() + db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] + + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert main_event is not None, ( + "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + ) + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert main_event.get("reverse_host") == expected_reverse_host, ( + f"reverse_host attribute is not correct, expected {expected_reverse_host}" + ) + + # Events don't match exactly because the elastic ones have reverse_host and inserted_at + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host", None) + db_event.pop("inserted_at", None) + db_event.pop("archived", None) + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the index + async with httpx.AsyncClient(verify=False) as client: + response = await client.delete( + "https://localhost:9200/bbot_test_events", + auth=("elastic", "bbotislife"), + params={"ignore": "400,404"}, + ) + self.log.verbose("Deleted documents from index") + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py index e2e6dc78eb..0289e83e8d 100644 --- a/bbot/test/test_step_2/module_tests/test_module_excavate.py +++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py @@ -1444,9 +1444,16 @@ async def setup_after_prep(self, module_test): module_test.set_expect_requests({"uri": "/"}, {"response_data": self.bad_url_data}) def check(self, module_test, events): + import gzip + debug_log_content = open(module_test.scan.home / "debug.log").read() + for archived_debug_log in module_test.scan.home.glob("debug.log.*.gz"): + gzipped_content = open(archived_debug_log, "rb").read() + ungzipped_content = gzip.decompress(gzipped_content).decode("utf-8") + debug_log_content += ungzipped_content + # make sure our logging is working - assert "Setting scan status to STARTING" in debug_log_content + assert "Setting scan status to RUNNING" in
debug_log_content # make sure we don't have any URL validation errors assert "Error Parsing reconstructed URL" not in debug_log_content assert "Error sanitizing event data" not in debug_log_content diff --git a/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py b/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py deleted file mode 100644 index c0911fd661..0000000000 --- a/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py +++ /dev/null @@ -1,88 +0,0 @@ -import re -import asyncio -from werkzeug.wrappers import Response - -from .base import ModuleTestBase - - -def extract_subdomain_tag(data): - pattern = r"http://([a-z0-9]{4})\.fakedomain\.fakeinteractsh\.com" - match = re.search(pattern, data) - if match: - return match.group(1) - - -class TestGeneric_SSRF(ModuleTestBase): - targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "generic_ssrf"] - - def request_handler(self, request): - subdomain_tag = None - - if request.method == "GET": - subdomain_tag = extract_subdomain_tag(request.full_path) - elif request.method == "POST": - subdomain_tag = extract_subdomain_tag(request.data.decode()) - if subdomain_tag: - asyncio.run( - self.interactsh_mock_instance.mock_interaction( - subdomain_tag, msg=f"{request.method}: {request.data.decode()}" - ) - ) - - return Response("alive", status=200) - - async def setup_before_prep(self, module_test): - self.interactsh_mock_instance = module_test.mock_interactsh("generic_ssrf") - module_test.monkeypatch.setattr( - module_test.scan.helpers, "interactsh", lambda *args, **kwargs: self.interactsh_mock_instance - ) - - async def setup_after_prep(self, module_test): - expect_args = re.compile("/") - module_test.set_expect_requests_handler(expect_args=expect_args, request_handler=self.request_handler) - - def check(self, module_test, events): - total_vulnerabilities = 0 - total_findings = 0 - - for e in events: - if e.type == "VULNERABILITY": - total_vulnerabilities += 1 - elif e.type == "FINDING": - total_findings += 1 - - assert total_vulnerabilities == 30, "Incorrect number of vulnerabilities detected" - assert total_findings == 30, "Incorrect number of findings detected" - - assert any( - e.type == "VULNERABILITY" - and "Out-of-band interaction: [Generic SSRF (GET)]" - and "[Triggering Parameter: Dest]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (GET)" - assert any( - e.type == "VULNERABILITY" and "Out-of-band interaction: [Generic SSRF (POST)]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (POST)" - assert any( - e.type == "VULNERABILITY" and "Out-of-band interaction: [Generic XXE] [HTTP]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (XXE)" - - -class TestGeneric_SSRF_httponly(TestGeneric_SSRF): - config_overrides = {"modules": {"generic_ssrf": {"skip_dns_interaction": True}}} - - def check(self, module_test, events): - total_vulnerabilities = 0 - total_findings = 0 - - for e in events: - if e.type == "VULNERABILITY": - total_vulnerabilities += 1 - elif e.type == "FINDING": - total_findings += 1 - - assert total_vulnerabilities == 30, "Incorrect number of vulnerabilities detected" - assert total_findings == 0, "Incorrect number of findings detected" diff --git a/bbot/test/test_step_2/module_tests/test_module_gitlab.py b/bbot/test/test_step_2/module_tests/test_module_gitlab.py index 6d593adf65..4396ca36ae 100644 --- a/bbot/test/test_step_2/module_tests/test_module_gitlab.py +++ 
b/bbot/test/test_step_2/module_tests/test_module_gitlab.py @@ -167,7 +167,7 @@ def check(self, module_test, events): e for e in events if e.type == "TECHNOLOGY" - and e.data["technology"] == "GitLab" + and e.data["technology"] == "gitlab" and e.data["url"] == "http://127.0.0.1:8888/" ] ) diff --git a/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py b/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py index f6a47671c7..dd0380f653 100644 --- a/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py +++ b/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py @@ -31,4 +31,4 @@ def check(self, module_test, events): finding = [e for e in events if e.type == "FINDING"] assert finding, "should have raised 1 FINDING event" assert finding[0].data["url"] == "http://127.0.0.1:8888/" - assert finding[0].data["description"] == "GraphQL schema" + assert finding[0].data["description"] == "GraphQL Schema at http://127.0.0.1:8888/" diff --git a/bbot/test/test_step_2/module_tests/test_module_http.py b/bbot/test/test_step_2/module_tests/test_module_http.py index 2bc99f5ddf..df90b78525 100644 --- a/bbot/test/test_step_2/module_tests/test_module_http.py +++ b/bbot/test/test_step_2/module_tests/test_module_http.py @@ -52,12 +52,3 @@ def check(self, module_test, events): assert self.headers_correct is True assert self.method_correct is True assert self.url_correct is True - - -class TestHTTPSIEMFriendly(TestHTTP): - modules_overrides = ["http"] - config_overrides = {"modules": {"http": dict(TestHTTP.config_overrides["modules"]["http"])}} - config_overrides["modules"]["http"]["siem_friendly"] = True - - def verify_data(self, j): - return j["data"] == {"DNS_NAME": "blacklanternsecurity.com"} and j["type"] == "DNS_NAME" diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index 27ed5a55e0..de37354d0f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -23,13 +23,13 @@ def check(self, module_test, events): assert len(dns_json) == 1 dns_json = dns_json[0] scan = scan_json[0] - assert scan["data"]["name"] == module_test.scan.name - assert scan["data"]["id"] == module_test.scan.id + assert scan["data_json"]["name"] == module_test.scan.name + assert scan["data_json"]["id"] == module_test.scan.id assert scan["id"] == module_test.scan.id assert scan["uuid"] == str(module_test.scan.root_event.uuid) assert scan["parent_uuid"] == str(module_test.scan.root_event.uuid) - assert scan["data"]["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan["data"]["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["seeds"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["whitelist"] is None assert dns_json["data"] == dns_data assert dns_json["id"] == str(dns_event.id) assert dns_json["uuid"] == str(dns_event.uuid) @@ -46,25 +46,10 @@ def check(self, module_test, events): assert scan_reconstructed.uuid == scan_event.uuid assert scan_reconstructed.parent_uuid == scan_event.uuid assert scan_reconstructed.data["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan_reconstructed.data["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan_reconstructed.data["target"]["whitelist"] is None assert dns_reconstructed.data == dns_data assert dns_reconstructed.uuid == dns_event.uuid assert dns_reconstructed.parent_uuid == 
module_test.scan.root_event.uuid assert dns_reconstructed.discovery_context == context_data assert dns_reconstructed.discovery_path == [context_data] assert dns_reconstructed.parent_chain == [dns_json["uuid"]] - - -class TestJSONSIEMFriendly(ModuleTestBase): - modules_overrides = ["json"] - config_overrides = {"modules": {"json": {"siem_friendly": True}}} - - def check(self, module_test, events): - txt_file = module_test.scan.home / "output.json" - lines = list(module_test.scan.helpers.read_file(txt_file)) - passed = False - for line in lines: - e = json.loads(line) - if e["data"] == {"DNS_NAME": "blacklanternsecurity.com"}: - passed = True - assert passed diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py new file mode 100644 index 0000000000..43d2eb4053 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -0,0 +1,92 @@ +import json +import asyncio + +from .base import ModuleTestBase + + +class TestKafka(ModuleTestBase): + config_overrides = { + "modules": { + "kafka": { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Zookeeper + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-zookeeper", "-p", "2181:2181", "zookeeper:3.9" + ) + + # Wait for Zookeeper to be ready + await self.wait_for_port_open(2181) + + # Start Kafka using wurstmeister/kafka + await asyncio.create_subprocess_exec( + "docker", + "run", + "-d", + "--rm", + "--name", + "bbot-test-kafka", + "--link", + "bbot-test-zookeeper:zookeeper", + "-e", + "KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181", + "-e", + "KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092", + "-e", + "KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092", + "-e", + "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1", + "-p", + "9092:9092", + "wurstmeister/kafka", + ) + + # Wait for Kafka to be ready + await self.wait_for_port_open(9092) + + await asyncio.sleep(1) + + async def check(self, module_test, events): + from aiokafka import AIOKafkaConsumer + + self.consumer = AIOKafkaConsumer( + "bbot_events", + bootstrap_servers="localhost:9092", + group_id="test_group", + ) + await self.consumer.start() + + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from Kafka + kafka_events = [] + async for msg in self.consumer: + event_data = json.loads(msg.value.decode("utf-8")) + kafka_events.append(event_data) + if len(kafka_events) >= len(events_json): + break + + kafka_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == kafka_events, "Events do not match" + + finally: + # Clean up: Stop the Kafka consumer + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + # Stop Kafka and Zookeeper containers + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-kafka", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-zookeeper", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py new file mode 100644 index 0000000000..25c317e57c --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -0,0 +1,151 @@ +import time +import asyncio + 
+from .base import ModuleTestBase + + +class TestMongo(ModuleTestBase): + test_db_name = "bbot_test" + test_collection_prefix = "test_" + config_overrides = { + "modules": { + "mongo": { + "database": test_db_name, + "username": "bbot", + "password": "bbotislife", + "collection_prefix": test_collection_prefix, + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-mongo", + "--rm", + "-e", + "MONGO_INITDB_ROOT_USERNAME=bbot", + "-e", + "MONGO_INITDB_ROOT_PASSWORD=bbotislife", + "-p", + "27017:27017", + "-d", + "mongo", + ) + + from motor.motor_asyncio import AsyncIOMotorClient + + # Connect to the MongoDB collection with retry logic + while True: + try: + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") + db = client[self.test_db_name] + events_collection = db.get_collection(self.test_collection_prefix + "events") + # Attempt a simple operation to confirm the connection + await events_collection.count_documents({}) + break # Exit the loop if connection is successful + except Exception as e: + self.log.verbose(f"Connection failed: {e}. Retrying...") + await asyncio.sleep(0.5) + + # Check that there are no events in the collection + count = await events_collection.count_documents({}) + assert count == 0, "There are existing events in the database" + + # Close the MongoDB connection + client.close() + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + from motor.motor_asyncio import AsyncIOMotorClient + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") + db = client[self.test_db_name] + events_collection = db.get_collection(self.test_collection_prefix + "events") + + ### INDEXES ### + + # make sure the collection has all the right indexes + cursor = events_collection.list_indexes() + indexes = await cursor.to_list(length=None) + for field in Event.indexed_fields(): + assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + + ### EVENTS ### + + # Fetch all events from the collection + cursor = events_collection.find({}) + db_events = await cursor.to_list(length=None) + + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert main_event is not None, "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert main_event.get("reverse_host") == expected_reverse_host, ( + f"reverse_host attribute is not correct, expected {expected_reverse_host}" + ) + + # Events don't match exactly because the mongo ones have reverse_host and inserted_at + assert events_json != db_events_pydantic
+ for db_event in db_events_pydantic: + db_event.pop("reverse_host", None) + db_event.pop("inserted_at", None) + db_event.pop("archived", None) + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + ### SCANS ### + + # Fetch all scans from the collection + cursor = db.get_collection(self.test_collection_prefix + "scans").find({}) + db_scans = await cursor.to_list(length=None) + assert len(db_scans) == 1, "There should be exactly one scan" + db_scan = db_scans[0] + assert db_scan["id"] == main_event["scan"], "Scan id should match main event scan" + + ### TARGETS ### + + # Fetch all targets from the collection + cursor = db.get_collection(self.test_collection_prefix + "targets").find({}) + db_targets = await cursor.to_list(length=None) + assert len(db_targets) == 1, "There should be exactly one target" + db_target = db_targets[0] + scan_event = next(e for e in events if e.type == "SCAN") + assert db_target["hash"] == scan_event.data["target"]["hash"], "Target hash should match scan target hash" + + finally: + # Clean up: Delete all documents in the collection + await events_collection.delete_many({}) + # Close the MongoDB connection + client.close() + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-mongo", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_mysql.py b/bbot/test/test_step_2/module_tests/test_module_mysql.py index 4867c568d5..de30c58f9f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mysql.py +++ b/bbot/test/test_step_2/module_tests/test_module_mysql.py @@ -1,5 +1,4 @@ import asyncio -import time from .base import ModuleTestBase @@ -28,20 +27,8 @@ async def setup_before_prep(self, module_test): ) stdout, stderr = await process.communicate() - import aiomysql - # wait for the container to start - start_time = time.time() - while True: - try: - conn = await aiomysql.connect(user="root", password="bbotislife", db="bbot", host="localhost") - conn.close() - break - except Exception as e: - if time.time() - start_time > 60: # timeout after 60 seconds - self.log.error("MySQL server did not start in time.") - raise e - await asyncio.sleep(1) + await self.wait_for_port_open(3306) if process.returncode != 0: self.log.error(f"Failed to start MySQL server: {stderr.decode()}") diff --git a/bbot/test/test_step_2/module_tests/test_module_nats.py b/bbot/test/test_step_2/module_tests/test_module_nats.py new file mode 100644 index 0000000000..66f4d38937 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_nats.py @@ -0,0 +1,65 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestNats(ModuleTestBase): + config_overrides = { + "modules": { + "nats": { + "servers": ["nats://localhost:4222"], + "subject": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start NATS server + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-nats", "-p", "4222:4222", "nats:latest" + ) + + # Wait for NATS to be ready by checking the port + await self.wait_for_port_open(4222) + + # Connect to NATS + import nats + + try: + self.nc = await nats.connect(["nats://localhost:4222"]) + except Exception as e: + self.log.error(f"Error connecting to NATS: {e}") + raise + + # Collect events from NATS + self.nats_events = [] + + async def message_handler(msg): + event_data = 
json.loads(msg.data.decode("utf-8")) + self.nats_events.append(event_data) + + await self.nc.subscribe("bbot_events", cb=message_handler) + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + self.nats_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == self.nats_events, "Events do not match" + + finally: + with suppress(Exception): + # Clean up: Stop the NATS client + if self.nc.is_connected: + await self.nc.drain() + await self.nc.close() + # Stop NATS server container + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-nats", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_postgres.py b/bbot/test/test_step_2/module_tests/test_module_postgres.py index ea6c00210c..8c52eabebe 100644 --- a/bbot/test/test_step_2/module_tests/test_module_postgres.py +++ b/bbot/test/test_step_2/module_tests/test_module_postgres.py @@ -1,4 +1,3 @@ -import time import asyncio from .base import ModuleTestBase @@ -25,27 +24,8 @@ async def setup_before_prep(self, module_test): "postgres", ) - import asyncpg - # wait for the container to start - start_time = time.time() - while True: - try: - # Connect to the default 'postgres' database to create 'bbot' - conn = await asyncpg.connect( - user="postgres", password="bbotislife", database="postgres", host="127.0.0.1" - ) - await conn.execute("CREATE DATABASE bbot") - await conn.close() - break - except asyncpg.exceptions.DuplicateDatabaseError: - # If the database already exists, break the loop - break - except Exception as e: - if time.time() - start_time > 60: # timeout after 60 seconds - self.log.error("PostgreSQL server did not start in time.") - raise e - await asyncio.sleep(1) + await self.wait_for_port_open(5432) if process.returncode != 0: self.log.error("Failed to start PostgreSQL server") diff --git a/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py new file mode 100644 index 0000000000..c272e0b86c --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py @@ -0,0 +1,71 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestRabbitMQ(ModuleTestBase): + config_overrides = { + "modules": { + "rabbitmq": { + "url": "amqp://guest:guest@localhost/", + "queue": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + import aio_pika + + # Start RabbitMQ + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-rabbitmq", "-p", "5672:5672", "rabbitmq:3-management" + ) + + # Wait for RabbitMQ to be ready + while True: + try: + # Attempt to connect to RabbitMQ with a timeout + connection = await aio_pika.connect_robust("amqp://guest:guest@localhost/") + break # Exit the loop if the connection is successful + except Exception as e: + with suppress(Exception): + await connection.close() + self.log.verbose(f"Waiting for RabbitMQ to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + + async def check(self, module_test, events): + import aio_pika + + connection = await aio_pika.connect_robust("amqp://guest:guest@localhost/") + channel = await connection.channel() + queue = await channel.declare_queue("bbot_events", durable=True) + + try: + events_json = [e.json() for e in events] + 
events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from RabbitMQ + rabbitmq_events = [] + async with queue.iterator() as queue_iter: + async for message in queue_iter: + async with message.process(): + event_data = json.loads(message.body.decode("utf-8")) + rabbitmq_events.append(event_data) + if len(rabbitmq_events) >= len(events_json): + break + + rabbitmq_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == rabbitmq_events, "Events do not match" + + finally: + # Clean up: Close the RabbitMQ connection + await connection.close() + # Stop RabbitMQ container + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-rabbitmq", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_splunk.py b/bbot/test/test_step_2/module_tests/test_module_splunk.py index 8366a6289b..a849055d2b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_splunk.py +++ b/bbot/test/test_step_2/module_tests/test_module_splunk.py @@ -23,7 +23,7 @@ def verify_data(self, j): if not j["index"] == "bbot_index": return False data = j["event"] - if not data["data"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": + if not data["data_json"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": return False return True diff --git a/bbot/test/test_step_2/module_tests/test_module_sqlite.py b/bbot/test/test_step_2/module_tests/test_module_sqlite.py index ec80b7555d..7970627b15 100644 --- a/bbot/test/test_step_2/module_tests/test_module_sqlite.py +++ b/bbot/test/test_step_2/module_tests/test_module_sqlite.py @@ -8,6 +8,8 @@ class TestSQLite(ModuleTestBase): def check(self, module_test, events): sqlite_output_file = module_test.scan.home / "output.sqlite" assert sqlite_output_file.exists(), "SQLite output file not found" + + # first connect with raw sqlite with sqlite3.connect(sqlite_output_file) as db: cursor = db.cursor() results = cursor.execute("SELECT * FROM event").fetchall() @@ -16,3 +18,15 @@ def check(self, module_test, events): assert len(results) == 1, "No scans found in SQLite database" results = cursor.execute("SELECT * FROM target").fetchall() assert len(results) == 1, "No targets found in SQLite database" + + # then connect with bbot models + from bbot.models.sql import Event + from sqlmodel import create_engine, Session, select + + engine = create_engine(f"sqlite:///{sqlite_output_file}") + + with Session(engine) as session: + statement = select(Event).where(Event.host == "evilcorp.com") + event = session.exec(statement).first() + assert event.host == "evilcorp.com", "Event host should match target host" + assert event.data == "evilcorp.com", "Event data should match target host" diff --git a/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py b/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py index 725a96fecf..1961b50ce8 100644 --- a/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py +++ b/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py @@ -34,6 +34,6 @@ def check(self, module_test, events): assert any( e.type == "FINDING" and e.data["description"] - == f"Url Manipulation: [body] Sig: [Modified URL: http://127.0.0.1:8888/?{module_test.module.rand_string}=.xml]" + == f"URL Manipulation: [body] Sig: [Modified URL: http://127.0.0.1:8888/?{module_test.module.rand_string}=.xml]" for e in events ) diff --git a/bbot/test/test_step_2/module_tests/test_module_zeromq.py 
b/bbot/test/test_step_2/module_tests/test_module_zeromq.py new file mode 100644 index 0000000000..8c118570ef --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_zeromq.py @@ -0,0 +1,46 @@ +import json +import zmq +import zmq.asyncio + +from .base import ModuleTestBase + + +class TestZeroMQ(ModuleTestBase): + config_overrides = { + "modules": { + "zeromq": { + "zmq_address": "tcp://localhost:5555", + } + } + } + + async def setup_before_prep(self, module_test): + # Setup ZeroMQ context and socket + self.context = zmq.asyncio.Context() + self.socket = self.context.socket(zmq.SUB) + self.socket.connect("tcp://localhost:5555") + self.socket.setsockopt_string(zmq.SUBSCRIBE, "") + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from ZeroMQ + zmq_events = [] + while len(zmq_events) < len(events_json): + msg = await self.socket.recv() + event_data = json.loads(msg.decode("utf-8")) + zmq_events.append(event_data) + + zmq_events.sort(key=lambda x: x["timestamp"]) + + assert len(events_json) == len(zmq_events), "Number of events does not match" + + # Verify the events match + assert events_json == zmq_events, "Events do not match" + + finally: + # Clean up: Close the ZeroMQ socket + self.socket.close() + self.context.term() diff --git a/docs/scanning/configuration.md b/docs/scanning/configuration.md index 1cbf60490d..177896440c 100644 --- a/docs/scanning/configuration.md +++ b/docs/scanning/configuration.md @@ -30,7 +30,7 @@ You can specify config options either via the command line or the config. For ex bbot -t evilcorp.com -c http_proxy=http://127.0.0.1:8080 ``` -Or, in `~/.config/bbot/config.yml`: +Or, in `~/.config/bbot/bbot.yml`: ```yaml title="~/.bbot/config/bbot.yml" http_proxy: http://127.0.0.1:8080 diff --git a/docs/scanning/output.md b/docs/scanning/output.md index b46eb40c86..6063c0a893 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -155,15 +155,20 @@ config: ### Elasticsearch -When outputting to Elastic, use the `http` output module with the following settings (replace `` with your desired index, e.g. 
`bbot`): +- Step 1: Spin up an Elasticsearch Docker container + +```bash +docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0 +``` + +- Step 2: Execute a scan with the `elastic` output module ```bash # send scan results directly to elasticsearch -bbot -t evilcorp.com -om http -c \ - modules.http.url=http://localhost:8000//_doc \ - modules.http.siem_friendly=true \ - modules.http.username=elastic \ - modules.http.password=changeme +# note: you can replace "bbot" with your own index name +bbot -t evilcorp.com -om elastic -c \ + modules.elastic.url=https://localhost:9200/bbot/_doc \ + modules.elastic.password=bbotislife ``` Alternatively, via a preset: @@ -171,11 +176,9 @@ ```yaml title="elastic_preset.yml" config: modules: - http: - url: http://localhost:8000//_doc - siem_friendly: true - username: elastic - password: changeme + elastic: + url: https://localhost:9200/bbot/_doc + password: bbotislife ``` ### Splunk diff --git a/docs/scanning/tips_and_tricks.md b/docs/scanning/tips_and_tricks.md index 52589c4aa7..f91708dd35 100644 --- a/docs/scanning/tips_and_tricks.md +++ b/docs/scanning/tips_and_tricks.md @@ -108,24 +108,6 @@ config: bbot -t evilcorp.com -p skip_cdns.yml ``` -### Ingest BBOT Data Into SIEM (Elastic, Splunk) - -If your goal is to run a BBOT scan and later feed its data into a SIEM such as Elastic, be sure to enable this option when scanning: - -```bash -bbot -t evilcorp.com -c modules.json.siem_friendly=true -``` - -This ensures the `.data` event attribute is always the same type (a dictionary), by nesting it like so: -```json -{ - "type": "DNS_NAME", - "data": { - "DNS_NAME": "blacklanternsecurity.com" - } -} -``` - ### Custom HTTP Proxy Web pentesters may appreciate BBOT's ability to quickly populate Burp Suite site maps for all subdomains in a target. If your scan includes gowitness, this will capture the traffic as if you manually visited each website in your browser -- including auxiliary web resources and javascript API calls. To accomplish this, set the `web.http_proxy` config option like so: diff --git a/pyproject.toml b/pyproject.toml index f36a9dbf9f..4cb6f88cc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bbot" -version = "2.7.1" +version = "3.0.0" description = "OSINT automation for hackers." authors = [ "TheTechromancer", @@ -121,7 +121,7 @@ lint.ignore = ["E402", "E711", "E713", "E721", "E741", "F403", "F405", "E501"] [tool.poetry-dynamic-versioning] enable = true metadata = false -format-jinja = 'v2.7.1{% if branch == "dev" %}.{{ distance }}rc{% endif %}' +format-jinja = 'v3.0.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' [tool.poetry-dynamic-versioning.substitution] files = ["*/__init__.py"]
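
For consumers migrating pipelines off the removed `siem_friendly` option, the tests above spell out the new JSON contract: an event carries its payload in `data` when it is a string, or in `data_json` when it is a dictionary (never both), and `timestamp` is serialized as a UTC epoch float rather than an ISO-8601 string. A small consumer-side shim can restore a uniform shape. The sketch below is illustrative only — `normalize_event` is a hypothetical helper, not part of BBOT — and assumes NDJSON lines as written by the `json` output module:

```python
import json
from datetime import datetime, timezone


def normalize_event(line: str) -> dict:
    """Flatten one BBOT 3.0 NDJSON event into a uniform shape."""
    event = json.loads(line)
    # exactly one of "data" (str) or "data_json" (dict) is present
    payload = event.get("data", event.get("data_json"))
    return {
        "type": event["type"],
        "payload": payload,
        # "timestamp" is a UTC epoch float in the new format
        "time": datetime.fromtimestamp(event["timestamp"], tz=timezone.utc),
        "tags": event.get("tags", []),
    }
```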