From e54618aa3b31bb5aa05db6eadf7b2caaaac740c4 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:42:49 -0500 Subject: [PATCH 001/147] bump version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8315373da9..ba84c0bb49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bbot" -version = "2.3.0" +version = "3.0.0" description = "OSINT automation for hackers." authors = [ "TheTechromancer", @@ -103,7 +103,7 @@ extend-exclude = "(test_step_1/test_manager_*)" [tool.poetry-dynamic-versioning] enable = true metadata = false -format-jinja = 'v2.3.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' +format-jinja = 'v3.0.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' [tool.poetry-dynamic-versioning.substitution] files = ["*/__init__.py"] From 723ea9abe2422a9bc66dfa3c4755e03d9377045d Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 23:22:52 -0500 Subject: [PATCH 002/147] fix conflict --- bbot/core/event/base.py | 6 +-- bbot/scanner/scanner.py | 3 +- bbot/test/bbot_fixtures.py | 76 ++++++++++++++++++++++++++------------ 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index ce627f6959..5408dadd9f 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,6 +40,7 @@ validators, get_file_extension, ) +from bbot.db.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") @@ -802,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = self.timestamp.isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() # parent event parent_id = self.parent_id if parent_id: @@ -811,8 +812,7 @@ def json(self, mode="json", siem_friendly=False): if parent_uuid: j["parent_uuid"] = parent_uuid # tags - if self.tags: - j.update({"tags": list(self.tags)}) + j.update({"tags": list(self.tags)}) # parent module if self.module: j.update({"module": str(self.module)}) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 49114a5b5d..62e5c9d3ab 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -500,7 +500,8 @@ async def setup_modules(self, remove_failed=True): self.modules[module.name].set_error_state() hard_failed.append(module.name) else: - self.info(f"Setup soft-failed for {module.name}: {msg}") + log_fn = self.warning if module._type == "output" else self.info + log_fn(f"Setup soft-failed for {module.name}: {msg}") soft_failed.append(module.name) if (not status) and (module._intercept or remove_failed): # if a intercept module fails setup, we always remove it diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index e1e3aa1b8b..4d73d036c1 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -147,48 +147,78 @@ def helpers(scan): @pytest.fixture def events(scan): + + dummy_module = scan._make_dummy_module("dummy_module") + class bbot_events: - localhost = scan.make_event("127.0.0.1", parent=scan.root_event) - ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event) - netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event) - ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event) - netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event) - domain = scan.make_event("publicAPIs.org", parent=scan.root_event) - subdomain = 
scan.make_event("api.publicAPIs.org", parent=scan.root_event) - email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event) - open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event) + localhost = scan.make_event("127.0.0.1", parent=scan.root_event, module=dummy_module) + ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event, module=dummy_module) + netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event, module=dummy_module) + ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event, module=dummy_module) + netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event, module=dummy_module) + domain = scan.make_event("publicAPIs.org", parent=scan.root_event, module=dummy_module) + subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event, module=dummy_module) + email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event, module=dummy_module) + open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event, module=dummy_module) protocol = scan.make_event( - {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, "PROTOCOL", parent=scan.root_event + {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, + "PROTOCOL", + parent=scan.root_event, + module=dummy_module, + ) + ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event, module=dummy_module) + ipv6_open_port = scan.make_event( + "[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event, module=dummy_module + ) + url_unverified = scan.make_event( + "https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv4_url_unverified = scan.make_event( + "https://8.8.8.8:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv6_url_unverified = scan.make_event( + "https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event, module=dummy_module ) - ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event) - ipv6_open_port = scan.make_event("[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event) - url_unverified = scan.make_event("https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event) - ipv4_url_unverified = scan.make_event("https://8.8.8.8:443/hellofriend", parent=scan.root_event) - ipv6_url_unverified = scan.make_event("https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event) url = scan.make_event( - "https://api.publicAPIs.org:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://api.publicAPIs.org:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, ) ipv4_url = scan.make_event( - "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event, module=dummy_module ) ipv6_url = scan.make_event( - "https://[2001:4860:4860::8888]:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://[2001:4860:4860::8888]:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, + ) + url_hint = scan.make_event( + "https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url, module=dummy_module ) - url_hint = scan.make_event("https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url) vulnerability = scan.make_event( {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, "VULNERABILITY", 
parent=scan.root_event, + module=dummy_module, + ) + finding = scan.make_event( + {"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event, module=dummy_module + ) + vhost = scan.make_event( + {"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event, module=dummy_module ) - finding = scan.make_event({"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event) - vhost = scan.make_event({"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event) - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) + http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module) storage_bucket = scan.make_event( {"name": "storage", "url": "https://storage.blob.core.windows.net"}, "STORAGE_BUCKET", parent=scan.root_event, + module=dummy_module, ) - emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event) + emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event, module=dummy_module) bbot_events.all = [ # noqa: F841 bbot_events.localhost, From f60bd73c053972ee9c76435892734ba77b2af0f5 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 17:25:16 -0500 Subject: [PATCH 003/147] resolve conflict --- bbot/core/event/base.py | 2 +- bbot/models/helpers.py | 16 +++ bbot/models/pydantic.py | 111 ++++++++++++++++++ bbot/{db/sql/models.py => models/sql.py} | 0 bbot/modules/output/mongo.py | 68 +++++++++++ bbot/modules/templates/sql.py | 2 +- bbot/test/test_step_1/test_db_models.py | 29 +++++ .../module_tests/test_module_mongo.py | 81 +++++++++++++ 8 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 bbot/models/helpers.py create mode 100644 bbot/models/pydantic.py rename bbot/{db/sql/models.py => models/sql.py} (100%) create mode 100644 bbot/modules/output/mongo.py create mode 100644 bbot/test/test_step_1/test_db_models.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_mongo.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 5408dadd9f..6b1176af65 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,7 +40,7 @@ validators, get_file_extension, ) -from bbot.db.helpers import naive_datetime_validator +from bbot.models.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py new file mode 100644 index 0000000000..40e127c53b --- /dev/null +++ b/bbot/models/helpers.py @@ -0,0 +1,16 @@ +from datetime import datetime +from typing_extensions import Annotated +from pydantic.functional_validators import AfterValidator + + +def naive_datetime_validator(d: datetime): + """ + Converts all dates into UTC, then drops timezone information. + + This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. 
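+
+    Illustrative example (an assumption for clarity, not part of the original patch;
+    it assumes an already-UTC input, since replace(tzinfo=None) only drops the tzinfo):
+        naive_datetime_validator(datetime(2024, 1, 1, tzinfo=timezone.utc)) -> datetime(2024, 1, 1)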
+ """ + # drop timezone info + return d.replace(tzinfo=None) + + +NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py new file mode 100644 index 0000000000..0d54cc91b7 --- /dev/null +++ b/bbot/models/pydantic.py @@ -0,0 +1,111 @@ +import json +import logging +from datetime import datetime +from typing import Optional, List, Union, Annotated +from pydantic import BaseModel, ConfigDict, field_serializer + +from bbot.models.helpers import NaiveUTC, naive_datetime_validator + +log = logging.getLogger("bbot_server.models") + + +class BBOTBaseModel(BaseModel): + model_config = ConfigDict(extra="ignore") + + def to_json(self, **kwargs): + return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + + def __hash__(self): + return hash(self.to_json()) + + def __eq__(self, other): + return hash(self) == hash(other) + + +### EVENT ### + +class Event(BBOTBaseModel): + uuid: Annotated[str, "indexed", "unique"] + id: Annotated[str, "indexed"] + type: Annotated[str, "indexed"] + scope_description: str + data: Union[dict, str] + host: Annotated[Optional[str], "indexed"] = None + port: Optional[int] = None + netloc: Optional[str] = None + # we store the host in reverse to allow for instant subdomain queries + # this works because indexes are left-anchored, but we need to search starting from the right side + reverse_host: Annotated[Optional[str], "indexed"] = "" + resolved_hosts: Union[List, None] = None + dns_children: Union[dict, None] = None + web_spider_distance: int = 10 + scope_distance: int = 10 + scan: Annotated[str, "indexed"] + timestamp: Annotated[NaiveUTC, "indexed"] + parent: Annotated[str, "indexed"] + parent_uuid: Annotated[str, "indexed"] + tags: List = [] + module: Annotated[Optional[str], "indexed"] = None + module_sequence: Optional[str] = None + discovery_context: str = "" + discovery_path: List[str] = [] + parent_chain: List[str] = [] + + def __init__(self, **data): + super().__init__(**data) + if self.host: + self.reverse_host = self.host[::-1] + + @staticmethod + def _get_data(data, type): + if isinstance(data, dict) and list(data) == [type]: + return data[type] + return data + + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + + @field_serializer("timestamp") + def serialize_timestamp(self, timestamp: datetime, _info): + return naive_datetime_validator(timestamp).isoformat() + + +### SCAN ### + +class Scan(BBOTBaseModel): + id: Annotated[str, "indexed", "unique"] + name: str + status: Annotated[str, "indexed"] + started_at: Annotated[NaiveUTC, "indexed"] + finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + duration_seconds: Optional[float] = None + duration: Optional[str] = None + target: dict + preset: dict + + @classmethod + def from_scan(cls, scan): + return cls( + id=scan.id, + name=scan.name, + status=scan.status, + started_at=scan.started_at, + ) + + +### TARGET ### + +class Target(BBOTBaseModel): + name: str = "Default Target" + strict_scope: bool = False + seeds: List = [] + whitelist: List = [] + blacklist: List = [] + hash: Annotated[str, "indexed", "unique"] + scope_hash: Annotated[str, "indexed"] + seed_hash: Annotated[str, "indexed"] + whitelist_hash: Annotated[str, "indexed"] + blacklist_hash: Annotated[str, "indexed"] diff --git a/bbot/db/sql/models.py b/bbot/models/sql.py similarity index 100% rename from bbot/db/sql/models.py rename to bbot/models/sql.py diff 
--git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py new file mode 100644 index 0000000000..dd4efa47ce --- /dev/null +++ b/bbot/modules/output/mongo.py @@ -0,0 +1,68 @@ +from motor.motor_asyncio import AsyncIOMotorClient + +from bbot.models.pydantic import Event +from bbot.modules.output.base import BaseOutputModule + + +class Mongo(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a MongoDB database", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "uri": "mongodb://localhost:27017", + "database": "bbot", + "collection_prefix": "", + } + options_desc = { + "uri": "The URI of the MongoDB server", + "database": "The name of the database to use", + "collection_prefix": "Prefix each collection with this string", + } + deps_pip = ["motor~=3.6.0"] + + async def setup(self): + self.uri = self.config.get("uri", "mongodb://localhost:27017") + self.db_client = AsyncIOMotorClient(self.uri) + + # Ping the server to confirm a successful connection + try: + await self.db_client.admin.command("ping") + self.verbose("MongoDB connection successful") + except Exception as e: + return False, f"Failed to connect to MongoDB: {e}" + + self.db_name = self.config.get("database", "bbot") + self.db = self.db_client[self.db_name] + self.collection_prefix = self.config.get("collection_prefix", "") + self.events_collection = self.db[f"{self.collection_prefix}events"] + self.scans_collection = self.db[f"{self.collection_prefix}scans"] + self.targets_collection = self.db[f"{self.collection_prefix}targets"] + + # Build an index for each field in reverse_host and host + for field in Event._indexed_fields(): + await self.collection.create_index([(field, 1)]) + self.verbose(f"Index created for field: {field}") + + return True + + async def handle_event(self, event): + event_json = event.json() + event_pydantic = Event(**event_json) + await self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": + # here we merge the scan with the one sharing its UUID. 
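+ # (editorial note, inferred from the merge-on-UUID logic below: a scan emits a SCAN
+ # event at startup and again on completion with its final status, so this lookup
+ # lets the completion event update the record inserted at startup)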
+ existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + # Merge logic here, for example, update the existing scan with new data + updated_scan = {**existing_scan, **event_pydantic.model_dump()} + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + diff --git a/bbot/modules/templates/sql.py b/bbot/modules/templates/sql.py index 39b4e6f00e..42f5494555 100644 --- a/bbot/modules/templates/sql.py +++ b/bbot/modules/templates/sql.py @@ -3,7 +3,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession -from bbot.db.sql.models import Event, Scan, Target +from bbot.models.sql import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py new file mode 100644 index 0000000000..4e003f6f57 --- /dev/null +++ b/bbot/test/test_step_1/test_db_models.py @@ -0,0 +1,29 @@ +from bbot.models.pydantic import Event +from ..bbot_fixtures import * # noqa + + +def test_pydantic_models(events): + + test_event = Event(**events.ipv4.json()) + assert sorted(test_event._indexed_fields()) == [ + "host", + "id", + "module", + "parent", + "parent_uuid", + "reverse_host", + "scan", + "timestamp", + "type", + "uuid", + ] + + # events + for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + e = getattr(events, event) + event_json = e.json() + event_pydantic = Event(**event_json) + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + + +# TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py new file mode 100644 index 0000000000..10a8655e81 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -0,0 +1,81 @@ +from .base import ModuleTestBase + + +class TestMongo(ModuleTestBase): + test_db_name = "bbot_test" + test_collection_name = "events_test" + config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + + async def setup_before_module(self): + from motor.motor_asyncio import AsyncIOMotorClient + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # Check that there are no events in the collection + count = await collection.count_documents({}) + assert count == 0, "There are existing events in the database" + + # Close the MongoDB connection + client.close() + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + from motor.motor_asyncio import AsyncIOMotorClient + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # make sure the collection has all the right indexes + cursor = collection.list_indexes() + indexes = await cursor.to_list(length=None) + for field 
in Event._indexed_fields(): + assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + + # Fetch all events from the collection + cursor = collection.find({}) + db_events = await cursor.to_list(length=None) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert main_event is not None, "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Compare the sorted lists + assert len(events_json) == len(db_events_pydantic) + # Events don't match exactly because the mongo ones have reverse_host + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the collection + await collection.delete_many({}) + # Close the MongoDB connection + client.close() From 9e1e84ac8485246fd3ecd3387254200a146345f9 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 18:39:37 -0500 Subject: [PATCH 004/147] more wip mongo --- bbot/models/helpers.py | 6 ++++- bbot/models/pydantic.py | 31 +++++++++++++++++-------- bbot/modules/output/mongo.py | 19 +++++++-------- bbot/test/test_step_1/test_db_models.py | 8 +++++++ 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 40e127c53b..985c845994 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -3,7 +3,7 @@ from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime): +def naive_datetime_validator(d: datetime) -> datetime: """ Converts all dates into UTC, then drops timezone information. 
@@ -13,4 +13,8 @@ def naive_datetime_validator(d: datetime): return d.replace(tzinfo=None) +def naive_utc_now() -> datetime: + return naive_datetime_validator(datetime.now()) + + NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0d54cc91b7..fe179878e7 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -2,9 +2,9 @@ import logging from datetime import datetime from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer +from pydantic import BaseModel, ConfigDict, field_serializer, Field -from bbot.models.helpers import NaiveUTC, naive_datetime_validator +from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,8 +12,18 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, **kwargs): - return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + def to_json(self, preserve_datetime=False): + ret = self.model_dump() + if preserve_datetime: + for key in ret: + val = getattr(self, key, None) + if isinstance(val, datetime): + ret[key] = val + return ret + + def to_json_string(self, preserve_datetime=False, **kwargs): + kwargs['sort_keys'] = True + return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) def __hash__(self): return hash(self.to_json()) @@ -21,6 +31,12 @@ def __hash__(self): def __eq__(self, other): return hash(self) == hash(other) + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + ### EVENT ### @@ -42,6 +58,7 @@ class Event(BBOTBaseModel): scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[NaiveUTC, "indexed"] + inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -62,12 +79,6 @@ def _get_data(data, type): return data[type] return data - @classmethod - def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) - @field_serializer("timestamp") def serialize_timestamp(self, timestamp: datetime, _info): return naive_datetime_validator(timestamp).isoformat() diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index dd4efa47ce..bb92d19d8a 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -1,6 +1,6 @@ from motor.motor_asyncio import AsyncIOMotorClient -from bbot.models.pydantic import Event +from bbot.models.pydantic import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule @@ -42,9 +42,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event._indexed_fields(): - await self.collection.create_index([(field, 1)]) - self.verbose(f"Index created for field: {field}") + for field in Event.model_fields: + if "indexed" in field.metadata: + unique = "unique" in field.metadata + await self.collection.create_index([(field, 1)], unique=unique) + self.verbose(f"Index created for field: {field}") return True @@ -52,17 +54,14 @@ async def handle_event(self, event): event_json = event.json() event_pydantic = Event(**event_json) await 
self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": - # here we merge the scan with the one sharing its UUID. + scan_json = Scan.from_event(event).model_dump() existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) if existing_scan: - # Merge logic here, for example, update the existing scan with new data - updated_scan = {**existing_scan, **event_pydantic.model_dump()} - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") - - diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 4e003f6f57..1ba970f0e7 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,3 +1,5 @@ +from datetime import datetime + from bbot.models.pydantic import Event from ..bbot_fixtures import * # noqa @@ -23,6 +25,12 @@ def test_pydantic_models(events): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) + event_pydantic_dict = event_pydantic.to_json() + event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + assert isinstance(event_pydantic_dict["timestamp"], str) + assert isinstance(event_pydantic_dict["inserted_at"], str) + assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) + assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json From 1ebb942b0b5c522d1032a6b6f6299e60a6df6677 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 20:08:24 -0500 Subject: [PATCH 005/147] more mongo wip --- bbot/models/pydantic.py | 66 ++++++++------- bbot/modules/output/mongo.py | 34 ++++---- bbot/test/test_step_1/test_db_models.py | 14 +++- .../module_tests/test_module_mongo.py | 81 +++++++++++++++---- 4 files changed, 133 insertions(+), 62 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index fe179878e7..906801693a 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,10 +1,9 @@ -import json import logging from datetime import datetime -from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer, Field +from pydantic import BaseModel, ConfigDict, Field +from typing import Optional, List, Union, Annotated, get_type_hints -from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now +from bbot.models.helpers import NaiveUTC, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,19 +11,14 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, preserve_datetime=False): - ret = self.model_dump() - if preserve_datetime: - for key in ret: - val = getattr(self, key, None) - if isinstance(val, datetime): - ret[key] = val + def model_dump(self, preserve_datetime=False, **kwargs): + ret = super().model_dump(**kwargs) + if not preserve_datetime: + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].isoformat() return ret - def to_json_string(self, 
preserve_datetime=False, **kwargs): - kwargs['sort_keys'] = True - return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) - def __hash__(self): return hash(self.to_json()) @@ -33,13 +27,37 @@ def __eq__(self, other): @classmethod def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) + return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) + + @classmethod + def _get_type_hints(cls): + """ + Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + """ + type_hints = get_type_hints(cls) + unwrapped_type_hints = {} + for field_name in cls.model_fields: + type_hint = type_hints[field_name] + while 1: + if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + type_hint = type_hint.__args__[0] + else: + break + unwrapped_type_hints[field_name] = type_hint + return unwrapped_type_hints + + @classmethod + def _datetime_fields(cls): + datetime_fields = [] + for field_name, type_hint in cls._get_type_hints().items(): + if type_hint == datetime: + datetime_fields.append(field_name) + return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -73,19 +91,10 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] - @staticmethod - def _get_data(data, type): - if isinstance(data, dict) and list(data) == [type]: - return data[type] - return data - - @field_serializer("timestamp") - def serialize_timestamp(self, timestamp: datetime, _info): - return naive_datetime_validator(timestamp).isoformat() - ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str @@ -109,6 +118,7 @@ def from_scan(cls, scan): ### TARGET ### + class Target(BBOTBaseModel): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bb92d19d8a..bc323d7ad9 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -14,18 +14,24 @@ class Mongo(BaseOutputModule): options = { "uri": "mongodb://localhost:27017", "database": "bbot", + "username": "", + "password": "", "collection_prefix": "", } options_desc = { "uri": "The URI of the MongoDB server", "database": "The name of the database to use", + "username": "The username to use to connect to the database", + "password": "The password to use to connect to the database", "collection_prefix": "Prefix each collection with this string", } deps_pip = ["motor~=3.6.0"] async def setup(self): self.uri = self.config.get("uri", "mongodb://localhost:27017") - self.db_client = AsyncIOMotorClient(self.uri) + self.username = self.config.get("username", "") + self.password = self.config.get("password", "") + self.db_client = AsyncIOMotorClient(self.uri, username=self.username, password=self.password) # Ping the server to confirm a successful connection try: @@ -42,11 +48,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event.model_fields: + for field_name, field in Event.model_fields.items(): if "indexed" in field.metadata: unique = "unique" in field.metadata - await self.collection.create_index([(field, 1)], unique=unique) - self.verbose(f"Index created for field: {field}") + await 
self.events_collection.create_index([(field_name, 1)], unique=unique) + self.verbose(f"Index created for field: {field_name} (unique={unique})") return True @@ -55,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - else: - # Insert as a new scan if no existing scan is found - await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + # if event.type == "SCAN": + # scan_json = Scan.from_event(event).model_dump() + # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + # if existing_scan: + # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + # else: + # # Insert as a new scan if no existing scan is found + # await self.scans_collection.insert_one(event_pydantic.model_dump()) + # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 1ba970f0e7..5a6fce547c 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -6,6 +6,8 @@ def test_pydantic_models(events): + assert Event._datetime_fields() == ["inserted_at", "timestamp"] + test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ "host", @@ -20,18 +22,22 @@ def test_pydantic_models(events): "uuid", ] - # events + # convert events to pydantic and back, making sure they're exactly the same for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) - event_pydantic_dict = event_pydantic.to_json() - event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + event_pydantic_dict = event_pydantic.model_dump() + event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) + assert isinstance(event_json["timestamp"], str) + assert isinstance(e.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, datetime) + assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], str) assert isinstance(event_pydantic_dict["inserted_at"], str) assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json # TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 10a8655e81..839e46156e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -1,21 +1,58 @@ +import time +import asyncio + from .base import ModuleTestBase class TestMongo(ModuleTestBase): test_db_name = "bbot_test" 
- test_collection_name = "events_test" - config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + test_collection_prefix = "test_" + config_overrides = { + "modules": { + "mongo": { + "database": test_db_name, + "username": "bbot", + "password": "bbotislife", + "collection_prefix": test_collection_prefix, + } + } + } + + async def setup_before_prep(self, module_test): + + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-mongo", + "--rm", + "-e", + "MONGO_INITDB_ROOT_USERNAME=bbot", + "-e", + "MONGO_INITDB_ROOT_PASSWORD=bbotislife", + "-p", + "27017:27017", + "-d", + "mongo", + ) - async def setup_before_module(self): from motor.motor_asyncio import AsyncIOMotorClient - # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") - db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + # Connect to the MongoDB collection with retry logic + while True: + try: + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") + db = client[self.test_db_name] + events_collection = db.get_collection(self.test_collection_prefix + "events") + # Attempt a simple operation to confirm the connection + await events_collection.count_documents({}) + break # Exit the loop if connection is successful + except Exception as e: + print(f"Connection failed: {e}. Retrying in 5 seconds...") + time.sleep(0.5) # Check that there are no events in the collection - count = await collection.count_documents({}) + count = await events_collection.count_documents({}) assert count == 0, "There are existing events in the database" # Close the MongoDB connection @@ -30,20 +67,30 @@ async def check(self, module_test, events): events_json.sort(key=lambda x: x["timestamp"]) # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + events_collection = db.get_collection(self.test_collection_prefix + "events") # make sure the collection has all the right indexes - cursor = collection.list_indexes() + cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" # Fetch all events from the collection - cursor = collection.find({}) + cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + # we currently don't store timestamps as datetime objects because mongodb has lower precision + # assert isinstance(db_event["timestamp"], datetime) + # assert isinstance(db_event["inserted_at"], datetime) + assert isinstance(db_event["timestamp"], str) + assert isinstance(db_event["inserted_at"], str) + # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] db_events_pydantic.sort(key=lambda x: x["timestamp"]) @@ -65,17 +112,19 @@ async def check(self, module_test, events): main_event.get("reverse_host") == expected_reverse_host ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" - # Compare the sorted lists - assert len(events_json) == 
len(db_events_pydantic) - # Events don't match exactly because the mongo ones have reverse_host + # Events don't match exactly because the mongo ones have reverse_host and inserted_at assert events_json != db_events_pydantic for db_event in db_events_pydantic: db_event.pop("reverse_host") + db_event.pop("inserted_at") # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" finally: # Clean up: Delete all documents in the collection - await collection.delete_many({}) + await events_collection.delete_many({}) # Close the MongoDB connection client.close() + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-mongo", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 23598a3e2d096204e14efb3bb0c02943cc486492 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 20 Nov 2024 11:54:12 -0500 Subject: [PATCH 006/147] skip distro tests --- bbot/test/test_step_2/module_tests/test_module_mongo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 839e46156e..31e7f70747 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -17,6 +17,7 @@ class TestMongo(ModuleTestBase): } } } + skip_distro_tests = True async def setup_before_prep(self, module_test): @@ -48,7 +49,7 @@ async def setup_before_prep(self, module_test): await events_collection.count_documents({}) break # Exit the loop if connection is successful except Exception as e: - print(f"Connection failed: {e}. Retrying in 5 seconds...") + print(f"Connection failed: {e}. Retrying...") time.sleep(0.5) # Check that there are no events in the collection From 9af4583ef573613733caf25e4c4a7195f47299ae Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 10:35:12 -0500 Subject: [PATCH 007/147] more wip mongo --- bbot/core/event/base.py | 8 ++++++-- bbot/models/pydantic.py | 10 +++++----- bbot/modules/output/mongo.py | 20 ++++++++++---------- bbot/test/bbot_fixtures.py | 14 +++++++------- bbot/test/test_step_1/test_db_models.py | 9 +++------ bbot/test/test_step_1/test_events.py | 6 +++--- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 6b1176af65..53e2c62236 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -803,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1773,7 +1773,11 @@ def event_from_json(j, siem_friendly=False): resolved_hosts = j.get("resolved_hosts", []) event._resolved_hosts = set(resolved_hosts) - event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) + # accept both isoformat and unix timestamp + try: + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + except Exception: + event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] parent_id = j.get("parent", None) if parent_id is not None: diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 906801693a..388d85f05f 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -11,12 +11,12 @@ class 
BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, preserve_datetime=False, **kwargs): + def model_dump(self, **kwargs): ret = super().model_dump(**kwargs) - if not preserve_datetime: - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].isoformat() + # convert datetime fields to unix timestamps + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].timestamp() return ret def __hash__(self): diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bc323d7ad9..03185b169c 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -61,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - # if event.type == "SCAN": - # scan_json = Scan.from_event(event).model_dump() - # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - # if existing_scan: - # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - # else: - # # Insert as a new scan if no existing scan is found - # await self.scans_collection.insert_one(event_pydantic.model_dump()) - # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + if event.type == "SCAN": + scan_json = Scan.from_event(event).model_dump() + existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 4d73d036c1..229c58a290 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -@pytest.fixture(scope="session", autouse=True) -def install_all_python_deps(): - deps_pip = set() - for module in DEFAULT_PRESET.module_loader.preloaded().values(): - deps_pip.update(set(module.get("deps", {}).get("pip", []))) +# @pytest.fixture(scope="session", autouse=True) +# def install_all_python_deps(): +# deps_pip = set() +# for module in DEFAULT_PRESET.module_loader.preloaded().values(): +# deps_pip.update(set(module.get("deps", {}).get("pip", []))) - constraint_file = tempwordlist(get_python_constraints()) +# constraint_file = tempwordlist(get_python_constraints()) - subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) +# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 5a6fce547c..d29e7e79a8 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -28,15 +28,12 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) - assert 
isinstance(event_json["timestamp"], str) + assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json - assert isinstance(event_pydantic_dict["timestamp"], str) - assert isinstance(event_pydantic_dict["inserted_at"], str) - assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) - assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) + assert isinstance(event_pydantic_dict["timestamp"], float) + assert isinstance(event_pydantic_dict["inserted_at"], float) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 8156fc7969..5c6dedad8a 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.isoformat() + timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) @@ -515,7 +515,7 @@ async def test_events(events, helpers): assert reconstituted_event.uuid == db_event.uuid assert reconstituted_event.parent_uuid == scan.root_event.uuid assert reconstituted_event.scope_distance == 1 - assert reconstituted_event.timestamp.isoformat() == timestamp + assert reconstituted_event.timestamp.timestamp() == timestamp assert reconstituted_event.data == "evilcorp.com:80" assert reconstituted_event.type == "OPEN_TCP_PORT" assert reconstituted_event.host == "evilcorp.com" @@ -538,7 +538,7 @@ async def test_events(events, helpers): assert json_event_siemfriendly["timestamp"] == timestamp reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.isoformat() == timestamp + assert reconstituted_event2.timestamp.timestamp() == timestamp assert reconstituted_event2.data == "evilcorp.com:80" assert reconstituted_event2.type == "OPEN_TCP_PORT" assert reconstituted_event2.host == "evilcorp.com" From e4e1c99f312530a13b46463228adeeb55a0d8706 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 19:14:15 -0500 Subject: [PATCH 008/147] remove siem_friendly --- bbot/core/event/base.py | 18 +++++++--------- bbot/models/pydantic.py | 14 ++++++++----- bbot/models/sql.py | 21 +++++++------------ bbot/modules/output/http.py | 5 +---- bbot/modules/output/json.py | 6 ++---- bbot/modules/output/mongo.py | 8 +++++++ bbot/test/test_step_1/test_events.py | 21 +++++-------------- .../module_tests/test_module_http.py | 9 -------- .../module_tests/test_module_json.py | 15 ------------- .../module_tests/test_module_mongo.py | 20 +++++++++++++----- docs/scanning/tips_and_tricks.md | 18 ---------------- 11 files changed, 55 insertions(+), 100 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 53e2c62236..29f10190e2 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -756,7 +756,7 @@ def __contains__(self, other): return bool(radixtarget.search(other.host)) return False - def json(self, mode="json", siem_friendly=False): + def json(self, mode="json"): """ 
Serializes the event object to a JSON-compatible dictionary. @@ -765,7 +765,6 @@ def json(self, mode="json", siem_friendly=False): Parameters: mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id". - siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. This ensures the value of "data" is always the same type (a dictionary). Returns: dict: JSON-serializable dictionary representation of the event object. @@ -782,10 +781,12 @@ def json(self, mode="json", siem_friendly=False): data = data_attr else: data = smart_decode(self.data) - if siem_friendly: - j["data"] = {self.type: data} - else: + if isinstance(data, str): j["data"] = data + elif isinstance(data, dict): + j["data_json"] = data + else: + raise ValueError(f"Invalid data type: {type(data)}") # host, dns children if self.host: j["host"] = str(self.host) @@ -1728,7 +1729,7 @@ def make_event( ) -def event_from_json(j, siem_friendly=False): +def event_from_json(j): """ Creates an event object from a JSON dictionary. @@ -1760,10 +1761,7 @@ def event_from_json(j, siem_friendly=False): "context": j.get("discovery_context", None), "dummy": True, } - if siem_friendly: - data = j["data"][event_type] - else: - data = j["data"] + data = j.get("data_json", j.get("data", None)) kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 388d85f05f..0591a93515 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -57,13 +57,13 @@ def _datetime_fields(cls): ### EVENT ### - class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] type: Annotated[str, "indexed"] scope_description: str - data: Union[dict, str] + data: Annotated[Optional[str], "indexed"] = None + data_json: Optional[dict] = None host: Annotated[Optional[str], "indexed"] = None port: Optional[int] = None netloc: Optional[str] = None @@ -75,8 +75,8 @@ class Event(BBOTBaseModel): web_spider_distance: int = 10 scope_distance: int = 10 scan: Annotated[str, "indexed"] - timestamp: Annotated[NaiveUTC, "indexed"] - inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) + timestamp: Annotated[float, "indexed"] + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -91,9 +91,13 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] + def get_data(self): + if self.data is not None: + return self.data + return self.data_json -### SCAN ### +### SCAN ### class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] diff --git a/bbot/models/sql.py b/bbot/models/sql.py index e937fad1e6..2640e3ca81 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -67,25 +67,19 @@ def __eq__(self, other): ### EVENT ### - class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - data = self._get_data(self.data, self.type) - self.data = {self.type: data} + if self.data is None and self.data_json is None: + raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] def get_data(self): - return self._get_data(self.data, self.type) - - @staticmethod - def _get_data(data, type): - # handle SIEM-friendly format - if isinstance(data, dict) and 
list(data) == [type]: - return data[type] - return data + if self.data is not None: + return self.data + return self.data_json uuid: str = Field( primary_key=True, @@ -95,7 +89,8 @@ def _get_data(data, type): id: str = Field(index=True) type: str = Field(index=True) scope_description: str - data: dict = Field(sa_type=JSON) + data: Optional[str] = Field(default=None, index=True) + data_json: Optional[dict] = Field(default=None) host: Optional[str] port: Optional[int] netloc: Optional[str] @@ -119,7 +114,6 @@ def _get_data(data, type): ### SCAN ### - class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -134,7 +128,6 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### - class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py index 9d9241da0b..7d94148d72 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/http.py @@ -15,7 +15,6 @@ class HTTP(BaseOutputModule): "username": "", "password": "", "timeout": 10, - "siem_friendly": False, } options_desc = { "url": "Web URL", @@ -24,14 +23,12 @@ class HTTP(BaseOutputModule): "username": "Username (basic auth)", "password": "Password (basic auth)", "timeout": "HTTP timeout", - "siem_friendly": "Format JSON in a SIEM-friendly way for ingestion into Elastic, Splunk, etc.", } async def setup(self): self.url = self.config.get("url", "") self.method = self.config.get("method", "POST") self.timeout = self.config.get("timeout", 10) - self.siem_friendly = self.config.get("siem_friendly", False) self.headers = {} bearer = self.config.get("bearer", "") if bearer: @@ -56,7 +53,7 @@ async def handle_event(self, event): method=self.method, auth=self.auth, headers=self.headers, - json=event.json(siem_friendly=self.siem_friendly), + json=event.json(), ) is_success = False if response is None else response.is_success if not is_success: diff --git a/bbot/modules/output/json.py b/bbot/modules/output/json.py index a35fa6aed7..b93d1e4e3f 100644 --- a/bbot/modules/output/json.py +++ b/bbot/modules/output/json.py @@ -11,20 +11,18 @@ class JSON(BaseOutputModule): "created_date": "2022-04-07", "author": "@TheTechromancer", } - options = {"output_file": "", "siem_friendly": False} + options = {"output_file": ""} options_desc = { "output_file": "Output to file", - "siem_friendly": "Output JSON in a SIEM-friendly format for ingestion into Elastic, Splunk, etc.", } _preserve_graph = True async def setup(self): self._prep_output_dir("output.json") - self.siem_friendly = self.config.get("siem_friendly", False) return True async def handle_event(self, event): - event_json = event.json(siem_friendly=self.siem_friendly) + event_json = event.json() event_str = json.dumps(event_json) if self.file is not None: self.file.write(event_str + "\n") diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 03185b169c..5e555ab0ff 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -71,3 +71,11 @@ async def handle_event(self, event): # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + target_data = scan_json.get("target", {}) + target = Target(**target_data) + existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + if existing_target: + await self.targets_collection.replace_one({"uuid": target.uuid}, 
target.model_dump()) + else: + await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 5c6dedad8a..a940dbce06 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -529,28 +529,17 @@ async def test_events(events, helpers): assert hostless_event_json["data"] == "asdf" assert not "host" in hostless_event_json - # SIEM-friendly serialize/deserialize - json_event_siemfriendly = db_event.json(siem_friendly=True) - assert json_event_siemfriendly["scope_distance"] == 1 - assert json_event_siemfriendly["data"] == {"OPEN_TCP_PORT": "evilcorp.com:80"} - assert json_event_siemfriendly["type"] == "OPEN_TCP_PORT" - assert json_event_siemfriendly["host"] == "evilcorp.com" - assert json_event_siemfriendly["timestamp"] == timestamp - reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) - assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.timestamp() == timestamp - assert reconstituted_event2.data == "evilcorp.com:80" - assert reconstituted_event2.type == "OPEN_TCP_PORT" - assert reconstituted_event2.host == "evilcorp.com" - assert "127.0.0.1" in reconstituted_event2.resolved_hosts - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" json_event = http_response.json(mode="graph") + assert "data" in json_event + assert "data_json" not in json_event assert isinstance(json_event["data"], str) json_event = http_response.json() - assert isinstance(json_event["data"], dict) + assert "data" not in json_event + assert "data_json" in json_event + assert isinstance(json_event["data_json"], dict) assert json_event["type"] == "HTTP_RESPONSE" assert json_event["host"] == "example.com" assert json_event["parent"] == scan.root_event.id diff --git a/bbot/test/test_step_2/module_tests/test_module_http.py b/bbot/test/test_step_2/module_tests/test_module_http.py index 43b7189adf..d634765425 100644 --- a/bbot/test/test_step_2/module_tests/test_module_http.py +++ b/bbot/test/test_step_2/module_tests/test_module_http.py @@ -52,12 +52,3 @@ def check(self, module_test, events): assert self.headers_correct == True assert self.method_correct == True assert self.url_correct == True - - -class TestHTTPSIEMFriendly(TestHTTP): - modules_overrides = ["http"] - config_overrides = {"modules": {"http": dict(TestHTTP.config_overrides["modules"]["http"])}} - config_overrides["modules"]["http"]["siem_friendly"] = True - - def verify_data(self, j): - return j["data"] == {"DNS_NAME": "blacklanternsecurity.com"} and j["type"] == "DNS_NAME" diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index 27ed5a55e0..bf79eeb13f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -53,18 +53,3 @@ def check(self, module_test, events): assert dns_reconstructed.discovery_context == context_data assert dns_reconstructed.discovery_path == [context_data] assert dns_reconstructed.parent_chain == [dns_json["uuid"]] - - -class TestJSONSIEMFriendly(ModuleTestBase): - modules_overrides = ["json"] - config_overrides = {"modules": {"json": {"siem_friendly": True}}} - - def check(self, module_test, events): - txt_file = module_test.scan.home / "output.json" - 
lines = list(module_test.scan.helpers.read_file(txt_file)) - passed = False - for line in lines: - e = json.loads(line) - if e["data"] == {"DNS_NAME": "blacklanternsecurity.com"}: - passed = True - assert passed diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 31e7f70747..fcfed7841a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -72,12 +72,16 @@ async def check(self, module_test, events): db = client[self.test_db_name] events_collection = db.get_collection(self.test_collection_prefix + "events") + ### INDEXES ### + # make sure the collection has all the right indexes cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + ### EVENTS ### + # Fetch all events from the collection cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) @@ -86,11 +90,8 @@ async def check(self, module_test, events): assert len(events_json) == len(db_events) for db_event in db_events: - # we currently don't store timestamps as datetime objects because mongodb has lower precision - # assert isinstance(db_event["timestamp"], datetime) - # assert isinstance(db_event["inserted_at"], datetime) - assert isinstance(db_event["timestamp"], str) - assert isinstance(db_event["inserted_at"], str) + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] @@ -121,6 +122,15 @@ async def check(self, module_test, events): # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" + ### SCANS ### + + # Fetch all scans from the collection + cursor = db.get_collection(self.test_collection_prefix + "scans").find({}) + db_scans = await cursor.to_list(length=None) + assert len(db_scans) == 1, "There should be exactly one scan" + db_scan = db_scans[0] + assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + finally: # Clean up: Delete all documents in the collection await events_collection.delete_many({}) diff --git a/docs/scanning/tips_and_tricks.md b/docs/scanning/tips_and_tricks.md index c5073c1d63..e13d82875e 100644 --- a/docs/scanning/tips_and_tricks.md +++ b/docs/scanning/tips_and_tricks.md @@ -108,24 +108,6 @@ config: bbot -t evilcorp.com -p skip_cdns.yml ``` -### Ingest BBOT Data Into SIEM (Elastic, Splunk) - -If your goal is to run a BBOT scan and later feed its data into a SIEM such as Elastic, be sure to enable this option when scanning: - -```bash -bbot -t evilcorp.com -c modules.json.siem_friendly=true -``` - -This ensures the `.data` event attribute is always the same type (a dictionary), by nesting it like so: -```json -{ - "type": "DNS_NAME", - "data": { - "DNS_NAME": "blacklanternsecurity.com" - } -} -``` - ### Custom HTTP Proxy Web pentesters may appreciate BBOT's ability to quickly populate Burp Suite site maps for all subdomains in a target. If your scan includes gowitness, this will capture the traffic as if you manually visited each website in your browser -- including auxiliary web resources and javascript API calls. 
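The same workflow can be scripted with BBOT's Python API; a minimal sketch (the target and the proxy address are placeholders, assuming a Burp listener on 127.0.0.1:8080) might look like:

```python
# Hypothetical sketch: route BBOT's web traffic through a local intercepting proxy.
# web.http_proxy is the documented config option; the addresses are examples only.
from bbot.scanner import Scanner

scan = Scanner("evilcorp.com", config={"web": {"http_proxy": "http://127.0.0.1:8080"}})
for event in scan.start():  # synchronous generator of events
    print(event)
```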
To accomplish this, set the `web.http_proxy` config option like so: From d9a3d9d251bf9af914077ea027f04011dbcde600 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:00:28 -0500 Subject: [PATCH 009/147] steady work on mongo, bbot 3.0 --- bbot/core/event/base.py | 7 +- bbot/models/helpers.py | 20 +++--- bbot/models/pydantic.py | 71 +++++++++---------- bbot/models/sql.py | 27 +++---- bbot/modules/output/mongo.py | 16 ++--- bbot/scanner/scanner.py | 12 ++-- bbot/test/bbot_fixtures.py | 14 ++-- bbot/test/test_step_1/test_db_models.py | 25 ++++++- bbot/test/test_step_1/test_events.py | 2 +- .../module_tests/test_module_mongo.py | 12 +++- .../module_tests/test_module_sqlite.py | 14 ++++ 11 files changed, 128 insertions(+), 92 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 29f10190e2..bd6e884b37 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -12,6 +12,7 @@ from copy import copy from pathlib import Path from typing import Optional +from zoneinfo import ZoneInfo from contextlib import suppress from radixtarget import RadixTarget from urllib.parse import urljoin, parse_qs @@ -40,7 +41,7 @@ validators, get_file_extension, ) -from bbot.models.helpers import naive_datetime_validator +from bbot.models.helpers import utc_datetime_validator log = logging.getLogger("bbot.core.event") @@ -804,7 +805,7 @@ def json(self, mode="json"): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() + j["timestamp"] = utc_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1773,7 +1774,7 @@ def event_from_json(j): # accept both isoformat and unix timestamp try: - event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"], ZoneInfo("UTC")) except Exception: event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 985c845994..c7fc078a45 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,20 +1,22 @@ +from datetime import UTC from datetime import datetime from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime) -> datetime: +def utc_datetime_validator(d: datetime) -> datetime: """ - Converts all dates into UTC, then drops timezone information. - - This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. 
+ Converts all dates into UTC """ - # drop timezone info - return d.replace(tzinfo=None) + if d.tzinfo is not None: + return d.astimezone(UTC) + else: + return d.replace(tzinfo=UTC) -def naive_utc_now() -> datetime: - return naive_datetime_validator(datetime.now()) +def utc_now() -> datetime: + return datetime.now(UTC) -NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] +def utc_now_timestamp() -> datetime: + return utc_now().timestamp() diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0591a93515..356ab2e44c 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,9 +1,8 @@ import logging -from datetime import datetime from pydantic import BaseModel, ConfigDict, Field -from typing import Optional, List, Union, Annotated, get_type_hints +from typing import Optional, List, Union, Annotated -from bbot.models.helpers import NaiveUTC, naive_utc_now +from bbot.models.helpers import utc_now_timestamp log = logging.getLogger("bbot_server.models") @@ -11,14 +10,6 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, **kwargs): - ret = super().model_dump(**kwargs) - # convert datetime fields to unix timestamps - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].timestamp() - return ret - def __hash__(self): return hash(self.to_json()) @@ -29,34 +20,37 @@ def __eq__(self, other): def _indexed_fields(cls): return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) - @classmethod - def _get_type_hints(cls): - """ - Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint - """ - type_hints = get_type_hints(cls) - unwrapped_type_hints = {} - for field_name in cls.model_fields: - type_hint = type_hints[field_name] - while 1: - if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): - type_hint = type_hint.__args__[0] - else: - break - unwrapped_type_hints[field_name] = type_hint - return unwrapped_type_hints - - @classmethod - def _datetime_fields(cls): - datetime_fields = [] - for field_name, type_hint in cls._get_type_hints().items(): - if type_hint == datetime: - datetime_fields.append(field_name) - return sorted(datetime_fields) + # we keep these because they were a lot of work to make and maybe someday they'll be useful again + + # @classmethod + # def _get_type_hints(cls): + # """ + # Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + # """ + # type_hints = get_type_hints(cls) + # unwrapped_type_hints = {} + # for field_name in cls.model_fields: + # type_hint = type_hints[field_name] + # while 1: + # if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + # type_hint = type_hint.__args__[0] + # else: + # break + # unwrapped_type_hints[field_name] = type_hint + # return unwrapped_type_hints + + # @classmethod + # def _datetime_fields(cls): + # datetime_fields = [] + # for field_name, type_hint in cls._get_type_hints().items(): + # if type_hint == datetime: + # datetime_fields.append(field_name) + # return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -76,7 +70,7 @@ class Event(BBOTBaseModel): scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[float, "indexed"] - inserted_at: Annotated[Optional[float], "indexed"] = 
Field(default_factory=naive_utc_now) + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=utc_now_timestamp) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -99,12 +93,13 @@ def get_data(self): ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str status: Annotated[str, "indexed"] - started_at: Annotated[NaiveUTC, "indexed"] - finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + started_at: Annotated[float, "indexed"] + finished_at: Annotated[Optional[float], "indexed"] = None duration_seconds: Optional[float] = None duration: Optional[str] = None target: dict diff --git a/bbot/models/sql.py b/bbot/models/sql.py index 2640e3ca81..8e3e059b00 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -3,13 +3,15 @@ import json import logging +from datetime import datetime from pydantic import ConfigDict from typing import List, Optional -from datetime import datetime, timezone from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime +from bbot.models.helpers import utc_now_timestamp + log = logging.getLogger("bbot_server.models") @@ -27,14 +29,6 @@ def naive_datetime_validator(d: datetime): NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] -class CustomJSONEncoder(json.JSONEncoder): - def default(self, obj): - # handle datetime - if isinstance(obj, datetime): - return obj.isoformat() - return super().default(obj) - - class BBOTBaseModel(SQLModel): model_config = ConfigDict(extra="ignore") @@ -52,7 +46,7 @@ def validated(self): return self def to_json(self, **kwargs): - return json.dumps(self.validated.model_dump(), sort_keys=True, cls=CustomJSONEncoder, **kwargs) + return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs) @classmethod def _pk_column_names(cls): @@ -67,12 +61,11 @@ def __eq__(self, other): ### EVENT ### + class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - if self.data is None and self.data_json is None: - raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] @@ -88,12 +81,12 @@ def get_data(self): ) id: str = Field(index=True) type: str = Field(index=True) - scope_description: str data: Optional[str] = Field(default=None, index=True) - data_json: Optional[dict] = Field(default=None) + data_json: Optional[dict] = Field(default=None, sa_type=JSON) host: Optional[str] port: Optional[int] netloc: Optional[str] + scope_description: str # store the host in reversed form for efficient lookups by domain reverse_host: Optional[str] = Field(default="", exclude=True, index=True) resolved_hosts: List = Field(default=[], sa_type=JSON) @@ -101,7 +94,8 @@ def get_data(self): web_spider_distance: int = 10 scope_distance: int = Field(default=10, index=True) scan: str = Field(index=True) - timestamp: NaiveUTC = Field(index=True) + timestamp: float = Field(index=True) + inserted_at: float = Field(default_factory=utc_now_timestamp) parent: str = Field(index=True) tags: List = Field(default=[], sa_type=JSON) module: str = Field(index=True) @@ -109,11 +103,11 @@ def get_data(self): discovery_context: str = "" discovery_path: List[str] = Field(default=[], sa_type=JSON) parent_chain: List[str] = Field(default=[], sa_type=JSON) - inserted_at: NaiveUTC = Field(default_factory=lambda: 
datetime.now(timezone.utc)) ### SCAN ### + class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -128,6 +122,7 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### + class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 5e555ab0ff..6ad16620f6 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -23,7 +23,7 @@ class Mongo(BaseOutputModule): "database": "The name of the database to use", "username": "The username to use to connect to the database", "password": "The password to use to connect to the database", - "collection_prefix": "Prefix each collection with this string", + "collection_prefix": "Prefix the name of each collection with this string", } deps_pip = ["motor~=3.6.0"] @@ -62,20 +62,20 @@ async def handle_event(self, event): await self.events_collection.insert_one(event_pydantic.model_dump()) if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + scan_json = Scan(**event.data_json).model_dump() + existing_scan = await self.scans_collection.find_one({"id": event_pydantic.id}) if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + await self.scans_collection.replace_one({"id": event_pydantic.id}, scan_json) + self.verbose(f"Updated scan event with ID: {event_pydantic.id}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + self.verbose(f"Inserted new scan event with ID: {event_pydantic.id}") target_data = scan_json.get("target", {}) target = Target(**target_data) - existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + existing_target = await self.targets_collection.find_one({"hash": target.hash}) if existing_target: - await self.targets_collection.replace_one({"uuid": target.uuid}, target.model_dump()) + await self.targets_collection.replace_one({"hash": target.hash}, target.model_dump()) else: await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 62e5c9d3ab..a5b04bc2c7 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,7 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime +from datetime import datetime, UTC from collections import OrderedDict from bbot import __version__ @@ -327,8 +327,8 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now() - self.root_event.data["started_at"] = self.start_time.isoformat() + self.start_time = datetime.now(UTC) + self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +436,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now() + self.end_time = datetime.now(UTC) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) @@ -1130,9 +1130,9 @@ def json(self): j["target"] = self.preset.target.json j["preset"] = self.preset.to_dict(redact_secrets=True) if 
self.start_time is not None: - j["started_at"] = self.start_time.isoformat() + j["started_at"] = self.start_time.timestamp() if self.end_time is not None: - j["finished_at"] = self.end_time.isoformat() + j["finished_at"] = self.end_time.timestamp() if self.duration is not None: j["duration_seconds"] = self.duration_seconds if self.duration_human is not None: diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 229c58a290..4d73d036c1 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -# @pytest.fixture(scope="session", autouse=True) -# def install_all_python_deps(): -# deps_pip = set() -# for module in DEFAULT_PRESET.module_loader.preloaded().values(): -# deps_pip.update(set(module.get("deps", {}).get("pip", []))) +@pytest.fixture(scope="session", autouse=True) +def install_all_python_deps(): + deps_pip = set() + for module in DEFAULT_PRESET.module_loader.preloaded().values(): + deps_pip.update(set(module.get("deps", {}).get("pip", []))) -# constraint_file = tempwordlist(get_python_constraints()) + constraint_file = tempwordlist(get_python_constraints()) -# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) + subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index d29e7e79a8..a8088be4f2 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,11 +1,23 @@ -from datetime import datetime +from datetime import datetime, UTC +from zoneinfo import ZoneInfo from bbot.models.pydantic import Event +from bbot.core.event.base import BaseEvent +from bbot.models.helpers import utc_datetime_validator from ..bbot_fixtures import * # noqa def test_pydantic_models(events): + # test datetime helpers + now = datetime.now(ZoneInfo("America/New_York")) + utc_now = utc_datetime_validator(now) + assert now.timestamp() == utc_now.timestamp() + now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + assert now2.timestamp() == utc_now.timestamp() + utc_now2 = utc_datetime_validator(now2) + assert utc_now2.timestamp() == utc_now.timestamp() + assert Event._datetime_fields() == ["inserted_at", "timestamp"] test_event = Event(**events.ipv4.json()) @@ -23,18 +35,25 @@ def test_pydantic_models(events): ] # convert events to pydantic and back, making sure they're exactly the same - for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + for event in ("ipv4", "http_response", "finding", "vulnerability", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() + event_reconstituted = BaseEvent.from_json(event_pydantic_dict) assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json + + event_pydantic_dict = event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) + assert event_pydantic_dict == event_json + event_pydantic_dict.pop("scan") + 
event_pydantic_dict.pop("module") + event_pydantic_dict.pop("module_sequence") + assert event_reconstituted.json() == event_pydantic_dict # TODO: SQL diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index a940dbce06..faadbdaae9 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() + timestamp = db_event.timestamp.timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index fcfed7841a..ac28e64e7b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -129,7 +129,17 @@ async def check(self, module_test, events): db_scans = await cursor.to_list(length=None) assert len(db_scans) == 1, "There should be exactly one scan" db_scan = db_scans[0] - assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + assert db_scan["id"] == main_event["scan"], "Scan id should match main event scan" + + ### TARGETS ### + + # Fetch all targets from the collection + cursor = db.get_collection(self.test_collection_prefix + "targets").find({}) + db_targets = await cursor.to_list(length=None) + assert len(db_targets) == 1, "There should be exactly one target" + db_target = db_targets[0] + scan_event = next(e for e in events if e.type == "SCAN") + assert db_target["hash"] == scan_event.data["target"]["hash"], "Target hash should match scan target hash" finally: # Clean up: Delete all documents in the collection diff --git a/bbot/test/test_step_2/module_tests/test_module_sqlite.py b/bbot/test/test_step_2/module_tests/test_module_sqlite.py index ec80b7555d..7970627b15 100644 --- a/bbot/test/test_step_2/module_tests/test_module_sqlite.py +++ b/bbot/test/test_step_2/module_tests/test_module_sqlite.py @@ -8,6 +8,8 @@ class TestSQLite(ModuleTestBase): def check(self, module_test, events): sqlite_output_file = module_test.scan.home / "output.sqlite" assert sqlite_output_file.exists(), "SQLite output file not found" + + # first connect with raw sqlite with sqlite3.connect(sqlite_output_file) as db: cursor = db.cursor() results = cursor.execute("SELECT * FROM event").fetchall() @@ -16,3 +18,15 @@ def check(self, module_test, events): assert len(results) == 1, "No scans found in SQLite database" results = cursor.execute("SELECT * FROM target").fetchall() assert len(results) == 1, "No targets found in SQLite database" + + # then connect with bbot models + from bbot.models.sql import Event + from sqlmodel import create_engine, Session, select + + engine = create_engine(f"sqlite:///{sqlite_output_file}") + + with Session(engine) as session: + statement = select(Event).where(Event.host == "evilcorp.com") + event = session.exec(statement).first() + assert event.host == "evilcorp.com", "Event host should match target host" + assert event.data == "evilcorp.com", "Event data should match target host" From ac9502b8d5a78214363f359c93671f30ee4cc3bf Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:02:48 -0500 Subject: [PATCH 010/147] flaked --- 
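Since these patches serialize every datetime to an epoch float, the round-trip guarantee is worth pinning down. A standalone sketch mirroring `utc_datetime_validator` (it does not import bbot, and only assumes the helper's documented behavior) looks like:

```python
# Minimal sketch of the UTC round-trip the models now rely on.
from datetime import datetime
from zoneinfo import ZoneInfo


def to_utc(d: datetime) -> datetime:
    # mirror of utc_datetime_validator: convert aware datetimes, tag naive ones
    if d.tzinfo is not None:
        return d.astimezone(ZoneInfo("UTC"))
    return d.replace(tzinfo=ZoneInfo("UTC"))


local = datetime.now(ZoneInfo("America/New_York"))
as_utc = to_utc(local)
# converting to UTC never shifts the underlying instant
assert local.timestamp() == as_utc.timestamp()
# and the epoch float survives a full serialize/deserialize cycle
restored = datetime.fromtimestamp(as_utc.timestamp(), ZoneInfo("UTC"))
assert restored.timestamp() == as_utc.timestamp()
```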
bbot/models/helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index c7fc078a45..47959ad4ac 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,7 +1,5 @@ from datetime import UTC from datetime import datetime -from typing_extensions import Annotated -from pydantic.functional_validators import AfterValidator def utc_datetime_validator(d: datetime) -> datetime: From d3f8e93800d3e5d8fa39529a24bbf9024aa60473 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:41:45 -0500 Subject: [PATCH 011/147] fix tests --- bbot/core/event/base.py | 7 ++++++- bbot/test/test_step_1/test_bbot_fastapi.py | 4 ++-- bbot/test/test_step_1/test_db_models.py | 8 ++++---- bbot/test/test_step_1/test_scan.py | 2 +- bbot/test/test_step_2/module_tests/test_module_json.py | 8 ++++---- bbot/test/test_step_2/module_tests/test_module_splunk.py | 2 +- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index bd6e884b37..d4a37b8f24 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1762,7 +1762,12 @@ def event_from_json(j): "context": j.get("discovery_context", None), "dummy": True, } - data = j.get("data_json", j.get("data", None)) + data = j.get("data_json", None) + if data is None: + data = j.get("data", None) + if data is None: + json_pretty = json.dumps(j, indent=2) + raise ValueError(f"data or data_json must be provided. JSON: {json_pretty}") kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index bad4020712..617f95abbf 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -28,7 +28,7 @@ def test_bbot_multiprocess(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any([e["data"] == "test@blacklanternsecurity.com" for e in events]) + assert any([e.get("data", "") == "test@blacklanternsecurity.com" for e in events]) def test_bbot_fastapi(bbot_httpserver): @@ -61,7 +61,7 @@ def test_bbot_fastapi(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any([e["data"] == "test@blacklanternsecurity.com" for e in events]) + assert any([e.get("data", "") == "test@blacklanternsecurity.com" for e in events]) finally: with suppress(Exception): diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index a8088be4f2..c29cc09a4f 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -18,12 +18,12 @@ def test_pydantic_models(events): utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() - assert Event._datetime_fields() == ["inserted_at", "timestamp"] - test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ + "data", "host", "id", + "inserted_at", "module", "parent", "parent_uuid", @@ -40,10 +40,10 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_reconstituted = BaseEvent.from_json(event_pydantic_dict) + event_reconstituted = BaseEvent.from_json(event_pydantic.model_dump(exclude_none=True)) assert isinstance(event_json["timestamp"], float) assert 
isinstance(e.timestamp, datetime) - assert isinstance(event_pydantic.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, float) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index f5f8458262..5514571fa8 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -144,7 +144,7 @@ async def test_python_output_matches_json(bbot_scanner): assert len(events) == 5 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert all([isinstance(e["data"]["status"], str) for e in scan_events]) + assert all([isinstance(e["data_json"]["status"], str) for e in scan_events]) assert len([e for e in events if e["type"] == "DNS_NAME"]) == 1 assert len([e for e in events if e["type"] == "ORG_STUB"]) == 1 assert len([e for e in events if e["type"] == "IP_ADDRESS"]) == 1 diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index bf79eeb13f..3641574213 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -23,13 +23,13 @@ def check(self, module_test, events): assert len(dns_json) == 1 dns_json = dns_json[0] scan = scan_json[0] - assert scan["data"]["name"] == module_test.scan.name - assert scan["data"]["id"] == module_test.scan.id + assert scan["data_json"]["name"] == module_test.scan.name + assert scan["data_json"]["id"] == module_test.scan.id assert scan["id"] == module_test.scan.id assert scan["uuid"] == str(module_test.scan.root_event.uuid) assert scan["parent_uuid"] == str(module_test.scan.root_event.uuid) - assert scan["data"]["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan["data"]["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["seeds"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["whitelist"] == ["blacklanternsecurity.com"] assert dns_json["data"] == dns_data assert dns_json["id"] == str(dns_event.id) assert dns_json["uuid"] == str(dns_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_splunk.py b/bbot/test/test_step_2/module_tests/test_module_splunk.py index d55ed17c27..eef148944c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_splunk.py +++ b/bbot/test/test_step_2/module_tests/test_module_splunk.py @@ -23,7 +23,7 @@ def verify_data(self, j): if not j["index"] == "bbot_index": return False data = j["event"] - if not data["data"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": + if not data["data_json"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": return False return True From 38900573000afb60f50e31257d2521deafdc2f7d Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 21:19:07 -0500 Subject: [PATCH 012/147] fix utc bug --- bbot/scanner/scanner.py | 7 ++++--- bbot/test/test_step_1/test_db_models.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index a5b04bc2c7..0915c4cb91 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,8 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime, UTC +from datetime import datetime +from zoneinfo import ZoneInfo from collections import 
OrderedDict from bbot import __version__ @@ -327,7 +328,7 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now(UTC) + self.start_time = datetime.now(ZoneInfo("UTC")) self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +437,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now(UTC) + self.end_time = datetime.now(ZoneInfo("UTC")) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index c29cc09a4f..9c71390696 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,4 +1,4 @@ -from datetime import datetime, UTC +from datetime import datetime from zoneinfo import ZoneInfo from bbot.models.pydantic import Event @@ -13,7 +13,7 @@ def test_pydantic_models(events): now = datetime.now(ZoneInfo("America/New_York")) utc_now = utc_datetime_validator(now) assert now.timestamp() == utc_now.timestamp() - now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + now2 = datetime.fromtimestamp(utc_now.timestamp(), ZoneInfo("UTC")) assert now2.timestamp() == utc_now.timestamp() utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() From 00cf3c708e94dcd0351ec8685514b5ffd7c5bf27 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:32:32 -0500 Subject: [PATCH 013/147] fix tests --- bbot/models/helpers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 47959ad4ac..b94bc976cc 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,5 +1,5 @@ -from datetime import UTC from datetime import datetime +from zoneinfo import ZoneInfo def utc_datetime_validator(d: datetime) -> datetime: @@ -7,13 +7,13 @@ def utc_datetime_validator(d: datetime) -> datetime: Converts all dates into UTC """ if d.tzinfo is not None: - return d.astimezone(UTC) + return d.astimezone(ZoneInfo("UTC")) else: - return d.replace(tzinfo=UTC) + return d.replace(tzinfo=ZoneInfo("UTC")) def utc_now() -> datetime: - return datetime.now(UTC) + return datetime.now(ZoneInfo("UTC")) def utc_now_timestamp() -> datetime: From 47538b3f054c5c5e6d077cfd3202913da7babe04 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:30:39 -0500 Subject: [PATCH 014/147] elastic module --- bbot/modules/output/elastic.py | 22 +++ bbot/modules/output/http.py | 6 +- .../module_tests/test_module_elastic.py | 130 ++++++++++++++++++ docs/scanning/output.md | 25 ++-- 4 files changed, 171 insertions(+), 12 deletions(-) create mode 100644 bbot/modules/output/elastic.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_elastic.py diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py new file mode 100644 index 0000000000..15bc023df8 --- /dev/null +++ b/bbot/modules/output/elastic.py @@ -0,0 +1,22 @@ +from .http import HTTP + + +class Elastic(HTTP): + watched_events = ["*"] + metadata = { + "description": "Send scan results to Elasticsearch", + "created_date": "2022-11-21", + "author": "@TheTechromancer", + } + options = { + "url": "", + "username": "elastic", + "password": "changeme", + "timeout": 10, + } + options_desc = { + "url": "Elastic URL (e.g. 
https://localhost:9200//_doc)", + "username": "Elastic username", + "password": "Elastic password", + "timeout": "HTTP timeout", + } diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py index 7d94148d72..0af65a87d2 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/http.py @@ -1,3 +1,4 @@ +from bbot.models.pydantic import Event from bbot.modules.output.base import BaseOutputModule @@ -48,12 +49,15 @@ async def setup(self): async def handle_event(self, event): while 1: + event_json = event.json() + event_pydantic = Event(**event_json) + event_json = event_pydantic.model_dump(exclude_none=True) response = await self.helpers.request( url=self.url, method=self.method, auth=self.auth, headers=self.headers, - json=event.json(), + json=event_json, ) is_success = False if response is None else response.is_success if not is_success: diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py new file mode 100644 index 0000000000..710c22e0f0 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -0,0 +1,130 @@ +import time +import httpx +import asyncio + +from .base import ModuleTestBase + + +class TestElastic(ModuleTestBase): + config_overrides = { + "modules": { + "elastic": { + "url": "https://localhost:9200/bbot_test_events/_doc", + "username": "elastic", + "password": "bbotislife", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Elasticsearch container + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-elastic", + "--rm", + "-e", + "ELASTIC_PASSWORD=bbotislife", + "-e", + "cluster.routing.allocation.disk.watermark.low=96%", + "-e", + "cluster.routing.allocation.disk.watermark.high=97%", + "-e", + "cluster.routing.allocation.disk.watermark.flood_stage=98%", + "-p", + "9200:9200", + "-d", + "docker.elastic.co/elasticsearch/elasticsearch:8.16.0", + ) + + # Connect to Elasticsearch with retry logic + async with httpx.AsyncClient(verify=False) as client: + while True: + try: + # Attempt a simple operation to confirm the connection + response = await client.get("https://localhost:9200/_cat/health", auth=("elastic", "bbotislife")) + response.raise_for_status() + break + except Exception as e: + print(f"Connection failed: {e}. 
Retrying...", flush=True) + time.sleep(0.5) + + # Ensure the index is empty + await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) + print("Elasticsearch index cleaned up", flush=True) + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to Elasticsearch + async with httpx.AsyncClient(verify=False) as client: + + # refresh the index + await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) + + # Fetch all events from the index + response = await client.get( + f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") + ) + response_json = response.json() + import json + + print(f"response: {json.dumps(response_json, indent=2)}") + db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] + + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert ( + main_event is not None + ), "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Events don't match exactly because the elastic ones have reverse_host and inserted_at + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + db_event.pop("inserted_at") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the index + async with httpx.AsyncClient(verify=False) as client: + response = await client.delete( + f"https://localhost:9200/bbot_test_events", + auth=("elastic", "bbotislife"), + params={"ignore": "400,404"}, + ) + print(f"Deleted documents from index", flush=True) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/docs/scanning/output.md b/docs/scanning/output.md index dd45a5c833..16cfbd3593 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -155,15 +155,20 @@ config: ### Elasticsearch -When outputting to Elastic, use the `http` output module with the following settings (replace `` with your desired index, e.g. 
`bbot`):
+- Step 1: Spin up a quick Elasticsearch docker image
+
+```bash
+docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0
+```
+
+- Step 2: Execute a scan with `elastic` output module
 
 ```bash
 # send scan results directly to elasticsearch
-bbot -t evilcorp.com -om http -c \
-  modules.http.url=http://localhost:8000//_doc \
-  modules.http.siem_friendly=true \
-  modules.http.username=elastic \
-  modules.http.password=changeme
+# note: you can replace "bbot_events" with your own index name
+bbot -t evilcorp.com -om elastic -c \
+  modules.elastic.url=https://localhost:9200/bbot_events/_doc \
+  modules.elastic.password=bbotislife
 ```
 
 Alternatively, via a preset:
@@ -171,11 +176,9 @@ Alternatively, via a preset:
 ```yaml title="elastic_preset.yml"
 config:
   modules:
-    http:
-      url: http://localhost:8000//_doc
-      siem_friendly: true
-      username: elastic
-      password: changeme
+    elastic:
+      url: https://localhost:9200/bbot_events/_doc
+      password: bbotislife
 ```
 
 ### Splunk

From 2ff94d124e19b81f6245d56f0436ac529455918a Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 18:30:05 -0500
Subject: [PATCH 015/147] new module: kafka

---
 bbot/core/event/base.py                  |   2 +-
 bbot/modules/output/elastic.py           |  14 ++-
 bbot/modules/output/kafka.py             |  42 +++++++
 bbot/scanner/scanner.py                  |   6 +-
 .../module_tests/test_module_elastic.py  |   9 +-
 .../module_tests/test_module_kafka.py    | 108 ++++++++++++++++++
 6 files changed, 167 insertions(+), 14 deletions(-)
 create mode 100644 bbot/modules/output/kafka.py
 create mode 100644 bbot/test/test_step_2/module_tests/test_module_kafka.py

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index d4a37b8f24..066e7469fb 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -814,7 +814,7 @@ def json(self, mode="json"):
         if parent_uuid:
             j["parent_uuid"] = parent_uuid
         # tags
-        j.update({"tags": list(self.tags)})
+        j.update({"tags": sorted(self.tags)})
         # parent module
         if self.module:
             j.update({"module": str(self.module)})
diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py
index 15bc023df8..42c331c516 100644
--- a/bbot/modules/output/elastic.py
+++ b/bbot/modules/output/elastic.py
@@ -2,6 +2,10 @@
 
 
 class Elastic(HTTP):
+    """
+    docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0
+    """
+
     watched_events = ["*"]
     metadata = {
         "description": "Send scan results to Elasticsearch",
@@ -9,9 +13,9 @@ class Elastic(HTTP):
         "author": "@TheTechromancer",
     }
     options = {
-        "url": "",
+        "url": "https://localhost:9200/bbot_events/_doc",
         "username": "elastic",
-        "password": "changeme",
+        "password": "bbotislife",
         "timeout": 10,
     }
     options_desc = {
@@ -20,3 +24,9 @@ class Elastic(HTTP):
         "password": "Elastic password",
         "timeout": "HTTP timeout",
     }
+
+    async def cleanup(self):
+        # refresh the index
+        doc_regex = self.helpers.re.compile(r"/[^/]+$")
+        refresh_url = doc_regex.sub("/_refresh", self.url)
+        await self.helpers.request(refresh_url, auth=self.auth)
diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py
new file mode 100644
index 0000000000..5b2db13d60
--- /dev/null
+++ b/bbot/modules/output/kafka.py
@@ -0,0 +1,42 @@
+import json
+from aiokafka import AIOKafkaProducer
+
+from bbot.modules.output.base import BaseOutputModule
+
+
+class 
Kafka(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a Kafka topic", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + options_desc = { + "bootstrap_servers": "A comma-separated list of Kafka server addresses", + "topic": "The Kafka topic to publish events to", + } + deps_pip = ["aiokafka~=0.12.0"] + + async def setup(self): + self.bootstrap_servers = self.config.get("bootstrap_servers", "localhost:9092") + self.topic = self.config.get("topic", "bbot_events") + self.producer = AIOKafkaProducer(bootstrap_servers=self.bootstrap_servers) + + # Start the producer + await self.producer.start() + self.verbose("Kafka producer started successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + await self.producer.send_and_wait(self.topic, event_data) + + async def cleanup(self): + # Stop the producer + await self.producer.stop() + self.verbose("Kafka producer stopped successfully") diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 0915c4cb91..0db6e1225e 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -865,15 +865,15 @@ async def _cleanup(self): if not self._cleanedup: self._cleanedup = True self.status = "CLEANING_UP" + # clean up modules + for mod in self.modules.values(): + await mod._cleanup() # clean up dns engine if self.helpers._dns is not None: await self.helpers.dns.shutdown() # clean up web engine if self.helpers._web is not None: await self.helpers.web.shutdown() - # clean up modules - for mod in self.modules.values(): - await mod._cleanup() with contextlib.suppress(Exception): self.home.rmdir() self.helpers.clean_old_scans() diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 710c22e0f0..2f8891a640 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,12 +48,11 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - print(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. 
Retrying...", flush=True) time.sleep(0.5) # Ensure the index is empty await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) - print("Elasticsearch index cleaned up", flush=True) async def check(self, module_test, events): try: @@ -65,17 +64,11 @@ async def check(self, module_test, events): # Connect to Elasticsearch async with httpx.AsyncClient(verify=False) as client: - # refresh the index - await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) - # Fetch all events from the index response = await client.get( f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") ) response_json = response.json() - import json - - print(f"response: {json.dumps(response_json, indent=2)}") db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] # make sure we have the same number of events diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py new file mode 100644 index 0000000000..6a81173561 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -0,0 +1,108 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestKafka(ModuleTestBase): + config_overrides = { + "modules": { + "kafka": { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Zookeeper + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-zookeeper", "-p", "2181:2181", "zookeeper:3.9" + ) + + # Wait for Zookeeper to be ready + while True: + try: + # Attempt to connect to Zookeeper with a timeout + reader, writer = await asyncio.wait_for(asyncio.open_connection("localhost", 2181), timeout=0.5) + break # Exit the loop if the connection is successful + except Exception as e: + self.log.verbose(f"Waiting for Zookeeper to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + finally: + with suppress(Exception): + writer.close() + await writer.wait_closed() + + # Start Kafka using wurstmeister/kafka + await asyncio.create_subprocess_exec( + "docker", + "run", + "-d", + "--rm", + "--name", + "bbot-test-kafka", + "--link", + "bbot-test-zookeeper:zookeeper", + "-e", + "KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181", + "-e", + "KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092", + "-e", + "KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092", + "-e", + "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1", + "-p", + "9092:9092", + "wurstmeister/kafka", + ) + + from aiokafka import AIOKafkaConsumer + + # Wait for Kafka to be ready + while True: + try: + self.consumer = AIOKafkaConsumer( + "bbot_events", + bootstrap_servers="localhost:9092", + group_id="test_group", + ) + await self.consumer.start() + break # Exit the loop if the consumer starts successfully + except Exception as e: + self.log.verbose(f"Waiting for Kafka to be ready: {e}") + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + await asyncio.sleep(0.5) # Wait a bit before retrying + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from Kafka + kafka_events = [] + async for msg in self.consumer: + event_data = json.loads(msg.value.decode("utf-8")) + kafka_events.append(event_data) + if 
len(kafka_events) >= len(events_json): + break + + kafka_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == kafka_events, "Events do not match" + + finally: + # Clean up: Stop the Kafka consumer + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + # Stop Kafka and Zookeeper containers + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-kafka", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-zookeeper", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 88920d05a5320a82876fbd2229c88b5eaae62463 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:56:45 -0500 Subject: [PATCH 016/147] fix elastic tests --- bbot/test/test_step_2/module_tests/test_module_elastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 2f8891a640..db9f2359f7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,7 +48,7 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - self.log.verbose(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. Retrying...") time.sleep(0.5) # Ensure the index is empty @@ -117,7 +117,7 @@ async def check(self, module_test, events): auth=("elastic", "bbotislife"), params={"ignore": "400,404"}, ) - print(f"Deleted documents from index", flush=True) + self.log.verbose(f"Deleted documents from index") await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) From 10ed49450c336bb28ecb21eac958586a25a82369 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:45:47 -0500 Subject: [PATCH 017/147] rabbitmq module --- bbot/modules/output/kafka.py | 2 +- bbot/modules/output/rabbitmq.py | 50 ++++++++++++++ .../module_tests/test_module_rabbitmq.py | 69 +++++++++++++++++++ 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 bbot/modules/output/rabbitmq.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_rabbitmq.py diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 5b2db13d60..9b99710928 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -8,7 +8,7 @@ class Kafka(BaseOutputModule): watched_events = ["*"] meta = { "description": "Output scan data to a Kafka topic", - "created_date": "2024-11-17", + "created_date": "2024-11-22", "author": "@TheTechromancer", } options = { diff --git a/bbot/modules/output/rabbitmq.py b/bbot/modules/output/rabbitmq.py new file mode 100644 index 0000000000..64c094dfbf --- /dev/null +++ b/bbot/modules/output/rabbitmq.py @@ -0,0 +1,50 @@ +import json +import aio_pika + +from bbot.modules.output.base import BaseOutputModule + + +class RabbitMQ(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a RabbitMQ queue", + "created_date": "2024-11-22", + "author": "@TheTechromancer", + } + options = { + "url": "amqp://guest:guest@localhost/", + "queue": "bbot_events", + } + options_desc = { + "url": "The RabbitMQ connection URL", + "queue": "The RabbitMQ queue to publish events to", + } + deps_pip = 
["aio_pika~=9.5.0"] + + async def setup(self): + self.rabbitmq_url = self.config.get("url", "amqp://guest:guest@localhost/") + self.queue_name = self.config.get("queue", "bbot_events") + + # Connect to RabbitMQ + self.connection = await aio_pika.connect_robust(self.rabbitmq_url) + self.channel = await self.connection.channel() + + # Declare the queue + self.queue = await self.channel.declare_queue(self.queue_name, durable=True) + self.verbose("RabbitMQ connection and queue setup successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + + # Publish the message to the queue + await self.channel.default_exchange.publish( + aio_pika.Message(body=event_data), + routing_key=self.queue_name, + ) + + async def cleanup(self): + # Close the connection + await self.connection.close() + self.verbose("RabbitMQ connection closed successfully") diff --git a/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py new file mode 100644 index 0000000000..d05808c2da --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py @@ -0,0 +1,69 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestRabbitMQ(ModuleTestBase): + config_overrides = { + "modules": { + "rabbitmq": { + "url": "amqp://guest:guest@localhost/", + "queue": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + import aio_pika + + # Start RabbitMQ + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-rabbitmq", "-p", "5672:5672", "rabbitmq:3-management" + ) + + # Wait for RabbitMQ to be ready + while True: + try: + # Attempt to connect to RabbitMQ with a timeout + connection = await aio_pika.connect_robust("amqp://guest:guest@localhost/") + break # Exit the loop if the connection is successful + except Exception as e: + with suppress(Exception): + await connection.close() + self.log.verbose(f"Waiting for RabbitMQ to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + + self.connection = connection + self.channel = await self.connection.channel() + self.queue = await self.channel.declare_queue("bbot_events", durable=True) + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from RabbitMQ + rabbitmq_events = [] + async with self.queue.iterator() as queue_iter: + async for message in queue_iter: + async with message.process(): + event_data = json.loads(message.body.decode("utf-8")) + rabbitmq_events.append(event_data) + if len(rabbitmq_events) >= len(events_json): + break + + rabbitmq_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == rabbitmq_events, "Events do not match" + + finally: + # Clean up: Close the RabbitMQ connection + await self.connection.close() + # Stop RabbitMQ container + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-rabbitmq", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 62267273aa2b61c4ac55212fa2ae90d24f667aab Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:54:29 -0500 Subject: [PATCH 018/147] better error handling in module --- bbot/modules/output/kafka.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py
index 5b2db13d60..0c28075450 100644
--- a/bbot/modules/output/kafka.py
+++ b/bbot/modules/output/kafka.py
@@ -34,7 +34,12 @@ async def setup(self):
     async def handle_event(self, event):
         event_json = event.json()
         event_data = json.dumps(event_json).encode("utf-8")
-        await self.producer.send_and_wait(self.topic, event_data)
+        while 1:
+            try:
+                await self.producer.send_and_wait(self.topic, event_data)
+            except Exception as e:
+                self.warning(f"Error sending event to Kafka: {e}, retrying...")
+                await self.helpers.sleep(1)
 
     async def cleanup(self):
         # Stop the producer

From 996f8d5a09301032c213b77883a7804f0c1bd7bb Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 19:54:39 -0500
Subject: [PATCH 019/147] better error handling in module

---
 bbot/modules/output/kafka.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py
index 0c28075450..0a31e0be12 100644
--- a/bbot/modules/output/kafka.py
+++ b/bbot/modules/output/kafka.py
@@ -37,6 +37,7 @@ async def handle_event(self, event):
         while 1:
             try:
                 await self.producer.send_and_wait(self.topic, event_data)
+                break
             except Exception as e:
                 self.warning(f"Error sending event to Kafka: {e}, retrying...")
                 await self.helpers.sleep(1)

From 571a6cdb10e6b3cc6c5f42754eaa91f62935bcea Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 19:57:19 -0500
Subject: [PATCH 020/147] fix tests, better error handling in module

---
 bbot/modules/output/rabbitmq.py          | 14 ++++++++++----
 bbot/test/test_step_1/test_python_api.py |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/bbot/modules/output/rabbitmq.py b/bbot/modules/output/rabbitmq.py
index 64c094dfbf..ba4205940d 100644
--- a/bbot/modules/output/rabbitmq.py
+++ b/bbot/modules/output/rabbitmq.py
@@ -39,10 +39,16 @@ async def handle_event(self, event):
         event_data = json.dumps(event_json).encode("utf-8")
 
         # Publish the message to the queue
-        await self.channel.default_exchange.publish(
-            aio_pika.Message(body=event_data),
-            routing_key=self.queue_name,
-        )
+        while 1:
+            try:
+                await self.channel.default_exchange.publish(
+                    aio_pika.Message(body=event_data),
+                    routing_key=self.queue_name,
+                )
+                break
+            except Exception as e:
+                self.error(f"Error publishing message to RabbitMQ: {e}, retrying...")
+                await self.helpers.sleep(1)
 
     async def cleanup(self):
         # Close the connection

diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py
index eaa9636b1c..d67cb45999 100644
--- a/bbot/test/test_step_1/test_python_api.py
+++ b/bbot/test/test_step_1/test_python_api.py
@@ -119,7 +119,7 @@ def test_python_api_validation():
     # normal module as output module
     with pytest.raises(ValidationError) as error:
         Scanner(output_modules=["robots"])
-    assert str(error.value) == 'Could not find output module "robots". Did you mean "web_report"?'
+    assert str(error.value) == 'Could not find output module "robots". Did you mean "rabbitmq"?'
     # invalid preset type
     with pytest.raises(ValidationError) as error:
         Scanner(preset="asdf")
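For reference, a minimal consumer for the queue this module publishes to might look like the following sketch. It assumes the module defaults (amqp://guest:guest@localhost/ and the bbot_events queue, both configurable via the options above) and mirrors the pattern used in test_module_rabbitmq.py:

import json
import asyncio

import aio_pika


async def consume_bbot_events():
    # Connect with the same defaults the output module uses
    connection = await aio_pika.connect_robust("amqp://guest:guest@localhost/")
    channel = await connection.channel()
    # declare_queue() is idempotent; durable=True matches the module's declaration
    queue = await channel.declare_queue("bbot_events", durable=True)
    async with queue.iterator() as queue_iter:
        async for message in queue_iter:
            async with message.process():  # acknowledges the message on successful exit
                event = json.loads(message.body.decode("utf-8"))
                print(event["type"], event.get("host"))


asyncio.run(consume_bbot_events())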
From 5fef8e2c7a76cbc54b66866de9ecab865d365aa9 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 19:58:06 -0500
Subject: [PATCH 021/147] better mongo error handling

---
 bbot/modules/output/mongo.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py
index 6ad16620f6..118ca82378 100644
--- a/bbot/modules/output/mongo.py
+++ b/bbot/modules/output/mongo.py
@@ -59,7 +59,13 @@ async def setup(self):
     async def handle_event(self, event):
         event_json = event.json()
         event_pydantic = Event(**event_json)
-        await self.events_collection.insert_one(event_pydantic.model_dump())
+        while 1:
+            try:
+                await self.events_collection.insert_one(event_pydantic.model_dump())
+                break
+            except Exception as e:
+                self.warning(f"Error inserting event into MongoDB: {e}, retrying...")
+                await self.helpers.sleep(1)
 
         if event.type == "SCAN":
             scan_json = Scan(**event.data_json).model_dump()

From 59be2b23498ea9c7ba4ca2518d69d2835b9b6484 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 20:08:19 -0500
Subject: [PATCH 022/147] added zeromq output module

---
 bbot/modules/output/zeromq.py                    | 47 +++++++++++++++++++
 .../module_tests/test_module_zeromq.py          | 46 +++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 bbot/modules/output/zeromq.py
 create mode 100644 bbot/test/test_step_2/module_tests/test_module_zeromq.py

diff --git a/bbot/modules/output/zeromq.py b/bbot/modules/output/zeromq.py
new file mode 100644
index 0000000000..938f234545
--- /dev/null
+++ b/bbot/modules/output/zeromq.py
@@ -0,0 +1,47 @@
+import zmq
+import zmq.asyncio
+import json
+
+from bbot.modules.output.base import BaseOutputModule
+
+
+class ZeroMQ(BaseOutputModule):
+    watched_events = ["*"]
+    meta = {
+        "description": "Output scan data to a ZeroMQ socket (PUB)",
+        "created_date": "2024-11-22",
+        "author": "@TheTechromancer",
+    }
+    options = {
+        "zmq_address": "",
+    }
+    options_desc = {
+        "zmq_address": "The ZeroMQ socket address to publish events to (e.g. 
tcp://localhost:5555)", + } + + async def setup(self): + self.zmq_address = self.config.get("zmq_address", "") + if not self.zmq_address: + return False, "ZeroMQ address is required" + self.context = zmq.asyncio.Context() + self.socket = self.context.socket(zmq.PUB) + self.socket.bind(self.zmq_address) + self.verbose("ZeroMQ publisher socket bound successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + while 1: + try: + await self.socket.send(event_data) + break + except Exception as e: + self.warning(f"Error sending event to ZeroMQ: {e}, retrying...") + await self.helpers.sleep(1) + + async def cleanup(self): + # Close the socket + self.socket.close() + self.context.term() + self.verbose("ZeroMQ publisher socket closed successfully") diff --git a/bbot/test/test_step_2/module_tests/test_module_zeromq.py b/bbot/test/test_step_2/module_tests/test_module_zeromq.py new file mode 100644 index 0000000000..8c118570ef --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_zeromq.py @@ -0,0 +1,46 @@ +import json +import zmq +import zmq.asyncio + +from .base import ModuleTestBase + + +class TestZeroMQ(ModuleTestBase): + config_overrides = { + "modules": { + "zeromq": { + "zmq_address": "tcp://localhost:5555", + } + } + } + + async def setup_before_prep(self, module_test): + # Setup ZeroMQ context and socket + self.context = zmq.asyncio.Context() + self.socket = self.context.socket(zmq.SUB) + self.socket.connect("tcp://localhost:5555") + self.socket.setsockopt_string(zmq.SUBSCRIBE, "") + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from ZeroMQ + zmq_events = [] + while len(zmq_events) < len(events_json): + msg = await self.socket.recv() + event_data = json.loads(msg.decode("utf-8")) + zmq_events.append(event_data) + + zmq_events.sort(key=lambda x: x["timestamp"]) + + assert len(events_json) == len(zmq_events), "Number of events does not match" + + # Verify the events match + assert events_json == zmq_events, "Events do not match" + + finally: + # Clean up: Close the ZeroMQ socket + self.socket.close() + self.context.term() From 4c757463d508a357474a6f29d977c5e7ef572a81 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:42:49 -0500 Subject: [PATCH 023/147] bump version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d333855f28..6c31fb1685 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bbot" -version = "2.3.0" +version = "3.0.0" description = "OSINT automation for hackers." 
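A note on consuming from the zeromq module introduced above: because it publishes on a PUB socket, subscribers only receive events sent after they connect, so the consumer should be attached before the scan starts. A minimal subscriber sketch, assuming the tcp://localhost:5555 address used in the test:

import json
import asyncio

import zmq
import zmq.asyncio


async def subscribe_bbot_events():
    context = zmq.asyncio.Context()
    socket = context.socket(zmq.SUB)
    # The module bind()s the PUB socket, so the consumer connect()s
    socket.connect("tcp://localhost:5555")
    socket.setsockopt_string(zmq.SUBSCRIBE, "")  # empty prefix = receive everything
    while True:
        message = await socket.recv()
        event = json.loads(message.decode("utf-8"))
        print(event["type"])


asyncio.run(subscribe_bbot_events())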
authors = [ "TheTechromancer", @@ -104,7 +104,7 @@ lint.ignore = ["E402", "E711", "E712", "E713", "E721", "E731", "E741", "F401", " [tool.poetry-dynamic-versioning] enable = true metadata = false -format-jinja = 'v2.3.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' +format-jinja = 'v3.0.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' [tool.poetry-dynamic-versioning.substitution] files = ["*/__init__.py"] From 7f99c7eb12e6fc95a4d372a882376ed6e7f13666 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 23:22:52 -0500 Subject: [PATCH 024/147] fix conflict --- bbot/core/event/base.py | 6 +-- bbot/scanner/scanner.py | 3 +- bbot/test/bbot_fixtures.py | 76 ++++++++++++++++++++++++++------------ 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index bb6d92e91f..7255b024f0 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,6 +40,7 @@ validators, get_file_extension, ) +from bbot.db.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") @@ -802,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = self.timestamp.isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() # parent event parent_id = self.parent_id if parent_id: @@ -811,8 +812,7 @@ def json(self, mode="json", siem_friendly=False): if parent_uuid: j["parent_uuid"] = parent_uuid # tags - if self.tags: - j.update({"tags": list(self.tags)}) + j.update({"tags": list(self.tags)}) # parent module if self.module: j.update({"module": str(self.module)}) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 49114a5b5d..62e5c9d3ab 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -500,7 +500,8 @@ async def setup_modules(self, remove_failed=True): self.modules[module.name].set_error_state() hard_failed.append(module.name) else: - self.info(f"Setup soft-failed for {module.name}: {msg}") + log_fn = self.warning if module._type == "output" else self.info + log_fn(f"Setup soft-failed for {module.name}: {msg}") soft_failed.append(module.name) if (not status) and (module._intercept or remove_failed): # if a intercept module fails setup, we always remove it diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index e1e3aa1b8b..4d73d036c1 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -147,48 +147,78 @@ def helpers(scan): @pytest.fixture def events(scan): + + dummy_module = scan._make_dummy_module("dummy_module") + class bbot_events: - localhost = scan.make_event("127.0.0.1", parent=scan.root_event) - ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event) - netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event) - ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event) - netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event) - domain = scan.make_event("publicAPIs.org", parent=scan.root_event) - subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event) - email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event) - open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event) + localhost = scan.make_event("127.0.0.1", parent=scan.root_event, module=dummy_module) + ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event, module=dummy_module) + netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event, module=dummy_module) + ipv6 = 
scan.make_event("2001:4860:4860::8888", parent=scan.root_event, module=dummy_module) + netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event, module=dummy_module) + domain = scan.make_event("publicAPIs.org", parent=scan.root_event, module=dummy_module) + subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event, module=dummy_module) + email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event, module=dummy_module) + open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event, module=dummy_module) protocol = scan.make_event( - {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, "PROTOCOL", parent=scan.root_event + {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, + "PROTOCOL", + parent=scan.root_event, + module=dummy_module, + ) + ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event, module=dummy_module) + ipv6_open_port = scan.make_event( + "[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event, module=dummy_module + ) + url_unverified = scan.make_event( + "https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv4_url_unverified = scan.make_event( + "https://8.8.8.8:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv6_url_unverified = scan.make_event( + "https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event, module=dummy_module ) - ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event) - ipv6_open_port = scan.make_event("[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event) - url_unverified = scan.make_event("https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event) - ipv4_url_unverified = scan.make_event("https://8.8.8.8:443/hellofriend", parent=scan.root_event) - ipv6_url_unverified = scan.make_event("https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event) url = scan.make_event( - "https://api.publicAPIs.org:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://api.publicAPIs.org:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, ) ipv4_url = scan.make_event( - "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event, module=dummy_module ) ipv6_url = scan.make_event( - "https://[2001:4860:4860::8888]:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://[2001:4860:4860::8888]:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, + ) + url_hint = scan.make_event( + "https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url, module=dummy_module ) - url_hint = scan.make_event("https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url) vulnerability = scan.make_event( {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, "VULNERABILITY", parent=scan.root_event, + module=dummy_module, + ) + finding = scan.make_event( + {"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event, module=dummy_module + ) + vhost = scan.make_event( + {"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event, module=dummy_module ) - finding = scan.make_event({"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event) - vhost = scan.make_event({"host": "evilcorp.com", 
"vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event) - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) + http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module) storage_bucket = scan.make_event( {"name": "storage", "url": "https://storage.blob.core.windows.net"}, "STORAGE_BUCKET", parent=scan.root_event, + module=dummy_module, ) - emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event) + emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event, module=dummy_module) bbot_events.all = [ # noqa: F841 bbot_events.localhost, From bca05004da1a766c4d07248660d47e21a66f6022 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 17:25:16 -0500 Subject: [PATCH 025/147] resolve conflict --- bbot/core/event/base.py | 2 +- bbot/models/helpers.py | 16 +++ bbot/models/pydantic.py | 111 ++++++++++++++++++ bbot/{db/sql/models.py => models/sql.py} | 0 bbot/modules/output/mongo.py | 68 +++++++++++ bbot/modules/templates/sql.py | 2 +- bbot/test/test_step_1/test_db_models.py | 29 +++++ .../module_tests/test_module_mongo.py | 81 +++++++++++++ 8 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 bbot/models/helpers.py create mode 100644 bbot/models/pydantic.py rename bbot/{db/sql/models.py => models/sql.py} (100%) create mode 100644 bbot/modules/output/mongo.py create mode 100644 bbot/test/test_step_1/test_db_models.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_mongo.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 7255b024f0..18074321d8 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,7 +40,7 @@ validators, get_file_extension, ) -from bbot.db.helpers import naive_datetime_validator +from bbot.models.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py new file mode 100644 index 0000000000..40e127c53b --- /dev/null +++ b/bbot/models/helpers.py @@ -0,0 +1,16 @@ +from datetime import datetime +from typing_extensions import Annotated +from pydantic.functional_validators import AfterValidator + + +def naive_datetime_validator(d: datetime): + """ + Converts all dates into UTC, then drops timezone information. + + This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. 
+ """ + # drop timezone info + return d.replace(tzinfo=None) + + +NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py new file mode 100644 index 0000000000..0d54cc91b7 --- /dev/null +++ b/bbot/models/pydantic.py @@ -0,0 +1,111 @@ +import json +import logging +from datetime import datetime +from typing import Optional, List, Union, Annotated +from pydantic import BaseModel, ConfigDict, field_serializer + +from bbot.models.helpers import NaiveUTC, naive_datetime_validator + +log = logging.getLogger("bbot_server.models") + + +class BBOTBaseModel(BaseModel): + model_config = ConfigDict(extra="ignore") + + def to_json(self, **kwargs): + return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + + def __hash__(self): + return hash(self.to_json()) + + def __eq__(self, other): + return hash(self) == hash(other) + + +### EVENT ### + +class Event(BBOTBaseModel): + uuid: Annotated[str, "indexed", "unique"] + id: Annotated[str, "indexed"] + type: Annotated[str, "indexed"] + scope_description: str + data: Union[dict, str] + host: Annotated[Optional[str], "indexed"] = None + port: Optional[int] = None + netloc: Optional[str] = None + # we store the host in reverse to allow for instant subdomain queries + # this works because indexes are left-anchored, but we need to search starting from the right side + reverse_host: Annotated[Optional[str], "indexed"] = "" + resolved_hosts: Union[List, None] = None + dns_children: Union[dict, None] = None + web_spider_distance: int = 10 + scope_distance: int = 10 + scan: Annotated[str, "indexed"] + timestamp: Annotated[NaiveUTC, "indexed"] + parent: Annotated[str, "indexed"] + parent_uuid: Annotated[str, "indexed"] + tags: List = [] + module: Annotated[Optional[str], "indexed"] = None + module_sequence: Optional[str] = None + discovery_context: str = "" + discovery_path: List[str] = [] + parent_chain: List[str] = [] + + def __init__(self, **data): + super().__init__(**data) + if self.host: + self.reverse_host = self.host[::-1] + + @staticmethod + def _get_data(data, type): + if isinstance(data, dict) and list(data) == [type]: + return data[type] + return data + + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + + @field_serializer("timestamp") + def serialize_timestamp(self, timestamp: datetime, _info): + return naive_datetime_validator(timestamp).isoformat() + + +### SCAN ### + +class Scan(BBOTBaseModel): + id: Annotated[str, "indexed", "unique"] + name: str + status: Annotated[str, "indexed"] + started_at: Annotated[NaiveUTC, "indexed"] + finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + duration_seconds: Optional[float] = None + duration: Optional[str] = None + target: dict + preset: dict + + @classmethod + def from_scan(cls, scan): + return cls( + id=scan.id, + name=scan.name, + status=scan.status, + started_at=scan.started_at, + ) + + +### TARGET ### + +class Target(BBOTBaseModel): + name: str = "Default Target" + strict_scope: bool = False + seeds: List = [] + whitelist: List = [] + blacklist: List = [] + hash: Annotated[str, "indexed", "unique"] + scope_hash: Annotated[str, "indexed"] + seed_hash: Annotated[str, "indexed"] + whitelist_hash: Annotated[str, "indexed"] + blacklist_hash: Annotated[str, "indexed"] diff --git a/bbot/db/sql/models.py b/bbot/models/sql.py similarity index 100% rename from bbot/db/sql/models.py rename to bbot/models/sql.py diff 
--git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py new file mode 100644 index 0000000000..dd4efa47ce --- /dev/null +++ b/bbot/modules/output/mongo.py @@ -0,0 +1,68 @@ +from motor.motor_asyncio import AsyncIOMotorClient + +from bbot.models.pydantic import Event +from bbot.modules.output.base import BaseOutputModule + + +class Mongo(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a MongoDB database", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "uri": "mongodb://localhost:27017", + "database": "bbot", + "collection_prefix": "", + } + options_desc = { + "uri": "The URI of the MongoDB server", + "database": "The name of the database to use", + "collection_prefix": "Prefix each collection with this string", + } + deps_pip = ["motor~=3.6.0"] + + async def setup(self): + self.uri = self.config.get("uri", "mongodb://localhost:27017") + self.db_client = AsyncIOMotorClient(self.uri) + + # Ping the server to confirm a successful connection + try: + await self.db_client.admin.command("ping") + self.verbose("MongoDB connection successful") + except Exception as e: + return False, f"Failed to connect to MongoDB: {e}" + + self.db_name = self.config.get("database", "bbot") + self.db = self.db_client[self.db_name] + self.collection_prefix = self.config.get("collection_prefix", "") + self.events_collection = self.db[f"{self.collection_prefix}events"] + self.scans_collection = self.db[f"{self.collection_prefix}scans"] + self.targets_collection = self.db[f"{self.collection_prefix}targets"] + + # Build an index for each field in reverse_host and host + for field in Event._indexed_fields(): + await self.collection.create_index([(field, 1)]) + self.verbose(f"Index created for field: {field}") + + return True + + async def handle_event(self, event): + event_json = event.json() + event_pydantic = Event(**event_json) + await self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": + # here we merge the scan with the one sharing its UUID. 
+ existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + # Merge logic here, for example, update the existing scan with new data + updated_scan = {**existing_scan, **event_pydantic.model_dump()} + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + diff --git a/bbot/modules/templates/sql.py b/bbot/modules/templates/sql.py index 39b4e6f00e..42f5494555 100644 --- a/bbot/modules/templates/sql.py +++ b/bbot/modules/templates/sql.py @@ -3,7 +3,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession -from bbot.db.sql.models import Event, Scan, Target +from bbot.models.sql import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py new file mode 100644 index 0000000000..4e003f6f57 --- /dev/null +++ b/bbot/test/test_step_1/test_db_models.py @@ -0,0 +1,29 @@ +from bbot.models.pydantic import Event +from ..bbot_fixtures import * # noqa + + +def test_pydantic_models(events): + + test_event = Event(**events.ipv4.json()) + assert sorted(test_event._indexed_fields()) == [ + "host", + "id", + "module", + "parent", + "parent_uuid", + "reverse_host", + "scan", + "timestamp", + "type", + "uuid", + ] + + # events + for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + e = getattr(events, event) + event_json = e.json() + event_pydantic = Event(**event_json) + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + + +# TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py new file mode 100644 index 0000000000..10a8655e81 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -0,0 +1,81 @@ +from .base import ModuleTestBase + + +class TestMongo(ModuleTestBase): + test_db_name = "bbot_test" + test_collection_name = "events_test" + config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + + async def setup_before_module(self): + from motor.motor_asyncio import AsyncIOMotorClient + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # Check that there are no events in the collection + count = await collection.count_documents({}) + assert count == 0, "There are existing events in the database" + + # Close the MongoDB connection + client.close() + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + from motor.motor_asyncio import AsyncIOMotorClient + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # make sure the collection has all the right indexes + cursor = collection.list_indexes() + indexes = await cursor.to_list(length=None) + for field 
in Event._indexed_fields(): + assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + + # Fetch all events from the collection + cursor = collection.find({}) + db_events = await cursor.to_list(length=None) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert main_event is not None, "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Compare the sorted lists + assert len(events_json) == len(db_events_pydantic) + # Events don't match exactly because the mongo ones have reverse_host + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the collection + await collection.delete_many({}) + # Close the MongoDB connection + client.close() From 0eb565de405af77653a612cfa1783961756671af Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 18:39:37 -0500 Subject: [PATCH 026/147] more wip mongo --- bbot/models/helpers.py | 6 ++++- bbot/models/pydantic.py | 31 +++++++++++++++++-------- bbot/modules/output/mongo.py | 19 +++++++-------- bbot/test/test_step_1/test_db_models.py | 8 +++++++ 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 40e127c53b..985c845994 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -3,7 +3,7 @@ from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime): +def naive_datetime_validator(d: datetime) -> datetime: """ Converts all dates into UTC, then drops timezone information. 
@@ -13,4 +13,8 @@ def naive_datetime_validator(d: datetime): return d.replace(tzinfo=None) +def naive_utc_now() -> datetime: + return naive_datetime_validator(datetime.now()) + + NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0d54cc91b7..fe179878e7 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -2,9 +2,9 @@ import logging from datetime import datetime from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer +from pydantic import BaseModel, ConfigDict, field_serializer, Field -from bbot.models.helpers import NaiveUTC, naive_datetime_validator +from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,8 +12,18 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, **kwargs): - return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + def to_json(self, preserve_datetime=False): + ret = self.model_dump() + if preserve_datetime: + for key in ret: + val = getattr(self, key, None) + if isinstance(val, datetime): + ret[key] = val + return ret + + def to_json_string(self, preserve_datetime=False, **kwargs): + kwargs['sort_keys'] = True + return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) def __hash__(self): return hash(self.to_json()) @@ -21,6 +31,12 @@ def __hash__(self): def __eq__(self, other): return hash(self) == hash(other) + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + ### EVENT ### @@ -42,6 +58,7 @@ class Event(BBOTBaseModel): scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[NaiveUTC, "indexed"] + inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -62,12 +79,6 @@ def _get_data(data, type): return data[type] return data - @classmethod - def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) - @field_serializer("timestamp") def serialize_timestamp(self, timestamp: datetime, _info): return naive_datetime_validator(timestamp).isoformat() diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index dd4efa47ce..bb92d19d8a 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -1,6 +1,6 @@ from motor.motor_asyncio import AsyncIOMotorClient -from bbot.models.pydantic import Event +from bbot.models.pydantic import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule @@ -42,9 +42,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event._indexed_fields(): - await self.collection.create_index([(field, 1)]) - self.verbose(f"Index created for field: {field}") + for field in Event.model_fields: + if "indexed" in field.metadata: + unique = "unique" in field.metadata + await self.collection.create_index([(field, 1)], unique=unique) + self.verbose(f"Index created for field: {field}") return True @@ -52,17 +54,14 @@ async def handle_event(self, event): event_json = event.json() event_pydantic = Event(**event_json) await 
self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": - # here we merge the scan with the one sharing its UUID. + scan_json = Scan.from_event(event).model_dump() existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) if existing_scan: - # Merge logic here, for example, update the existing scan with new data - updated_scan = {**existing_scan, **event_pydantic.model_dump()} - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") - - diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 4e003f6f57..1ba970f0e7 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,3 +1,5 @@ +from datetime import datetime + from bbot.models.pydantic import Event from ..bbot_fixtures import * # noqa @@ -23,6 +25,12 @@ def test_pydantic_models(events): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) + event_pydantic_dict = event_pydantic.to_json() + event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + assert isinstance(event_pydantic_dict["timestamp"], str) + assert isinstance(event_pydantic_dict["inserted_at"], str) + assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) + assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json From 82aa48eed1ba17cc08e0f4c97fa2666d14dcd94a Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 20:08:24 -0500 Subject: [PATCH 027/147] more mongo wip --- bbot/models/pydantic.py | 66 ++++++++------- bbot/modules/output/mongo.py | 34 ++++---- bbot/test/test_step_1/test_db_models.py | 14 +++- .../module_tests/test_module_mongo.py | 81 +++++++++++++++---- 4 files changed, 133 insertions(+), 62 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index fe179878e7..906801693a 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,10 +1,9 @@ -import json import logging from datetime import datetime -from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer, Field +from pydantic import BaseModel, ConfigDict, Field +from typing import Optional, List, Union, Annotated, get_type_hints -from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now +from bbot.models.helpers import NaiveUTC, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,19 +11,14 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, preserve_datetime=False): - ret = self.model_dump() - if preserve_datetime: - for key in ret: - val = getattr(self, key, None) - if isinstance(val, datetime): - ret[key] = val + def model_dump(self, preserve_datetime=False, **kwargs): + ret = super().model_dump(**kwargs) + if not preserve_datetime: + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].isoformat() return ret - def to_json_string(self, 
preserve_datetime=False, **kwargs): - kwargs['sort_keys'] = True - return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) - def __hash__(self): return hash(self.to_json()) @@ -33,13 +27,37 @@ def __eq__(self, other): @classmethod def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) + return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) + + @classmethod + def _get_type_hints(cls): + """ + Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + """ + type_hints = get_type_hints(cls) + unwrapped_type_hints = {} + for field_name in cls.model_fields: + type_hint = type_hints[field_name] + while 1: + if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + type_hint = type_hint.__args__[0] + else: + break + unwrapped_type_hints[field_name] = type_hint + return unwrapped_type_hints + + @classmethod + def _datetime_fields(cls): + datetime_fields = [] + for field_name, type_hint in cls._get_type_hints().items(): + if type_hint == datetime: + datetime_fields.append(field_name) + return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -73,19 +91,10 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] - @staticmethod - def _get_data(data, type): - if isinstance(data, dict) and list(data) == [type]: - return data[type] - return data - - @field_serializer("timestamp") - def serialize_timestamp(self, timestamp: datetime, _info): - return naive_datetime_validator(timestamp).isoformat() - ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str @@ -109,6 +118,7 @@ def from_scan(cls, scan): ### TARGET ### + class Target(BBOTBaseModel): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bb92d19d8a..bc323d7ad9 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -14,18 +14,24 @@ class Mongo(BaseOutputModule): options = { "uri": "mongodb://localhost:27017", "database": "bbot", + "username": "", + "password": "", "collection_prefix": "", } options_desc = { "uri": "The URI of the MongoDB server", "database": "The name of the database to use", + "username": "The username to use to connect to the database", + "password": "The password to use to connect to the database", "collection_prefix": "Prefix each collection with this string", } deps_pip = ["motor~=3.6.0"] async def setup(self): self.uri = self.config.get("uri", "mongodb://localhost:27017") - self.db_client = AsyncIOMotorClient(self.uri) + self.username = self.config.get("username", "") + self.password = self.config.get("password", "") + self.db_client = AsyncIOMotorClient(self.uri, username=self.username, password=self.password) # Ping the server to confirm a successful connection try: @@ -42,11 +48,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event.model_fields: + for field_name, field in Event.model_fields.items(): if "indexed" in field.metadata: unique = "unique" in field.metadata - await self.collection.create_index([(field, 1)], unique=unique) - self.verbose(f"Index created for field: {field}") + await 
self.events_collection.create_index([(field_name, 1)], unique=unique) + self.verbose(f"Index created for field: {field_name} (unique={unique})") return True @@ -55,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - else: - # Insert as a new scan if no existing scan is found - await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + # if event.type == "SCAN": + # scan_json = Scan.from_event(event).model_dump() + # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + # if existing_scan: + # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + # else: + # # Insert as a new scan if no existing scan is found + # await self.scans_collection.insert_one(event_pydantic.model_dump()) + # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 1ba970f0e7..5a6fce547c 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -6,6 +6,8 @@ def test_pydantic_models(events): + assert Event._datetime_fields() == ["inserted_at", "timestamp"] + test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ "host", @@ -20,18 +22,22 @@ def test_pydantic_models(events): "uuid", ] - # events + # convert events to pydantic and back, making sure they're exactly the same for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) - event_pydantic_dict = event_pydantic.to_json() - event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + event_pydantic_dict = event_pydantic.model_dump() + event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) + assert isinstance(event_json["timestamp"], str) + assert isinstance(e.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, datetime) + assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], str) assert isinstance(event_pydantic_dict["inserted_at"], str) assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json # TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 10a8655e81..839e46156e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -1,21 +1,58 @@ +import time +import asyncio + from .base import ModuleTestBase class TestMongo(ModuleTestBase): test_db_name = "bbot_test" 
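The reverse_host field indexed above exists so that subdomain lookups can use a left-anchored index instead of scanning the collection. A sketch of such a query with motor follows; the database name, collection name, and credentials mirror the module defaults and the test below, so adjust them to your config:

import re
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient


async def find_subdomains(domain):
    client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife")
    events = client["bbot"]["events"]
    # "api.evilcorp.com" is stored as "moc.proclive.ipa", so an anchored regex
    # on the reversed domain matches the whole subtree via the index
    prefix = re.escape(domain[::-1])
    cursor = events.find({"reverse_host": {"$regex": f"^{prefix}"}})
    return sorted({e["host"] async for e in cursor})


print(asyncio.run(find_subdomains("evilcorp.com")))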
- test_collection_name = "events_test" - config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + test_collection_prefix = "test_" + config_overrides = { + "modules": { + "mongo": { + "database": test_db_name, + "username": "bbot", + "password": "bbotislife", + "collection_prefix": test_collection_prefix, + } + } + } + + async def setup_before_prep(self, module_test): + + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-mongo", + "--rm", + "-e", + "MONGO_INITDB_ROOT_USERNAME=bbot", + "-e", + "MONGO_INITDB_ROOT_PASSWORD=bbotislife", + "-p", + "27017:27017", + "-d", + "mongo", + ) - async def setup_before_module(self): from motor.motor_asyncio import AsyncIOMotorClient - # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") - db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + # Connect to the MongoDB collection with retry logic + while True: + try: + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") + db = client[self.test_db_name] + events_collection = db.get_collection(self.test_collection_prefix + "events") + # Attempt a simple operation to confirm the connection + await events_collection.count_documents({}) + break # Exit the loop if connection is successful + except Exception as e: + print(f"Connection failed: {e}. Retrying in 5 seconds...") + time.sleep(0.5) # Check that there are no events in the collection - count = await collection.count_documents({}) + count = await events_collection.count_documents({}) assert count == 0, "There are existing events in the database" # Close the MongoDB connection @@ -30,20 +67,30 @@ async def check(self, module_test, events): events_json.sort(key=lambda x: x["timestamp"]) # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + events_collection = db.get_collection(self.test_collection_prefix + "events") # make sure the collection has all the right indexes - cursor = collection.list_indexes() + cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" # Fetch all events from the collection - cursor = collection.find({}) + cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + # we currently don't store timestamps as datetime objects because mongodb has lower precision + # assert isinstance(db_event["timestamp"], datetime) + # assert isinstance(db_event["inserted_at"], datetime) + assert isinstance(db_event["timestamp"], str) + assert isinstance(db_event["inserted_at"], str) + # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] db_events_pydantic.sort(key=lambda x: x["timestamp"]) @@ -65,17 +112,19 @@ async def check(self, module_test, events): main_event.get("reverse_host") == expected_reverse_host ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" - # Compare the sorted lists - assert len(events_json) == 
len(db_events_pydantic) - # Events don't match exactly because the mongo ones have reverse_host + # Events don't match exactly because the mongo ones have reverse_host and inserted_at assert events_json != db_events_pydantic for db_event in db_events_pydantic: db_event.pop("reverse_host") + db_event.pop("inserted_at") # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" finally: # Clean up: Delete all documents in the collection - await collection.delete_many({}) + await events_collection.delete_many({}) # Close the MongoDB connection client.close() + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-mongo", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 64b43128a29c22018fcd4ab5ee0bbdfd086b26f0 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 20 Nov 2024 11:54:12 -0500 Subject: [PATCH 028/147] skip distro tests --- bbot/test/test_step_2/module_tests/test_module_mongo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 839e46156e..31e7f70747 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -17,6 +17,7 @@ class TestMongo(ModuleTestBase): } } } + skip_distro_tests = True async def setup_before_prep(self, module_test): @@ -48,7 +49,7 @@ async def setup_before_prep(self, module_test): await events_collection.count_documents({}) break # Exit the loop if connection is successful except Exception as e: - print(f"Connection failed: {e}. Retrying in 5 seconds...") + print(f"Connection failed: {e}. Retrying...") time.sleep(0.5) # Check that there are no events in the collection From 56f3659fb34f9165763e123cc5a8b64967271661 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 10:35:12 -0500 Subject: [PATCH 029/147] more wip mongo --- bbot/core/event/base.py | 8 ++++++-- bbot/models/pydantic.py | 10 +++++----- bbot/modules/output/mongo.py | 20 ++++++++++---------- bbot/test/bbot_fixtures.py | 14 +++++++------- bbot/test/test_step_1/test_db_models.py | 9 +++------ bbot/test/test_step_1/test_events.py | 6 +++--- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 18074321d8..ebaac46618 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -803,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1770,7 +1770,11 @@ def event_from_json(j, siem_friendly=False): resolved_hosts = j.get("resolved_hosts", []) event._resolved_hosts = set(resolved_hosts) - event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) + # accept both isoformat and unix timestamp + try: + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + except Exception: + event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] parent_id = j.get("parent", None) if parent_id is not None: diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 906801693a..388d85f05f 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -11,12 +11,12 @@ class 
BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, preserve_datetime=False, **kwargs): + def model_dump(self, **kwargs): ret = super().model_dump(**kwargs) - if not preserve_datetime: - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].isoformat() + # convert datetime fields to unix timestamps + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].timestamp() return ret def __hash__(self): diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bc323d7ad9..03185b169c 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -61,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - # if event.type == "SCAN": - # scan_json = Scan.from_event(event).model_dump() - # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - # if existing_scan: - # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - # else: - # # Insert as a new scan if no existing scan is found - # await self.scans_collection.insert_one(event_pydantic.model_dump()) - # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + if event.type == "SCAN": + scan_json = Scan.from_event(event).model_dump() + existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 4d73d036c1..229c58a290 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -@pytest.fixture(scope="session", autouse=True) -def install_all_python_deps(): - deps_pip = set() - for module in DEFAULT_PRESET.module_loader.preloaded().values(): - deps_pip.update(set(module.get("deps", {}).get("pip", []))) +# @pytest.fixture(scope="session", autouse=True) +# def install_all_python_deps(): +# deps_pip = set() +# for module in DEFAULT_PRESET.module_loader.preloaded().values(): +# deps_pip.update(set(module.get("deps", {}).get("pip", []))) - constraint_file = tempwordlist(get_python_constraints()) +# constraint_file = tempwordlist(get_python_constraints()) - subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) +# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 5a6fce547c..d29e7e79a8 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -28,15 +28,12 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) - assert 
isinstance(event_json["timestamp"], str) + assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json - assert isinstance(event_pydantic_dict["timestamp"], str) - assert isinstance(event_pydantic_dict["inserted_at"], str) - assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) - assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) + assert isinstance(event_pydantic_dict["timestamp"], float) + assert isinstance(event_pydantic_dict["inserted_at"], float) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 8156fc7969..5c6dedad8a 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.isoformat() + timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) @@ -515,7 +515,7 @@ async def test_events(events, helpers): assert reconstituted_event.uuid == db_event.uuid assert reconstituted_event.parent_uuid == scan.root_event.uuid assert reconstituted_event.scope_distance == 1 - assert reconstituted_event.timestamp.isoformat() == timestamp + assert reconstituted_event.timestamp.timestamp() == timestamp assert reconstituted_event.data == "evilcorp.com:80" assert reconstituted_event.type == "OPEN_TCP_PORT" assert reconstituted_event.host == "evilcorp.com" @@ -538,7 +538,7 @@ async def test_events(events, helpers): assert json_event_siemfriendly["timestamp"] == timestamp reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.isoformat() == timestamp + assert reconstituted_event2.timestamp.timestamp() == timestamp assert reconstituted_event2.data == "evilcorp.com:80" assert reconstituted_event2.type == "OPEN_TCP_PORT" assert reconstituted_event2.host == "evilcorp.com" From 10834a064d71cb554e73c590b242e51d0c496073 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 19:14:15 -0500 Subject: [PATCH 030/147] remove siem_friendly --- bbot/core/event/base.py | 18 +++++++--------- bbot/models/pydantic.py | 14 ++++++++----- bbot/models/sql.py | 21 +++++++------------ bbot/modules/output/http.py | 5 +---- bbot/modules/output/json.py | 6 ++---- bbot/modules/output/mongo.py | 8 +++++++ bbot/test/test_step_1/test_events.py | 21 +++++-------------- .../module_tests/test_module_http.py | 9 -------- .../module_tests/test_module_json.py | 15 ------------- .../module_tests/test_module_mongo.py | 20 +++++++++++++----- docs/scanning/tips_and_tricks.md | 18 ---------------- 11 files changed, 55 insertions(+), 100 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index ebaac46618..715afea69f 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -756,7 +756,7 @@ def __contains__(self, other): return bool(radixtarget.search(other.host)) return False - def json(self, mode="json", siem_friendly=False): + def json(self, mode="json"): """ 
Serializes the event object to a JSON-compatible dictionary. @@ -765,7 +765,6 @@ def json(self, mode="json", siem_friendly=False): Parameters: mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id". - siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. This ensures the value of "data" is always the same type (a dictionary). Returns: dict: JSON-serializable dictionary representation of the event object. @@ -782,10 +781,12 @@ def json(self, mode="json", siem_friendly=False): data = data_attr else: data = smart_decode(self.data) - if siem_friendly: - j["data"] = {self.type: data} - else: + if isinstance(data, str): j["data"] = data + elif isinstance(data, dict): + j["data_json"] = data + else: + raise ValueError(f"Invalid data type: {type(data)}") # host, dns children if self.host: j["host"] = str(self.host) @@ -1725,7 +1726,7 @@ def make_event( ) -def event_from_json(j, siem_friendly=False): +def event_from_json(j): """ Creates an event object from a JSON dictionary. @@ -1757,10 +1758,7 @@ def event_from_json(j, siem_friendly=False): "context": j.get("discovery_context", None), "dummy": True, } - if siem_friendly: - data = j["data"][event_type] - else: - data = j["data"] + data = j.get("data_json", j.get("data", None)) kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 388d85f05f..0591a93515 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -57,13 +57,13 @@ def _datetime_fields(cls): ### EVENT ### - class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] type: Annotated[str, "indexed"] scope_description: str - data: Union[dict, str] + data: Annotated[Optional[str], "indexed"] = None + data_json: Optional[dict] = None host: Annotated[Optional[str], "indexed"] = None port: Optional[int] = None netloc: Optional[str] = None @@ -75,8 +75,8 @@ class Event(BBOTBaseModel): web_spider_distance: int = 10 scope_distance: int = 10 scan: Annotated[str, "indexed"] - timestamp: Annotated[NaiveUTC, "indexed"] - inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) + timestamp: Annotated[float, "indexed"] + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -91,9 +91,13 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] + def get_data(self): + if self.data is not None: + return self.data + return self.data_json -### SCAN ### +### SCAN ### class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] diff --git a/bbot/models/sql.py b/bbot/models/sql.py index d6e7656108..9c5c8ef11a 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -67,24 +67,18 @@ def __eq__(self, other): ### EVENT ### - class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - data = self._get_data(self.data, self.type) - self.data = {self.type: data} + if self.data is None and self.data_json is None: + raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] def get_data(self): - return self._get_data(self.data, self.type) - - @staticmethod - def _get_data(data, type): - # handle SIEM-friendly format - if isinstance(data, dict) and 
list(data) == [type]: - return data[type] - return data + if self.data is not None: + return self.data + return self.data_json uuid: str = Field( primary_key=True, @@ -94,7 +88,8 @@ def _get_data(data, type): id: str = Field(index=True) type: str = Field(index=True) scope_description: str - data: dict = Field(sa_type=JSON) + data: Optional[str] = Field(default=None, index=True) + data_json: Optional[dict] = Field(default=None) host: Optional[str] port: Optional[int] netloc: Optional[str] @@ -118,7 +113,6 @@ def _get_data(data, type): ### SCAN ### - class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -133,7 +127,6 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### - class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py index 9d9241da0b..7d94148d72 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/http.py @@ -15,7 +15,6 @@ class HTTP(BaseOutputModule): "username": "", "password": "", "timeout": 10, - "siem_friendly": False, } options_desc = { "url": "Web URL", @@ -24,14 +23,12 @@ class HTTP(BaseOutputModule): "username": "Username (basic auth)", "password": "Password (basic auth)", "timeout": "HTTP timeout", - "siem_friendly": "Format JSON in a SIEM-friendly way for ingestion into Elastic, Splunk, etc.", } async def setup(self): self.url = self.config.get("url", "") self.method = self.config.get("method", "POST") self.timeout = self.config.get("timeout", 10) - self.siem_friendly = self.config.get("siem_friendly", False) self.headers = {} bearer = self.config.get("bearer", "") if bearer: @@ -56,7 +53,7 @@ async def handle_event(self, event): method=self.method, auth=self.auth, headers=self.headers, - json=event.json(siem_friendly=self.siem_friendly), + json=event.json(), ) is_success = False if response is None else response.is_success if not is_success: diff --git a/bbot/modules/output/json.py b/bbot/modules/output/json.py index a35fa6aed7..b93d1e4e3f 100644 --- a/bbot/modules/output/json.py +++ b/bbot/modules/output/json.py @@ -11,20 +11,18 @@ class JSON(BaseOutputModule): "created_date": "2022-04-07", "author": "@TheTechromancer", } - options = {"output_file": "", "siem_friendly": False} + options = {"output_file": ""} options_desc = { "output_file": "Output to file", - "siem_friendly": "Output JSON in a SIEM-friendly format for ingestion into Elastic, Splunk, etc.", } _preserve_graph = True async def setup(self): self._prep_output_dir("output.json") - self.siem_friendly = self.config.get("siem_friendly", False) return True async def handle_event(self, event): - event_json = event.json(siem_friendly=self.siem_friendly) + event_json = event.json() event_str = json.dumps(event_json) if self.file is not None: self.file.write(event_str + "\n") diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 03185b169c..5e555ab0ff 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -71,3 +71,11 @@ async def handle_event(self, event): # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + target_data = scan_json.get("target", {}) + target = Target(**target_data) + existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + if existing_target: + await self.targets_collection.replace_one({"uuid": target.uuid}, 
target.model_dump()) + else: + await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 5c6dedad8a..a940dbce06 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -529,28 +529,17 @@ async def test_events(events, helpers): assert hostless_event_json["data"] == "asdf" assert not "host" in hostless_event_json - # SIEM-friendly serialize/deserialize - json_event_siemfriendly = db_event.json(siem_friendly=True) - assert json_event_siemfriendly["scope_distance"] == 1 - assert json_event_siemfriendly["data"] == {"OPEN_TCP_PORT": "evilcorp.com:80"} - assert json_event_siemfriendly["type"] == "OPEN_TCP_PORT" - assert json_event_siemfriendly["host"] == "evilcorp.com" - assert json_event_siemfriendly["timestamp"] == timestamp - reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) - assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.timestamp() == timestamp - assert reconstituted_event2.data == "evilcorp.com:80" - assert reconstituted_event2.type == "OPEN_TCP_PORT" - assert reconstituted_event2.host == "evilcorp.com" - assert "127.0.0.1" in reconstituted_event2.resolved_hosts - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" json_event = http_response.json(mode="graph") + assert "data" in json_event + assert "data_json" not in json_event assert isinstance(json_event["data"], str) json_event = http_response.json() - assert isinstance(json_event["data"], dict) + assert "data" not in json_event + assert "data_json" in json_event + assert isinstance(json_event["data_json"], dict) assert json_event["type"] == "HTTP_RESPONSE" assert json_event["host"] == "example.com" assert json_event["parent"] == scan.root_event.id diff --git a/bbot/test/test_step_2/module_tests/test_module_http.py b/bbot/test/test_step_2/module_tests/test_module_http.py index 43b7189adf..d634765425 100644 --- a/bbot/test/test_step_2/module_tests/test_module_http.py +++ b/bbot/test/test_step_2/module_tests/test_module_http.py @@ -52,12 +52,3 @@ def check(self, module_test, events): assert self.headers_correct == True assert self.method_correct == True assert self.url_correct == True - - -class TestHTTPSIEMFriendly(TestHTTP): - modules_overrides = ["http"] - config_overrides = {"modules": {"http": dict(TestHTTP.config_overrides["modules"]["http"])}} - config_overrides["modules"]["http"]["siem_friendly"] = True - - def verify_data(self, j): - return j["data"] == {"DNS_NAME": "blacklanternsecurity.com"} and j["type"] == "DNS_NAME" diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index 27ed5a55e0..bf79eeb13f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -53,18 +53,3 @@ def check(self, module_test, events): assert dns_reconstructed.discovery_context == context_data assert dns_reconstructed.discovery_path == [context_data] assert dns_reconstructed.parent_chain == [dns_json["uuid"]] - - -class TestJSONSIEMFriendly(ModuleTestBase): - modules_overrides = ["json"] - config_overrides = {"modules": {"json": {"siem_friendly": True}}} - - def check(self, module_test, events): - txt_file = module_test.scan.home / "output.json" - 
lines = list(module_test.scan.helpers.read_file(txt_file)) - passed = False - for line in lines: - e = json.loads(line) - if e["data"] == {"DNS_NAME": "blacklanternsecurity.com"}: - passed = True - assert passed diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 31e7f70747..fcfed7841a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -72,12 +72,16 @@ async def check(self, module_test, events): db = client[self.test_db_name] events_collection = db.get_collection(self.test_collection_prefix + "events") + ### INDEXES ### + # make sure the collection has all the right indexes cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + ### EVENTS ### + # Fetch all events from the collection cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) @@ -86,11 +90,8 @@ async def check(self, module_test, events): assert len(events_json) == len(db_events) for db_event in db_events: - # we currently don't store timestamps as datetime objects because mongodb has lower precision - # assert isinstance(db_event["timestamp"], datetime) - # assert isinstance(db_event["inserted_at"], datetime) - assert isinstance(db_event["timestamp"], str) - assert isinstance(db_event["inserted_at"], str) + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] @@ -121,6 +122,15 @@ async def check(self, module_test, events): # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" + ### SCANS ### + + # Fetch all scans from the collection + cursor = db.get_collection(self.test_collection_prefix + "scans").find({}) + db_scans = await cursor.to_list(length=None) + assert len(db_scans) == 1, "There should be exactly one scan" + db_scan = db_scans[0] + assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + finally: # Clean up: Delete all documents in the collection await events_collection.delete_many({}) diff --git a/docs/scanning/tips_and_tricks.md b/docs/scanning/tips_and_tricks.md index c5073c1d63..e13d82875e 100644 --- a/docs/scanning/tips_and_tricks.md +++ b/docs/scanning/tips_and_tricks.md @@ -108,24 +108,6 @@ config: bbot -t evilcorp.com -p skip_cdns.yml ``` -### Ingest BBOT Data Into SIEM (Elastic, Splunk) - -If your goal is to run a BBOT scan and later feed its data into a SIEM such as Elastic, be sure to enable this option when scanning: - -```bash -bbot -t evilcorp.com -c modules.json.siem_friendly=true -``` - -This ensures the `.data` event attribute is always the same type (a dictionary), by nesting it like so: -```json -{ - "type": "DNS_NAME", - "data": { - "DNS_NAME": "blacklanternsecurity.com" - } -} -``` - ### Custom HTTP Proxy Web pentesters may appreciate BBOT's ability to quickly populate Burp Suite site maps for all subdomains in a target. If your scan includes gowitness, this will capture the traffic as if you manually visited each website in your browser -- including auxiliary web resources and javascript API calls. 
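For example, a scan along these lines would enumerate subdomains and screenshot each discovered website (`subdomain-enum` and `gowitness` are the preset and module names assumed here):

```bash
bbot -t evilcorp.com -p subdomain-enum -m gowitness
```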
To accomplish this, set the `web.http_proxy` config option like so: From 5179013a4936d3b0bb663fbf8767839ef4830006 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:00:28 -0500 Subject: [PATCH 031/147] steady work on mongo, bbot 3.0 ---
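A rough sketch of the serialization contract these changes settle on (string event data is emitted under "data", dict data under "data_json", and timestamps travel as UTC unix floats; this assumes a `Scanner` can be constructed standalone, the way the test fixtures do):

```python
from bbot.scanner import Scanner

scan = Scanner("evilcorp.com")
event = scan.make_event("evilcorp.com:80", "OPEN_TCP_PORT", parent=scan.root_event)

j = event.json()
assert j["data"] == "evilcorp.com:80"     # plain-string data stays under "data"
assert "data_json" not in j               # dict data would land here instead
assert isinstance(j["timestamp"], float)  # UTC unix timestamp, no more isoformat
```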
bbot/core/event/base.py | 7 +- bbot/models/helpers.py | 20 +++--- bbot/models/pydantic.py | 71 +++++++++---------- bbot/models/sql.py | 27 +++---- bbot/modules/output/mongo.py | 16 ++--- bbot/scanner/scanner.py | 12 ++-- bbot/test/bbot_fixtures.py | 14 ++-- bbot/test/test_step_1/test_db_models.py | 25 ++++++- bbot/test/test_step_1/test_events.py | 2 +- .../module_tests/test_module_mongo.py | 12 +++- .../module_tests/test_module_sqlite.py | 14 ++++ 11 files changed, 128 insertions(+), 92 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 715afea69f..64595680ae 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -12,6 +12,7 @@ from copy import copy from pathlib import Path from typing import Optional +from zoneinfo import ZoneInfo from contextlib import suppress from radixtarget import RadixTarget from urllib.parse import urljoin, parse_qs @@ -40,7 +41,7 @@ validators, get_file_extension, ) -from bbot.models.helpers import naive_datetime_validator +from bbot.models.helpers import utc_datetime_validator log = logging.getLogger("bbot.core.event") @@ -804,7 +805,7 @@ def json(self, mode="json"): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() + j["timestamp"] = utc_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1770,7 +1771,7 @@ def event_from_json(j): # accept both isoformat and unix timestamp try: - event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"], ZoneInfo("UTC")) except Exception: event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 985c845994..c7fc078a45 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,20 +1,22 @@ +from datetime import UTC from datetime import datetime from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime) -> datetime: +def utc_datetime_validator(d: datetime) -> datetime: """ - Converts all dates into UTC, then drops timezone information. - - This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. + Converts all dates into UTC """ - # drop timezone info - return d.replace(tzinfo=None) + if d.tzinfo is not None: + return d.astimezone(UTC) + else: + return d.replace(tzinfo=UTC) -def naive_utc_now() -> datetime: - return naive_datetime_validator(datetime.now()) +def utc_now() -> datetime: + return datetime.now(UTC) -NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] +def utc_now_timestamp() -> float: + return utc_now().timestamp() diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0591a93515..356ab2e44c 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,9 +1,8 @@ import logging -from datetime import datetime from pydantic import BaseModel, ConfigDict, Field -from typing import Optional, List, Union, Annotated, get_type_hints +from typing import Optional, List, Union, Annotated -from bbot.models.helpers import NaiveUTC, naive_utc_now +from bbot.models.helpers import utc_now_timestamp log = logging.getLogger("bbot_server.models") @@ -11,14 +10,6 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, **kwargs): - ret = super().model_dump(**kwargs) - # convert datetime fields to unix timestamps - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].timestamp() - return ret - def __hash__(self): return hash(self.to_json()) @@ -29,34 +20,37 @@ def __eq__(self, other): def _indexed_fields(cls): return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) - @classmethod - def _get_type_hints(cls): - """ - Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint - """ - type_hints = get_type_hints(cls) - unwrapped_type_hints = {} - for field_name in cls.model_fields: - type_hint = type_hints[field_name] - while 1: - if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): - type_hint = type_hint.__args__[0] - else: - break - unwrapped_type_hints[field_name] = type_hint - return unwrapped_type_hints - - @classmethod - def _datetime_fields(cls): - datetime_fields = [] - for field_name, type_hint in cls._get_type_hints().items(): - if type_hint == datetime: - datetime_fields.append(field_name) - return sorted(datetime_fields) + # we keep these because they were a lot of work to make and maybe someday they'll be useful again + + # @classmethod + # def _get_type_hints(cls): + # """ + # Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + # """ + # type_hints = get_type_hints(cls) + # unwrapped_type_hints = {} + # for field_name in cls.model_fields: + # type_hint = type_hints[field_name] + # while 1: + # if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + # type_hint = type_hint.__args__[0] + # else: + # break + # unwrapped_type_hints[field_name] = type_hint + # return unwrapped_type_hints + + # @classmethod + # def _datetime_fields(cls): + # datetime_fields = [] + # for field_name, type_hint in cls._get_type_hints().items(): + # if type_hint == datetime: + # datetime_fields.append(field_name) + # return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] type: Annotated[str, "indexed"] scope_description: str data: Annotated[Optional[str], "indexed"] = None data_json: Optional[dict] = None host: Annotated[Optional[str], "indexed"] = None port: Optional[int] = None netloc: Optional[str] = None @@ -75,8 +75,8 @@ class Event(BBOTBaseModel): web_spider_distance: int = 10 scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[float, "indexed"] - inserted_at: Annotated[Optional[float], "indexed"] = 
Field(default_factory=naive_utc_now) + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=utc_now_timestamp) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -99,12 +93,13 @@ def get_data(self): ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str status: Annotated[str, "indexed"] - started_at: Annotated[NaiveUTC, "indexed"] - finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + started_at: Annotated[float, "indexed"] + finished_at: Annotated[Optional[float], "indexed"] = None duration_seconds: Optional[float] = None duration: Optional[str] = None target: dict diff --git a/bbot/models/sql.py b/bbot/models/sql.py index 9c5c8ef11a..82ccdb1f6f 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -3,13 +3,15 @@ import json import logging +from datetime import datetime from pydantic import ConfigDict from typing import List, Optional -from datetime import datetime, timezone from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime +from bbot.models.helpers import utc_now_timestamp + log = logging.getLogger("bbot_server.models") @@ -27,14 +29,6 @@ def naive_datetime_validator(d: datetime): NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] -class CustomJSONEncoder(json.JSONEncoder): - def default(self, obj): - # handle datetime - if isinstance(obj, datetime): - return obj.isoformat() - return super().default(obj) - - class BBOTBaseModel(SQLModel): model_config = ConfigDict(extra="ignore") @@ -52,7 +46,7 @@ def validated(self): return self def to_json(self, **kwargs): - return json.dumps(self.validated.model_dump(), sort_keys=True, cls=CustomJSONEncoder, **kwargs) + return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs) @classmethod def _pk_column_names(cls): @@ -67,11 +61,10 @@ def __eq__(self, other): ### EVENT ### + class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - if self.data is None and self.data_json is None: - raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] @@ -87,12 +80,12 @@ def get_data(self): ) id: str = Field(index=True) type: str = Field(index=True) - scope_description: str data: Optional[str] = Field(default=None, index=True) - data_json: Optional[dict] = Field(default=None) + data_json: Optional[dict] = Field(default=None, sa_type=JSON) host: Optional[str] port: Optional[int] netloc: Optional[str] + scope_description: str # store the host in reversed form for efficient lookups by domain reverse_host: Optional[str] = Field(default="", exclude=True, index=True) resolved_hosts: List = Field(default=[], sa_type=JSON) @@ -100,7 +93,8 @@ def get_data(self): web_spider_distance: int = 10 scope_distance: int = Field(default=10, index=True) scan: str = Field(index=True) - timestamp: NaiveUTC = Field(index=True) + timestamp: float = Field(index=True) + inserted_at: float = Field(default_factory=utc_now_timestamp) parent: str = Field(index=True) tags: List = Field(default=[], sa_type=JSON) module: str = Field(index=True) @@ -108,11 +102,11 @@ def get_data(self): discovery_context: str = "" discovery_path: List[str] = Field(default=[], sa_type=JSON) parent_chain: List[str] = Field(default=[], sa_type=JSON) - inserted_at: NaiveUTC = Field(default_factory=lambda: 
datetime.now(timezone.utc)) ### SCAN ### + class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -127,6 +121,7 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### + class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 5e555ab0ff..6ad16620f6 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -23,7 +23,7 @@ class Mongo(BaseOutputModule): "database": "The name of the database to use", "username": "The username to use to connect to the database", "password": "The password to use to connect to the database", - "collection_prefix": "Prefix each collection with this string", + "collection_prefix": "Prefix the name of each collection with this string", } deps_pip = ["motor~=3.6.0"] @@ -62,20 +62,20 @@ async def handle_event(self, event): await self.events_collection.insert_one(event_pydantic.model_dump()) if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + scan_json = Scan(**event.data_json).model_dump() + existing_scan = await self.scans_collection.find_one({"id": event_pydantic.id}) if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + await self.scans_collection.replace_one({"id": event_pydantic.id}, scan_json) + self.verbose(f"Updated scan event with ID: {event_pydantic.id}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + self.verbose(f"Inserted new scan event with ID: {event_pydantic.id}") target_data = scan_json.get("target", {}) target = Target(**target_data) - existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + existing_target = await self.targets_collection.find_one({"hash": target.hash}) if existing_target: - await self.targets_collection.replace_one({"uuid": target.uuid}, target.model_dump()) + await self.targets_collection.replace_one({"hash": target.hash}, target.model_dump()) else: await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 62e5c9d3ab..a5b04bc2c7 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,7 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime +from datetime import datetime, UTC from collections import OrderedDict from bbot import __version__ @@ -327,8 +327,8 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now() - self.root_event.data["started_at"] = self.start_time.isoformat() + self.start_time = datetime.now(UTC) + self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +436,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now() + self.end_time = datetime.now(UTC) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) @@ -1130,9 +1130,9 @@ def json(self): j["target"] = self.preset.target.json j["preset"] = self.preset.to_dict(redact_secrets=True) if 
self.start_time is not None: - j["started_at"] = self.start_time.isoformat() + j["started_at"] = self.start_time.timestamp() if self.end_time is not None: - j["finished_at"] = self.end_time.isoformat() + j["finished_at"] = self.end_time.timestamp() if self.duration is not None: j["duration_seconds"] = self.duration_seconds if self.duration_human is not None: diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 229c58a290..4d73d036c1 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -# @pytest.fixture(scope="session", autouse=True) -# def install_all_python_deps(): -# deps_pip = set() -# for module in DEFAULT_PRESET.module_loader.preloaded().values(): -# deps_pip.update(set(module.get("deps", {}).get("pip", []))) +@pytest.fixture(scope="session", autouse=True) +def install_all_python_deps(): + deps_pip = set() + for module in DEFAULT_PRESET.module_loader.preloaded().values(): + deps_pip.update(set(module.get("deps", {}).get("pip", []))) -# constraint_file = tempwordlist(get_python_constraints()) + constraint_file = tempwordlist(get_python_constraints()) -# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) + subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index d29e7e79a8..a8088be4f2 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,11 +1,23 @@ -from datetime import datetime +from datetime import datetime, UTC +from zoneinfo import ZoneInfo from bbot.models.pydantic import Event +from bbot.core.event.base import BaseEvent +from bbot.models.helpers import utc_datetime_validator from ..bbot_fixtures import * # noqa def test_pydantic_models(events): + # test datetime helpers + now = datetime.now(ZoneInfo("America/New_York")) + utc_now = utc_datetime_validator(now) + assert now.timestamp() == utc_now.timestamp() + now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + assert now2.timestamp() == utc_now.timestamp() + utc_now2 = utc_datetime_validator(now2) + assert utc_now2.timestamp() == utc_now.timestamp() + assert Event._datetime_fields() == ["inserted_at", "timestamp"] test_event = Event(**events.ipv4.json()) @@ -23,18 +35,25 @@ def test_pydantic_models(events): ] # convert events to pydantic and back, making sure they're exactly the same - for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + for event in ("ipv4", "http_response", "finding", "vulnerability", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() + event_reconstituted = BaseEvent.from_json(event_pydantic_dict) assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json + + event_pydantic_dict = event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) + assert event_pydantic_dict == event_json + event_pydantic_dict.pop("scan") + 
event_pydantic_dict.pop("module") + event_pydantic_dict.pop("module_sequence") + assert event_reconstituted.json() == event_pydantic_dict # TODO: SQL diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index a940dbce06..faadbdaae9 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() + timestamp = db_event.timestamp.timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index fcfed7841a..ac28e64e7b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -129,7 +129,17 @@ async def check(self, module_test, events): db_scans = await cursor.to_list(length=None) assert len(db_scans) == 1, "There should be exactly one scan" db_scan = db_scans[0] - assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + assert db_scan["id"] == main_event["scan"], "Scan id should match main event scan" + + ### TARGETS ### + + # Fetch all targets from the collection + cursor = db.get_collection(self.test_collection_prefix + "targets").find({}) + db_targets = await cursor.to_list(length=None) + assert len(db_targets) == 1, "There should be exactly one target" + db_target = db_targets[0] + scan_event = next(e for e in events if e.type == "SCAN") + assert db_target["hash"] == scan_event.data["target"]["hash"], "Target hash should match scan target hash" finally: # Clean up: Delete all documents in the collection diff --git a/bbot/test/test_step_2/module_tests/test_module_sqlite.py b/bbot/test/test_step_2/module_tests/test_module_sqlite.py index ec80b7555d..7970627b15 100644 --- a/bbot/test/test_step_2/module_tests/test_module_sqlite.py +++ b/bbot/test/test_step_2/module_tests/test_module_sqlite.py @@ -8,6 +8,8 @@ class TestSQLite(ModuleTestBase): def check(self, module_test, events): sqlite_output_file = module_test.scan.home / "output.sqlite" assert sqlite_output_file.exists(), "SQLite output file not found" + + # first connect with raw sqlite with sqlite3.connect(sqlite_output_file) as db: cursor = db.cursor() results = cursor.execute("SELECT * FROM event").fetchall() @@ -16,3 +18,15 @@ def check(self, module_test, events): assert len(results) == 1, "No scans found in SQLite database" results = cursor.execute("SELECT * FROM target").fetchall() assert len(results) == 1, "No targets found in SQLite database" + + # then connect with bbot models + from bbot.models.sql import Event + from sqlmodel import create_engine, Session, select + + engine = create_engine(f"sqlite:///{sqlite_output_file}") + + with Session(engine) as session: + statement = select(Event).where(Event.host == "evilcorp.com") + event = session.exec(statement).first() + assert event.host == "evilcorp.com", "Event host should match target host" + assert event.data == "evilcorp.com", "Event data should match target host" From 414d26970c2503509839614f97aa2f9a4726bbfa Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:02:48 -0500 Subject: [PATCH 032/147] flaked --- 
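A usage sketch for the UTC helpers introduced in the previous commit (illustrative only; everything round-trips through plain float timestamps):

```python
from datetime import datetime
from bbot.models.helpers import utc_datetime_validator, utc_now_timestamp

# naive datetimes are assumed to already be UTC; tz-aware ones get converted
aware = utc_datetime_validator(datetime(2024, 11, 21, 20, 0, 0))
stamp = aware.timestamp()  # plain float, the on-disk/wire format
assert datetime.fromtimestamp(stamp, aware.tzinfo) == aware

# model fields like Event.inserted_at default to this float form
assert isinstance(utc_now_timestamp(), float)
```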
bbot/models/helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index c7fc078a45..47959ad4ac 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,7 +1,5 @@ from datetime import UTC from datetime import datetime -from typing_extensions import Annotated -from pydantic.functional_validators import AfterValidator def utc_datetime_validator(d: datetime) -> datetime: From 7a24be69be3d8f185ade48915bef7f6b30c4604b Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:41:45 -0500 Subject: [PATCH 033/147] fix tests --- bbot/core/event/base.py | 7 ++++++- bbot/test/test_step_1/test_bbot_fastapi.py | 4 ++-- bbot/test/test_step_1/test_db_models.py | 8 ++++---- bbot/test/test_step_1/test_scan.py | 2 +- bbot/test/test_step_2/module_tests/test_module_json.py | 8 ++++---- bbot/test/test_step_2/module_tests/test_module_splunk.py | 2 +- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 64595680ae..dd751012a9 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1759,7 +1759,12 @@ def event_from_json(j): "context": j.get("discovery_context", None), "dummy": True, } - data = j.get("data_json", j.get("data", None)) + data = j.get("data_json", None) + if data is None: + data = j.get("data", None) + if data is None: + json_pretty = json.dumps(j, indent=2) + raise ValueError(f"data or data_json must be provided. JSON: {json_pretty}") kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index bad4020712..617f95abbf 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -28,7 +28,7 @@ def test_bbot_multiprocess(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any([e["data"] == "test@blacklanternsecurity.com" for e in events]) + assert any([e.get("data", "") == "test@blacklanternsecurity.com" for e in events]) def test_bbot_fastapi(bbot_httpserver): @@ -61,7 +61,7 @@ def test_bbot_fastapi(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any([e["data"] == "test@blacklanternsecurity.com" for e in events]) + assert any([e.get("data", "") == "test@blacklanternsecurity.com" for e in events]) finally: with suppress(Exception): diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index a8088be4f2..c29cc09a4f 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -18,12 +18,12 @@ def test_pydantic_models(events): utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() - assert Event._datetime_fields() == ["inserted_at", "timestamp"] - test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ + "data", "host", "id", + "inserted_at", "module", "parent", "parent_uuid", @@ -40,10 +40,10 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_reconstituted = BaseEvent.from_json(event_pydantic_dict) + event_reconstituted = BaseEvent.from_json(event_pydantic.model_dump(exclude_none=True)) assert isinstance(event_json["timestamp"], float) assert 
isinstance(e.timestamp, datetime) - assert isinstance(event_pydantic.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, float) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index f5f8458262..5514571fa8 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -144,7 +144,7 @@ async def test_python_output_matches_json(bbot_scanner): assert len(events) == 5 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert all([isinstance(e["data"]["status"], str) for e in scan_events]) + assert all([isinstance(e["data_json"]["status"], str) for e in scan_events]) assert len([e for e in events if e["type"] == "DNS_NAME"]) == 1 assert len([e for e in events if e["type"] == "ORG_STUB"]) == 1 assert len([e for e in events if e["type"] == "IP_ADDRESS"]) == 1 diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index bf79eeb13f..3641574213 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -23,13 +23,13 @@ def check(self, module_test, events): assert len(dns_json) == 1 dns_json = dns_json[0] scan = scan_json[0] - assert scan["data"]["name"] == module_test.scan.name - assert scan["data"]["id"] == module_test.scan.id + assert scan["data_json"]["name"] == module_test.scan.name + assert scan["data_json"]["id"] == module_test.scan.id assert scan["id"] == module_test.scan.id assert scan["uuid"] == str(module_test.scan.root_event.uuid) assert scan["parent_uuid"] == str(module_test.scan.root_event.uuid) - assert scan["data"]["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan["data"]["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["seeds"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["whitelist"] == ["blacklanternsecurity.com"] assert dns_json["data"] == dns_data assert dns_json["id"] == str(dns_event.id) assert dns_json["uuid"] == str(dns_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_splunk.py b/bbot/test/test_step_2/module_tests/test_module_splunk.py index d55ed17c27..eef148944c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_splunk.py +++ b/bbot/test/test_step_2/module_tests/test_module_splunk.py @@ -23,7 +23,7 @@ def verify_data(self, j): if not j["index"] == "bbot_index": return False data = j["event"] - if not data["data"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": + if not data["data_json"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": return False return True From cc89f77f0902a091c490a69632e1f85a4ce0737a Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 21:19:07 -0500 Subject: [PATCH 034/147] fix utc bug --- bbot/scanner/scanner.py | 7 ++++--- bbot/test/test_step_1/test_db_models.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index a5b04bc2c7..0915c4cb91 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,8 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime, UTC +from datetime import datetime +from zoneinfo import ZoneInfo from collections import 
OrderedDict from bbot import __version__ @@ -327,8 +328,7 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now(UTC) + self.start_time = datetime.now(ZoneInfo("UTC")) self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +437,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now(UTC) + self.end_time = datetime.now(ZoneInfo("UTC")) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index c29cc09a4f..9c71390696 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,4 +1,4 @@ -from datetime import datetime, UTC +from datetime import datetime from zoneinfo import ZoneInfo from bbot.models.pydantic import Event @@ -13,7 +13,7 @@ def test_pydantic_models(events): now = datetime.now(ZoneInfo("America/New_York")) utc_now = utc_datetime_validator(now) assert now.timestamp() == utc_now.timestamp() - now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + now2 = datetime.fromtimestamp(utc_now.timestamp(), ZoneInfo("UTC")) assert now2.timestamp() == utc_now.timestamp() utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() From 059607c61ce2f7e69cd8997cc2eb0f835a80c04c Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:32:32 -0500 Subject: [PATCH 035/147] fix tests --- bbot/models/helpers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 47959ad4ac..b94bc976cc 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,5 +1,5 @@ -from datetime import UTC from datetime import datetime +from zoneinfo import ZoneInfo def utc_datetime_validator(d: datetime) -> datetime: @@ -7,13 +7,13 @@ def utc_datetime_validator(d: datetime) -> datetime: Converts all dates into UTC """ if d.tzinfo is not None: - return d.astimezone(UTC) + return d.astimezone(ZoneInfo("UTC")) else: - return d.replace(tzinfo=UTC) + return d.replace(tzinfo=ZoneInfo("UTC")) def utc_now() -> datetime: - return datetime.now(UTC) + return datetime.now(ZoneInfo("UTC")) def utc_now_timestamp() -> float: return utc_now().timestamp() From d9cccf59bc96f33ca2a51710b459aa91bcaa2dbb Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:30:39 -0500 Subject: [PATCH 036/147] elastic module ---
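To spot-check what the module writes, a query like this works against the dockerized instance that the docs and tests in this patch set up (index name taken from the docs example; `-k` because the test instance presents a self-signed certificate):

```bash
curl -sk -u elastic:bbotislife "https://localhost:9200/bbot_events/_search?size=1&pretty"
```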
bbot/modules/output/elastic.py | 22 +++++++++++ bbot/modules/output/http.py | 6 +++--- .../module_tests/test_module_elastic.py | 130 ++++++++++++++++++ docs/scanning/output.md | 25 ++-- 4 files changed, 171 insertions(+), 12 deletions(-) create mode 100644 bbot/modules/output/elastic.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_elastic.py diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py new file mode 100644 index 0000000000..15bc023df8 --- /dev/null +++ b/bbot/modules/output/elastic.py @@ -0,0 +1,22 @@ +from .http import HTTP + + +class Elastic(HTTP): + watched_events = ["*"] + metadata = { + "description": "Send scan results to Elasticsearch", + "created_date": "2022-11-21", + "author": "@TheTechromancer", + } + options = { + "url": "", + "username": "elastic", + "password": "changeme", + "timeout": 10, + } + options_desc = { + "url": "Elastic URL (e.g. https://localhost:9200/<index>/_doc)", + "username": "Elastic username", + "password": "Elastic password", + "timeout": "HTTP timeout", + } diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py index 9d9241da0b..0af65a87d2 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/http.py @@ -1,3 +1,4 @@ +from bbot.models.pydantic import Event from bbot.modules.output.base import BaseOutputModule @@ -48,12 +49,15 @@ async def setup(self): async def handle_event(self, event): while 1: + event_json = event.json() + event_pydantic = Event(**event_json) + event_json = event_pydantic.model_dump(exclude_none=True) response = await self.helpers.request( url=self.url, method=self.method, auth=self.auth, headers=self.headers, - json=event.json(), + json=event_json, ) is_success = False if response is None else response.is_success if not is_success: diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py new file mode 100644 index 0000000000..710c22e0f0 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -0,0 +1,130 @@ +import time +import httpx +import asyncio + +from .base import ModuleTestBase + + +class TestElastic(ModuleTestBase): + config_overrides = { + "modules": { + "elastic": { + "url": "https://localhost:9200/bbot_test_events/_doc", + "username": "elastic", + "password": "bbotislife", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Elasticsearch container + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-elastic", + "--rm", + "-e", + "ELASTIC_PASSWORD=bbotislife", + "-e", + "cluster.routing.allocation.disk.watermark.low=96%", + "-e", + "cluster.routing.allocation.disk.watermark.high=97%", + "-e", + "cluster.routing.allocation.disk.watermark.flood_stage=98%", + "-p", + "9200:9200", + "-d", + "docker.elastic.co/elasticsearch/elasticsearch:8.16.0", + ) + + # Connect to Elasticsearch with retry logic + async with httpx.AsyncClient(verify=False) as client: + while True: + try: + # Attempt a simple operation to confirm the connection + response = await client.get("https://localhost:9200/_cat/health", auth=("elastic", "bbotislife")) + response.raise_for_status() + break + except Exception as e: + print(f"Connection failed: {e}. 
Retrying...", flush=True) + time.sleep(0.5) + + # Ensure the index is empty + await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) + print("Elasticsearch index cleaned up", flush=True) + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to Elasticsearch + async with httpx.AsyncClient(verify=False) as client: + + # refresh the index + await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) + + # Fetch all events from the index + response = await client.get( + f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") + ) + response_json = response.json() + import json + + print(f"response: {json.dumps(response_json, indent=2)}") + db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] + + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert ( + main_event is not None + ), "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Events don't match exactly because the elastic ones have reverse_host and inserted_at + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + db_event.pop("inserted_at") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the index + async with httpx.AsyncClient(verify=False) as client: + response = await client.delete( + f"https://localhost:9200/bbot_test_events", + auth=("elastic", "bbotislife"), + params={"ignore": "400,404"}, + ) + print(f"Deleted documents from index", flush=True) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/docs/scanning/output.md b/docs/scanning/output.md index dd45a5c833..16cfbd3593 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -155,15 +155,20 @@ config: ### Elasticsearch -When outputting to Elastic, use the `http` output module with the following settings (replace `` with your desired index, e.g. 
`bbot`): +- Step 1: Spin up a quick Elasticsearch docker image + +```bash +docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0 +``` + +- Step 2: Execute a scan with the `elastic` output module ```bash # send scan results directly to elasticsearch -bbot -t evilcorp.com -om http -c \ - modules.http.url=http://localhost:8000/<index>/_doc \ - modules.http.siem_friendly=true \ - modules.http.username=elastic \ - modules.http.password=changeme +# note: you can replace "bbot_events" with your own index name +bbot -t evilcorp.com -om elastic -c \ + modules.elastic.url=https://localhost:9200/bbot_events/_doc \ + modules.elastic.password=bbotislife ``` Alternatively, via a preset: @@ -171,11 +176,9 @@ Alternatively, via a preset: ```yaml title="elastic_preset.yml" config: modules: - http: - url: http://localhost:8000/<index>/_doc - siem_friendly: true - username: elastic - password: changeme + elastic: + url: https://localhost:9200/bbot_events/_doc + password: bbotislife ``` ### Splunk From 189d7257c124c288740a24fa8c11cab9297318b1 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 23 Nov 2024 12:19:16 -0500 Subject: [PATCH 037/147] fix conflict --- bbot/models/pydantic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 356ab2e44c..07534937a2 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -93,7 +93,6 @@ def get_data(self): ### SCAN ### - class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str @@ -117,7 +116,6 @@ def from_scan(cls, scan): ### TARGET ### - class Target(BBOTBaseModel): name: str = "Default Target" strict_scope: bool = False From 59177c45e14be99c6cf1bf8c2143cff6530d494e Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:30:05 -0500 Subject: [PATCH 038/147] new module: kafka --- bbot/core/event/base.py | 2 +- bbot/modules/output/elastic.py | 14 ++- bbot/modules/output/kafka.py | 42 +++++++ bbot/scanner/scanner.py | 6 +- .../module_tests/test_module_elastic.py | 9 +- .../module_tests/test_module_kafka.py | 108 ++++++++++++++++++ 6 files changed, 167 insertions(+), 14 deletions(-) create mode 100644 bbot/modules/output/kafka.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_kafka.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 76802dac81..05f1a91271 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -814,7 +814,7 @@ def json(self, mode="json"): if parent_uuid: j["parent_uuid"] = parent_uuid # tags - j.update({"tags": list(self.tags)}) + j.update({"tags": sorted(self.tags)}) # parent module if self.module: j.update({"module": str(self.module)}) diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py index 15bc023df8..42c331c516 100644 --- a/bbot/modules/output/elastic.py +++ b/bbot/modules/output/elastic.py @@ -2,6 +2,10 @@ class Elastic(HTTP): + """ + docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0 + """ + watched_events = ["*"] metadata = { "description": "Send scan results to Elasticsearch", "created_date": "2022-11-21", "author": "@TheTechromancer", } options = { - "url": "", + "url": "https://localhost:9200/bbot_events/_doc", "username": "elastic", - "password": "changeme", + "password": "bbotislife", 
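+ # these defaults match the dockerized instance from the docstring above (ELASTIC_PASSWORD=bbotislife, self-signed TLS on 9200)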
"timeout": 10, } options_desc = { @@ -20,3 +24,9 @@ class Elastic(HTTP): "password": "Elastic password", "timeout": "HTTP timeout", } + + async def cleanup(self): + # refresh the index + doc_regex = self.helpers.re.compile(r"/[^/]+$") + refresh_url = doc_regex.sub("/_refresh", self.url) + await self.helpers.request(refresh_url, auth=self.auth) diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py new file mode 100644 index 0000000000..5b2db13d60 --- /dev/null +++ b/bbot/modules/output/kafka.py @@ -0,0 +1,42 @@ +import json +from aiokafka import AIOKafkaProducer + +from bbot.modules.output.base import BaseOutputModule + + +class Kafka(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a Kafka topic", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + options_desc = { + "bootstrap_servers": "A comma-separated list of Kafka server addresses", + "topic": "The Kafka topic to publish events to", + } + deps_pip = ["aiokafka~=0.12.0"] + + async def setup(self): + self.bootstrap_servers = self.config.get("bootstrap_servers", "localhost:9092") + self.topic = self.config.get("topic", "bbot_events") + self.producer = AIOKafkaProducer(bootstrap_servers=self.bootstrap_servers) + + # Start the producer + await self.producer.start() + self.verbose("Kafka producer started successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + await self.producer.send_and_wait(self.topic, event_data) + + async def cleanup(self): + # Stop the producer + await self.producer.stop() + self.verbose("Kafka producer stopped successfully") diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 2602fa776c..8e99f104dd 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -865,15 +865,15 @@ async def _cleanup(self): if not self._cleanedup: self._cleanedup = True self.status = "CLEANING_UP" + # clean up modules + for mod in self.modules.values(): + await mod._cleanup() # clean up dns engine if self.helpers._dns is not None: await self.helpers.dns.shutdown() # clean up web engine if self.helpers._web is not None: await self.helpers.web.shutdown() - # clean up modules - for mod in self.modules.values(): - await mod._cleanup() with contextlib.suppress(Exception): self.home.rmdir() self.helpers.clean_old_scans() diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 710c22e0f0..2f8891a640 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,12 +48,11 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - print(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. 
Retrying...", flush=True) time.sleep(0.5) # Ensure the index is empty await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) - print("Elasticsearch index cleaned up", flush=True) async def check(self, module_test, events): try: @@ -65,17 +64,11 @@ async def check(self, module_test, events): # Connect to Elasticsearch async with httpx.AsyncClient(verify=False) as client: - # refresh the index - await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) - # Fetch all events from the index response = await client.get( f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") ) response_json = response.json() - import json - - print(f"response: {json.dumps(response_json, indent=2)}") db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] # make sure we have the same number of events diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py new file mode 100644 index 0000000000..6a81173561 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -0,0 +1,108 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestKafka(ModuleTestBase): + config_overrides = { + "modules": { + "kafka": { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Zookeeper + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-zookeeper", "-p", "2181:2181", "zookeeper:3.9" + ) + + # Wait for Zookeeper to be ready + while True: + try: + # Attempt to connect to Zookeeper with a timeout + reader, writer = await asyncio.wait_for(asyncio.open_connection("localhost", 2181), timeout=0.5) + break # Exit the loop if the connection is successful + except Exception as e: + self.log.verbose(f"Waiting for Zookeeper to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + finally: + with suppress(Exception): + writer.close() + await writer.wait_closed() + + # Start Kafka using wurstmeister/kafka + await asyncio.create_subprocess_exec( + "docker", + "run", + "-d", + "--rm", + "--name", + "bbot-test-kafka", + "--link", + "bbot-test-zookeeper:zookeeper", + "-e", + "KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181", + "-e", + "KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092", + "-e", + "KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092", + "-e", + "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1", + "-p", + "9092:9092", + "wurstmeister/kafka", + ) + + from aiokafka import AIOKafkaConsumer + + # Wait for Kafka to be ready + while True: + try: + self.consumer = AIOKafkaConsumer( + "bbot_events", + bootstrap_servers="localhost:9092", + group_id="test_group", + ) + await self.consumer.start() + break # Exit the loop if the consumer starts successfully + except Exception as e: + self.log.verbose(f"Waiting for Kafka to be ready: {e}") + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + await asyncio.sleep(0.5) # Wait a bit before retrying + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from Kafka + kafka_events = [] + async for msg in self.consumer: + event_data = json.loads(msg.value.decode("utf-8")) + kafka_events.append(event_data) + if 
len(kafka_events) >= len(events_json): + break + + kafka_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == kafka_events, "Events do not match" + + finally: + # Clean up: Stop the Kafka consumer + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + # Stop Kafka and Zookeeper containers + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-kafka", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-zookeeper", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From cb9bca2a5c0256f8209eb43df0f2ebecc91aae83 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:56:45 -0500 Subject: [PATCH 039/147] fix elastic tests --- bbot/test/test_step_2/module_tests/test_module_elastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 2f8891a640..db9f2359f7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,7 +48,7 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - self.log.verbose(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. Retrying...") time.sleep(0.5) # Ensure the index is empty @@ -117,7 +117,7 @@ async def check(self, module_test, events): auth=("elastic", "bbotislife"), params={"ignore": "400,404"}, ) - print(f"Deleted documents from index", flush=True) + self.log.verbose(f"Deleted documents from index") await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) From a0fc76a6f9a6f0d6438f58bd52b15677f6c31164 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:54:29 -0500 Subject: [PATCH 040/147] better error handling in module --- bbot/modules/output/kafka.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 5b2db13d60..0c28075450 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -34,7 +34,12 @@ async def setup(self): async def handle_event(self, event): event_json = event.json() event_data = json.dumps(event_json).encode("utf-8") - await self.producer.send_and_wait(self.topic, event_data) + while 1: + try: + await self.producer.send_and_wait(self.topic, event_data) + except Exception as e: + self.warning(f"Error sending event to Kafka: {e}, retrying...") + await self.helpers.sleep(1) async def cleanup(self): # Stop the producer From 6254dae5f6d50f830b2435d6a9f8547e281ffa1b Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:54:39 -0500 Subject: [PATCH 041/147] better error handling in module --- bbot/modules/output/kafka.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 0c28075450..0a31e0be12 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -37,6 +37,7 @@ async def handle_event(self, event): while 1: try: await self.producer.send_and_wait(self.topic, event_data) + break except Exception as e: self.warning(f"Error sending event to Kafka: {e}, retrying...") await self.helpers.sleep(1) From 
7476929a5c01afee4594317a7753e3bd643f25b8 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:58:06 -0500 Subject: [PATCH 042/147] better mongo error handling --- bbot/modules/output/mongo.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 6ad16620f6..118ca82378 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -59,7 +59,13 @@ async def setup(self): async def handle_event(self, event): event_json = event.json() event_pydantic = Event(**event_json) - await self.events_collection.insert_one(event_pydantic.model_dump()) + while 1: + try: + await self.events_collection.insert_one(event_pydantic.model_dump()) + break + except Exception as e: + self.warning(f"Error inserting event into MongoDB: {e}, retrying...") + await self.helpers.sleep(1) if event.type == "SCAN": scan_json = Scan(**event.data_json).model_dump() From eb668a64d211150f205ce3e6bcf8956206dde2fb Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 22:26:49 -0500 Subject: [PATCH 043/147] add NATS module --- bbot/modules/output/nats.py | 53 +++++++++++++++ bbot/test/test_step_2/module_tests/base.py | 14 ++++ .../module_tests/test_module_elastic.py | 13 +--- .../module_tests/test_module_kafka.py | 38 +++-------- .../module_tests/test_module_mongo.py | 23 +------ .../module_tests/test_module_mysql.py | 15 +---- .../module_tests/test_module_nats.py | 64 +++++++++++++++++++ .../module_tests/test_module_postgres.py | 22 +------ 8 files changed, 146 insertions(+), 96 deletions(-) create mode 100644 bbot/modules/output/nats.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_nats.py diff --git a/bbot/modules/output/nats.py b/bbot/modules/output/nats.py new file mode 100644 index 0000000000..569645cc3b --- /dev/null +++ b/bbot/modules/output/nats.py @@ -0,0 +1,53 @@ +import json +import nats +from bbot.modules.output.base import BaseOutputModule + + +class NATS(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a NATS subject", + "created_date": "2024-11-22", + "author": "@TheTechromancer", + } + options = { + "servers": [], + "subject": "bbot_events", + } + options_desc = { + "servers": "A list of NATS server addresses", + "subject": "The NATS subject to publish events to", + } + deps_pip = ["nats-py"] + + async def setup(self): + self.servers = list(self.config.get("servers", [])) + if not self.servers: + return False, "NATS servers are required" + self.subject = self.config.get("subject", "bbot_events") + + # Connect to the NATS server + try: + self.nc = await nats.connect(self.servers) + except Exception as e: + import traceback + + return False, f"Error connecting to NATS: {e}\n{traceback.format_exc()}" + self.verbose("NATS client connected successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + while 1: + try: + await self.nc.publish(self.subject, event_data) + break + except Exception as e: + self.warning(f"Error sending event to NATS: {e}, retrying...") + await self.helpers.sleep(1) + + async def cleanup(self): + # Close the NATS connection + await self.nc.close() + self.verbose("NATS client disconnected successfully") diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index 3f6b5dd768..697dc2a235 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ 
b/bbot/test/test_step_2/module_tests/base.py @@ -153,3 +153,17 @@ async def setup_before_prep(self, module_test): async def setup_after_prep(self, module_test): pass + + async def wait_for_port_open(self, port): + while not await self.is_port_open("localhost", port): + self.log.verbose(f"Waiting for port {port} to be open...") + await asyncio.sleep(0.5) + + async def is_port_open(self, host, port): + try: + reader, writer = await asyncio.open_connection(host, port) + writer.close() + await writer.wait_closed() + return True + except (ConnectionRefusedError, OSError): + return False diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index db9f2359f7..902ef3539e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -1,4 +1,3 @@ -import time import httpx import asyncio @@ -39,18 +38,10 @@ async def setup_before_prep(self, module_test): "docker.elastic.co/elasticsearch/elasticsearch:8.16.0", ) + await self.wait_for_port_open(9200) + # Connect to Elasticsearch with retry logic async with httpx.AsyncClient(verify=False) as client: - while True: - try: - # Attempt a simple operation to confirm the connection - response = await client.get("https://localhost:9200/_cat/health", auth=("elastic", "bbotislife")) - response.raise_for_status() - break - except Exception as e: - self.log.verbose(f"Connection failed: {e}. Retrying...") - time.sleep(0.5) - # Ensure the index is empty await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py index 6a81173561..ee4c3a65f4 100644 --- a/bbot/test/test_step_2/module_tests/test_module_kafka.py +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -1,6 +1,5 @@ import json import asyncio -from contextlib import suppress from .base import ModuleTestBase @@ -23,18 +22,7 @@ async def setup_before_prep(self, module_test): ) # Wait for Zookeeper to be ready - while True: - try: - # Attempt to connect to Zookeeper with a timeout - reader, writer = await asyncio.wait_for(asyncio.open_connection("localhost", 2181), timeout=0.5) - break # Exit the loop if the connection is successful - except Exception as e: - self.log.verbose(f"Waiting for Zookeeper to be ready: {e}") - await asyncio.sleep(0.5) # Wait a bit before retrying - finally: - with suppress(Exception): - writer.close() - await writer.wait_closed() + await self.wait_for_port_open(2181) # Start Kafka using wurstmeister/kafka await asyncio.create_subprocess_exec( @@ -59,23 +47,17 @@ async def setup_before_prep(self, module_test): "wurstmeister/kafka", ) + # Wait for Kafka to be ready + await self.wait_for_port_open(9092) + from aiokafka import AIOKafkaConsumer - # Wait for Kafka to be ready - while True: - try: - self.consumer = AIOKafkaConsumer( - "bbot_events", - bootstrap_servers="localhost:9092", - group_id="test_group", - ) - await self.consumer.start() - break # Exit the loop if the consumer starts successfully - except Exception as e: - self.log.verbose(f"Waiting for Kafka to be ready: {e}") - if hasattr(self, "consumer") and not self.consumer._closed: - await self.consumer.stop() - await asyncio.sleep(0.5) # Wait a bit before retrying + self.consumer = AIOKafkaConsumer( + "bbot_events", + bootstrap_servers="localhost:9092", + group_id="test_group", + ) + await 
self.consumer.start() async def check(self, module_test, events): try: diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index ac28e64e7b..aa483eac21 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -1,4 +1,3 @@ -import time import asyncio from .base import ModuleTestBase @@ -37,27 +36,7 @@ async def setup_before_prep(self, module_test): "mongo", ) - from motor.motor_asyncio import AsyncIOMotorClient - - # Connect to the MongoDB collection with retry logic - while True: - try: - client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") - db = client[self.test_db_name] - events_collection = db.get_collection(self.test_collection_prefix + "events") - # Attempt a simple operation to confirm the connection - await events_collection.count_documents({}) - break # Exit the loop if connection is successful - except Exception as e: - print(f"Connection failed: {e}. Retrying...") - time.sleep(0.5) - - # Check that there are no events in the collection - count = await events_collection.count_documents({}) - assert count == 0, "There are existing events in the database" - - # Close the MongoDB connection - client.close() + await self.wait_for_port_open(27017) async def check(self, module_test, events): try: diff --git a/bbot/test/test_step_2/module_tests/test_module_mysql.py b/bbot/test/test_step_2/module_tests/test_module_mysql.py index 4867c568d5..709b3ca287 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mysql.py +++ b/bbot/test/test_step_2/module_tests/test_module_mysql.py @@ -1,5 +1,4 @@ import asyncio -import time from .base import ModuleTestBase @@ -28,20 +27,8 @@ async def setup_before_prep(self, module_test): ) stdout, stderr = await process.communicate() - import aiomysql - # wait for the container to start - start_time = time.time() - while True: - try: - conn = await aiomysql.connect(user="root", password="bbotislife", db="bbot", host="localhost") - conn.close() - break - except Exception as e: - if time.time() - start_time > 60: # timeout after 60 seconds - self.log.error("MySQL server did not start in time.") - raise e - await asyncio.sleep(1) + await self.wait_for_port(3306) if process.returncode != 0: self.log.error(f"Failed to start MySQL server: {stderr.decode()}") diff --git a/bbot/test/test_step_2/module_tests/test_module_nats.py b/bbot/test/test_step_2/module_tests/test_module_nats.py new file mode 100644 index 0000000000..d4e46d8061 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_nats.py @@ -0,0 +1,64 @@ +import json +import asyncio +import nats +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestNats(ModuleTestBase): + config_overrides = { + "modules": { + "nats": { + "servers": ["nats://localhost:4222"], + "subject": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start NATS server + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-nats", "-p", "4222:4222", "nats:latest" + ) + + # Wait for NATS to be ready by checking the port + await self.wait_for_port_open(4222) + + # Connect to NATS + try: + self.nc = await nats.connect(["nats://localhost:4222"]) + except Exception as e: + self.log.error(f"Error connecting to NATS: {e}") + raise + + # Collect events from NATS + self.nats_events = [] + + async def 
message_handler(msg): + event_data = json.loads(msg.data.decode("utf-8")) + self.nats_events.append(event_data) + + await self.nc.subscribe("bbot_events", cb=message_handler) + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + self.nats_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == self.nats_events, "Events do not match" + + finally: + with suppress(Exception): + # Clean up: Stop the NATS client + if self.nc.is_connected: + await self.nc.drain() + await self.nc.close() + # Stop NATS server container + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-nats", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_postgres.py b/bbot/test/test_step_2/module_tests/test_module_postgres.py index ea6c00210c..c1d7b102cb 100644 --- a/bbot/test/test_step_2/module_tests/test_module_postgres.py +++ b/bbot/test/test_step_2/module_tests/test_module_postgres.py @@ -1,4 +1,3 @@ -import time import asyncio from .base import ModuleTestBase @@ -25,27 +24,8 @@ async def setup_before_prep(self, module_test): "postgres", ) - import asyncpg - # wait for the container to start - start_time = time.time() - while True: - try: - # Connect to the default 'postgres' database to create 'bbot' - conn = await asyncpg.connect( - user="postgres", password="bbotislife", database="postgres", host="127.0.0.1" - ) - await conn.execute("CREATE DATABASE bbot") - await conn.close() - break - except asyncpg.exceptions.DuplicateDatabaseError: - # If the database already exists, break the loop - break - except Exception as e: - if time.time() - start_time > 60: # timeout after 60 seconds - self.log.error("PostgreSQL server did not start in time.") - raise e - await asyncio.sleep(1) + await self.wait_for_port(5432) if process.returncode != 0: self.log.error("Failed to start PostgreSQL server") From 712845ec60dfdfe8ca2ce3c9edd2d117d8f47b21 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 23 Nov 2024 00:12:48 -0500 Subject: [PATCH 044/147] fix tests? 
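
Presumably the fix: pytest imports every test file at collection time, so a
module-level `import nats` fails outright in environments where nats-py isn't
installed yet. Importing it inside setup_before_prep defers the import until
this test actually runs.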
--- bbot/test/test_step_2/module_tests/test_module_nats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_nats.py b/bbot/test/test_step_2/module_tests/test_module_nats.py index d4e46d8061..66f4d38937 100644 --- a/bbot/test/test_step_2/module_tests/test_module_nats.py +++ b/bbot/test/test_step_2/module_tests/test_module_nats.py @@ -1,6 +1,5 @@ import json import asyncio -import nats from contextlib import suppress from .base import ModuleTestBase @@ -27,6 +26,8 @@ async def setup_before_prep(self, module_test): await self.wait_for_port_open(4222) # Connect to NATS + import nats + try: self.nc = await nats.connect(["nats://localhost:4222"]) except Exception as e: From e7e89e5eac1dac9ddefaddb2b20a8173606c0d97 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 23 Nov 2024 02:41:12 -0500 Subject: [PATCH 045/147] fix cli tests --- bbot/test/test_step_1/test_python_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index eaa9636b1c..e968b1f2e4 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -95,7 +95,7 @@ def test_python_api_validation(): # invalid output module with pytest.raises(ValidationError) as error: Scanner(output_modules=["asdf"]) - assert str(error.value) == 'Could not find output module "asdf". Did you mean "teams"?' + assert str(error.value) == 'Could not find output module "asdf". Did you mean "nats"?' # invalid excluded module with pytest.raises(ValidationError) as error: Scanner(exclude_modules=["asdf"]) From 91893dbfae904bef53ce7797afd3f25bf92500db Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 23 Nov 2024 12:19:47 -0500 Subject: [PATCH 046/147] fix tests? --- bbot/test/test_step_1/test_python_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index e968b1f2e4..1a549d549e 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -119,7 +119,7 @@ def test_python_api_validation(): # normal module as output module with pytest.raises(ValidationError) as error: Scanner(output_modules=["robots"]) - assert str(error.value) == 'Could not find output module "robots". Did you mean "web_report"?' + assert str(error.value) == 'Could not find output module "robots". Did you mean "nats"?' 
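+    # the "did you mean" suggestion appears to come from closest-match logic over all module names, so registering the new nats module changes the expected value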
# invalid preset type with pytest.raises(ValidationError) as error: Scanner(preset="asdf") From a5b2a5d7e6a1604995d1f456d2d98c08eb8761c8 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 25 Nov 2024 10:26:49 -0500 Subject: [PATCH 047/147] fix elastic tests --- .../test_step_2/module_tests/test_module_elastic.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 902ef3539e..98abb4087d 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -38,7 +38,18 @@ async def setup_before_prep(self, module_test): "docker.elastic.co/elasticsearch/elasticsearch:8.16.0", ) - await self.wait_for_port_open(9200) + # Connect to Elasticsearch with retry logic + async with httpx.AsyncClient(verify=False) as client: + while True: + try: + # Attempt a simple operation to confirm the connection + response = await client.get("https://localhost:9200/_cat/health", auth=("elastic", "bbotislife")) + response.raise_for_status() + break + except Exception as e: + print(f"Connection failed: {e}. Retrying...", flush=True) + await asyncio.sleep(0.5) + # Connect to Elasticsearch with retry logic async with httpx.AsyncClient(verify=False) as client: From e2f4944b0c77f9aae4242ced03b6434b19b0300c Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:42:49 -0500 Subject: [PATCH 048/147] bump version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a407bcd7af..bfcbdd2594 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bbot" -version = "2.3.0" +version = "3.0.0" description = "OSINT automation for hackers." 
authors = [ "TheTechromancer", @@ -107,7 +107,7 @@ lint.ignore = ["E402", "E711", "E712", "E713", "E721", "E731", "E741", "F401", " [tool.poetry-dynamic-versioning] enable = true metadata = false -format-jinja = 'v2.3.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' +format-jinja = 'v3.0.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' [tool.poetry-dynamic-versioning.substitution] files = ["*/__init__.py"] From 21c2548ab5ee1835d3eda815a63571a4689dba69 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 23:22:52 -0500 Subject: [PATCH 049/147] fix conflict --- bbot/core/event/base.py | 6 +-- bbot/scanner/scanner.py | 3 +- bbot/test/bbot_fixtures.py | 76 ++++++++++++++++++++++++++------------ 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 32d6f7a3a1..1f8919ff82 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,6 +40,7 @@ validators, get_file_extension, ) +from bbot.db.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") @@ -802,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = self.timestamp.isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() # parent event parent_id = self.parent_id if parent_id: @@ -811,8 +812,7 @@ def json(self, mode="json", siem_friendly=False): if parent_uuid: j["parent_uuid"] = parent_uuid # tags - if self.tags: - j.update({"tags": list(self.tags)}) + j.update({"tags": list(self.tags)}) # parent module if self.module: j.update({"module": str(self.module)}) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index dcdb2a873f..7e058ae6fb 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -500,7 +500,8 @@ async def setup_modules(self, remove_failed=True): self.modules[module.name].set_error_state() hard_failed.append(module.name) else: - self.info(f"Setup soft-failed for {module.name}: {msg}") + log_fn = self.warning if module._type == "output" else self.info + log_fn(f"Setup soft-failed for {module.name}: {msg}") soft_failed.append(module.name) if (not status) and (module._intercept or remove_failed): # if a intercept module fails setup, we always remove it diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 070df6e9a3..be4e2b92bf 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -147,48 +147,78 @@ def helpers(scan): @pytest.fixture def events(scan): + + dummy_module = scan._make_dummy_module("dummy_module") + class bbot_events: - localhost = scan.make_event("127.0.0.1", parent=scan.root_event) - ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event) - netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event) - ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event) - netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event) - domain = scan.make_event("publicAPIs.org", parent=scan.root_event) - subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event) - email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event) - open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event) + localhost = scan.make_event("127.0.0.1", parent=scan.root_event, module=dummy_module) + ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event, module=dummy_module) + netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event, module=dummy_module) + ipv6 = 
scan.make_event("2001:4860:4860::8888", parent=scan.root_event, module=dummy_module) + netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event, module=dummy_module) + domain = scan.make_event("publicAPIs.org", parent=scan.root_event, module=dummy_module) + subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event, module=dummy_module) + email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event, module=dummy_module) + open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event, module=dummy_module) protocol = scan.make_event( - {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, "PROTOCOL", parent=scan.root_event + {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, + "PROTOCOL", + parent=scan.root_event, + module=dummy_module, + ) + ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event, module=dummy_module) + ipv6_open_port = scan.make_event( + "[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event, module=dummy_module + ) + url_unverified = scan.make_event( + "https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv4_url_unverified = scan.make_event( + "https://8.8.8.8:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv6_url_unverified = scan.make_event( + "https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event, module=dummy_module ) - ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event) - ipv6_open_port = scan.make_event("[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event) - url_unverified = scan.make_event("https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event) - ipv4_url_unverified = scan.make_event("https://8.8.8.8:443/hellofriend", parent=scan.root_event) - ipv6_url_unverified = scan.make_event("https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event) url = scan.make_event( - "https://api.publicAPIs.org:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://api.publicAPIs.org:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, ) ipv4_url = scan.make_event( - "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event, module=dummy_module ) ipv6_url = scan.make_event( - "https://[2001:4860:4860::8888]:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://[2001:4860:4860::8888]:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, + ) + url_hint = scan.make_event( + "https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url, module=dummy_module ) - url_hint = scan.make_event("https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url) vulnerability = scan.make_event( {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, "VULNERABILITY", parent=scan.root_event, + module=dummy_module, + ) + finding = scan.make_event( + {"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event, module=dummy_module + ) + vhost = scan.make_event( + {"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event, module=dummy_module ) - finding = scan.make_event({"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event) - vhost = scan.make_event({"host": "evilcorp.com", 
"vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event) - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) + http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module) storage_bucket = scan.make_event( {"name": "storage", "url": "https://storage.blob.core.windows.net"}, "STORAGE_BUCKET", parent=scan.root_event, + module=dummy_module, ) - emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event) + emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event, module=dummy_module) bbot_events.all = [ # noqa: F841 bbot_events.localhost, From 90d6ba2ea7d47bfd23cf897dee79efd966469094 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 17:25:16 -0500 Subject: [PATCH 050/147] resolve conflict --- bbot/core/event/base.py | 2 +- bbot/models/helpers.py | 16 +++ bbot/models/pydantic.py | 111 ++++++++++++++++++ bbot/{db/sql/models.py => models/sql.py} | 0 bbot/modules/output/mongo.py | 68 +++++++++++ bbot/modules/templates/sql.py | 2 +- bbot/test/test_step_1/test_db_models.py | 29 +++++ .../module_tests/test_module_mongo.py | 81 +++++++++++++ 8 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 bbot/models/helpers.py create mode 100644 bbot/models/pydantic.py rename bbot/{db/sql/models.py => models/sql.py} (100%) create mode 100644 bbot/modules/output/mongo.py create mode 100644 bbot/test/test_step_1/test_db_models.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_mongo.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 1f8919ff82..0d56e96e30 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,7 +40,7 @@ validators, get_file_extension, ) -from bbot.db.helpers import naive_datetime_validator +from bbot.models.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py new file mode 100644 index 0000000000..40e127c53b --- /dev/null +++ b/bbot/models/helpers.py @@ -0,0 +1,16 @@ +from datetime import datetime +from typing_extensions import Annotated +from pydantic.functional_validators import AfterValidator + + +def naive_datetime_validator(d: datetime): + """ + Converts all dates into UTC, then drops timezone information. + + This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. 
+ """ + # drop timezone info + return d.replace(tzinfo=None) + + +NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py new file mode 100644 index 0000000000..0d54cc91b7 --- /dev/null +++ b/bbot/models/pydantic.py @@ -0,0 +1,111 @@ +import json +import logging +from datetime import datetime +from typing import Optional, List, Union, Annotated +from pydantic import BaseModel, ConfigDict, field_serializer + +from bbot.models.helpers import NaiveUTC, naive_datetime_validator + +log = logging.getLogger("bbot_server.models") + + +class BBOTBaseModel(BaseModel): + model_config = ConfigDict(extra="ignore") + + def to_json(self, **kwargs): + return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + + def __hash__(self): + return hash(self.to_json()) + + def __eq__(self, other): + return hash(self) == hash(other) + + +### EVENT ### + +class Event(BBOTBaseModel): + uuid: Annotated[str, "indexed", "unique"] + id: Annotated[str, "indexed"] + type: Annotated[str, "indexed"] + scope_description: str + data: Union[dict, str] + host: Annotated[Optional[str], "indexed"] = None + port: Optional[int] = None + netloc: Optional[str] = None + # we store the host in reverse to allow for instant subdomain queries + # this works because indexes are left-anchored, but we need to search starting from the right side + reverse_host: Annotated[Optional[str], "indexed"] = "" + resolved_hosts: Union[List, None] = None + dns_children: Union[dict, None] = None + web_spider_distance: int = 10 + scope_distance: int = 10 + scan: Annotated[str, "indexed"] + timestamp: Annotated[NaiveUTC, "indexed"] + parent: Annotated[str, "indexed"] + parent_uuid: Annotated[str, "indexed"] + tags: List = [] + module: Annotated[Optional[str], "indexed"] = None + module_sequence: Optional[str] = None + discovery_context: str = "" + discovery_path: List[str] = [] + parent_chain: List[str] = [] + + def __init__(self, **data): + super().__init__(**data) + if self.host: + self.reverse_host = self.host[::-1] + + @staticmethod + def _get_data(data, type): + if isinstance(data, dict) and list(data) == [type]: + return data[type] + return data + + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + + @field_serializer("timestamp") + def serialize_timestamp(self, timestamp: datetime, _info): + return naive_datetime_validator(timestamp).isoformat() + + +### SCAN ### + +class Scan(BBOTBaseModel): + id: Annotated[str, "indexed", "unique"] + name: str + status: Annotated[str, "indexed"] + started_at: Annotated[NaiveUTC, "indexed"] + finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + duration_seconds: Optional[float] = None + duration: Optional[str] = None + target: dict + preset: dict + + @classmethod + def from_scan(cls, scan): + return cls( + id=scan.id, + name=scan.name, + status=scan.status, + started_at=scan.started_at, + ) + + +### TARGET ### + +class Target(BBOTBaseModel): + name: str = "Default Target" + strict_scope: bool = False + seeds: List = [] + whitelist: List = [] + blacklist: List = [] + hash: Annotated[str, "indexed", "unique"] + scope_hash: Annotated[str, "indexed"] + seed_hash: Annotated[str, "indexed"] + whitelist_hash: Annotated[str, "indexed"] + blacklist_hash: Annotated[str, "indexed"] diff --git a/bbot/db/sql/models.py b/bbot/models/sql.py similarity index 100% rename from bbot/db/sql/models.py rename to bbot/models/sql.py diff 
--git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py new file mode 100644 index 0000000000..dd4efa47ce --- /dev/null +++ b/bbot/modules/output/mongo.py @@ -0,0 +1,68 @@ +from motor.motor_asyncio import AsyncIOMotorClient + +from bbot.models.pydantic import Event +from bbot.modules.output.base import BaseOutputModule + + +class Mongo(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a MongoDB database", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "uri": "mongodb://localhost:27017", + "database": "bbot", + "collection_prefix": "", + } + options_desc = { + "uri": "The URI of the MongoDB server", + "database": "The name of the database to use", + "collection_prefix": "Prefix each collection with this string", + } + deps_pip = ["motor~=3.6.0"] + + async def setup(self): + self.uri = self.config.get("uri", "mongodb://localhost:27017") + self.db_client = AsyncIOMotorClient(self.uri) + + # Ping the server to confirm a successful connection + try: + await self.db_client.admin.command("ping") + self.verbose("MongoDB connection successful") + except Exception as e: + return False, f"Failed to connect to MongoDB: {e}" + + self.db_name = self.config.get("database", "bbot") + self.db = self.db_client[self.db_name] + self.collection_prefix = self.config.get("collection_prefix", "") + self.events_collection = self.db[f"{self.collection_prefix}events"] + self.scans_collection = self.db[f"{self.collection_prefix}scans"] + self.targets_collection = self.db[f"{self.collection_prefix}targets"] + + # Build an index for each field in reverse_host and host + for field in Event._indexed_fields(): + await self.collection.create_index([(field, 1)]) + self.verbose(f"Index created for field: {field}") + + return True + + async def handle_event(self, event): + event_json = event.json() + event_pydantic = Event(**event_json) + await self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": + # here we merge the scan with the one sharing its UUID. 
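+            # (a SCAN event is emitted once when the scan starts and again when
+            # it finishes, so the second write should replace the first)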
+ existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + # Merge logic here, for example, update the existing scan with new data + updated_scan = {**existing_scan, **event_pydantic.model_dump()} + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + diff --git a/bbot/modules/templates/sql.py b/bbot/modules/templates/sql.py index 39b4e6f00e..42f5494555 100644 --- a/bbot/modules/templates/sql.py +++ b/bbot/modules/templates/sql.py @@ -3,7 +3,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession -from bbot.db.sql.models import Event, Scan, Target +from bbot.models.sql import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py new file mode 100644 index 0000000000..4e003f6f57 --- /dev/null +++ b/bbot/test/test_step_1/test_db_models.py @@ -0,0 +1,29 @@ +from bbot.models.pydantic import Event +from ..bbot_fixtures import * # noqa + + +def test_pydantic_models(events): + + test_event = Event(**events.ipv4.json()) + assert sorted(test_event._indexed_fields()) == [ + "host", + "id", + "module", + "parent", + "parent_uuid", + "reverse_host", + "scan", + "timestamp", + "type", + "uuid", + ] + + # events + for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + e = getattr(events, event) + event_json = e.json() + event_pydantic = Event(**event_json) + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + + +# TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py new file mode 100644 index 0000000000..10a8655e81 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -0,0 +1,81 @@ +from .base import ModuleTestBase + + +class TestMongo(ModuleTestBase): + test_db_name = "bbot_test" + test_collection_name = "events_test" + config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + + async def setup_before_module(self): + from motor.motor_asyncio import AsyncIOMotorClient + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # Check that there are no events in the collection + count = await collection.count_documents({}) + assert count == 0, "There are existing events in the database" + + # Close the MongoDB connection + client.close() + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + from motor.motor_asyncio import AsyncIOMotorClient + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # make sure the collection has all the right indexes + cursor = collection.list_indexes() + indexes = await cursor.to_list(length=None) + for field 
in Event._indexed_fields(): + assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + + # Fetch all events from the collection + cursor = collection.find({}) + db_events = await cursor.to_list(length=None) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert main_event is not None, "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Compare the sorted lists + assert len(events_json) == len(db_events_pydantic) + # Events don't match exactly because the mongo ones have reverse_host + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the collection + await collection.delete_many({}) + # Close the MongoDB connection + client.close() From 3f7be0b1972f1a2d7aaae0b7322cf0563bc27f24 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 18:39:37 -0500 Subject: [PATCH 051/147] more wip mongo --- bbot/models/helpers.py | 6 ++++- bbot/models/pydantic.py | 31 +++++++++++++++++-------- bbot/modules/output/mongo.py | 19 +++++++-------- bbot/test/test_step_1/test_db_models.py | 8 +++++++ 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 40e127c53b..985c845994 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -3,7 +3,7 @@ from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime): +def naive_datetime_validator(d: datetime) -> datetime: """ Converts all dates into UTC, then drops timezone information. 
@@ -13,4 +13,8 @@ def naive_datetime_validator(d: datetime): return d.replace(tzinfo=None) +def naive_utc_now() -> datetime: + return naive_datetime_validator(datetime.now()) + + NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0d54cc91b7..fe179878e7 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -2,9 +2,9 @@ import logging from datetime import datetime from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer +from pydantic import BaseModel, ConfigDict, field_serializer, Field -from bbot.models.helpers import NaiveUTC, naive_datetime_validator +from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,8 +12,18 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, **kwargs): - return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + def to_json(self, preserve_datetime=False): + ret = self.model_dump() + if preserve_datetime: + for key in ret: + val = getattr(self, key, None) + if isinstance(val, datetime): + ret[key] = val + return ret + + def to_json_string(self, preserve_datetime=False, **kwargs): + kwargs['sort_keys'] = True + return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) def __hash__(self): return hash(self.to_json()) @@ -21,6 +31,12 @@ def __hash__(self): def __eq__(self, other): return hash(self) == hash(other) + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + ### EVENT ### @@ -42,6 +58,7 @@ class Event(BBOTBaseModel): scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[NaiveUTC, "indexed"] + inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -62,12 +79,6 @@ def _get_data(data, type): return data[type] return data - @classmethod - def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) - @field_serializer("timestamp") def serialize_timestamp(self, timestamp: datetime, _info): return naive_datetime_validator(timestamp).isoformat() diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index dd4efa47ce..bb92d19d8a 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -1,6 +1,6 @@ from motor.motor_asyncio import AsyncIOMotorClient -from bbot.models.pydantic import Event +from bbot.models.pydantic import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule @@ -42,9 +42,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event._indexed_fields(): - await self.collection.create_index([(field, 1)]) - self.verbose(f"Index created for field: {field}") + for field in Event.model_fields: + if "indexed" in field.metadata: + unique = "unique" in field.metadata + await self.collection.create_index([(field, 1)], unique=unique) + self.verbose(f"Index created for field: {field}") return True @@ -52,17 +54,14 @@ async def handle_event(self, event): event_json = event.json() event_pydantic = Event(**event_json) await 
self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": - # here we merge the scan with the one sharing its UUID. + scan_json = Scan.from_event(event).model_dump() existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) if existing_scan: - # Merge logic here, for example, update the existing scan with new data - updated_scan = {**existing_scan, **event_pydantic.model_dump()} - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") - - diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 4e003f6f57..1ba970f0e7 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,3 +1,5 @@ +from datetime import datetime + from bbot.models.pydantic import Event from ..bbot_fixtures import * # noqa @@ -23,6 +25,12 @@ def test_pydantic_models(events): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) + event_pydantic_dict = event_pydantic.to_json() + event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + assert isinstance(event_pydantic_dict["timestamp"], str) + assert isinstance(event_pydantic_dict["inserted_at"], str) + assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) + assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json From f0b20dad0ff4359ca845d14f2590f2d8376b2ded Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 20:08:24 -0500 Subject: [PATCH 052/147] more mongo wip --- bbot/models/pydantic.py | 66 ++++++++------- bbot/modules/output/mongo.py | 34 ++++---- bbot/test/test_step_1/test_db_models.py | 14 +++- .../module_tests/test_module_mongo.py | 81 +++++++++++++++---- 4 files changed, 133 insertions(+), 62 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index fe179878e7..906801693a 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,10 +1,9 @@ -import json import logging from datetime import datetime -from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer, Field +from pydantic import BaseModel, ConfigDict, Field +from typing import Optional, List, Union, Annotated, get_type_hints -from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now +from bbot.models.helpers import NaiveUTC, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,19 +11,14 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, preserve_datetime=False): - ret = self.model_dump() - if preserve_datetime: - for key in ret: - val = getattr(self, key, None) - if isinstance(val, datetime): - ret[key] = val + def model_dump(self, preserve_datetime=False, **kwargs): + ret = super().model_dump(**kwargs) + if not preserve_datetime: + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].isoformat() return ret - def to_json_string(self, 
preserve_datetime=False, **kwargs): - kwargs['sort_keys'] = True - return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) - def __hash__(self): return hash(self.to_json()) @@ -33,13 +27,37 @@ def __eq__(self, other): @classmethod def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) + return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) + + @classmethod + def _get_type_hints(cls): + """ + Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + """ + type_hints = get_type_hints(cls) + unwrapped_type_hints = {} + for field_name in cls.model_fields: + type_hint = type_hints[field_name] + while 1: + if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + type_hint = type_hint.__args__[0] + else: + break + unwrapped_type_hints[field_name] = type_hint + return unwrapped_type_hints + + @classmethod + def _datetime_fields(cls): + datetime_fields = [] + for field_name, type_hint in cls._get_type_hints().items(): + if type_hint == datetime: + datetime_fields.append(field_name) + return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -73,19 +91,10 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] - @staticmethod - def _get_data(data, type): - if isinstance(data, dict) and list(data) == [type]: - return data[type] - return data - - @field_serializer("timestamp") - def serialize_timestamp(self, timestamp: datetime, _info): - return naive_datetime_validator(timestamp).isoformat() - ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str @@ -109,6 +118,7 @@ def from_scan(cls, scan): ### TARGET ### + class Target(BBOTBaseModel): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bb92d19d8a..bc323d7ad9 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -14,18 +14,24 @@ class Mongo(BaseOutputModule): options = { "uri": "mongodb://localhost:27017", "database": "bbot", + "username": "", + "password": "", "collection_prefix": "", } options_desc = { "uri": "The URI of the MongoDB server", "database": "The name of the database to use", + "username": "The username to use to connect to the database", + "password": "The password to use to connect to the database", "collection_prefix": "Prefix each collection with this string", } deps_pip = ["motor~=3.6.0"] async def setup(self): self.uri = self.config.get("uri", "mongodb://localhost:27017") - self.db_client = AsyncIOMotorClient(self.uri) + self.username = self.config.get("username", "") + self.password = self.config.get("password", "") + self.db_client = AsyncIOMotorClient(self.uri, username=self.username, password=self.password) # Ping the server to confirm a successful connection try: @@ -42,11 +48,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event.model_fields: + for field_name, field in Event.model_fields.items(): if "indexed" in field.metadata: unique = "unique" in field.metadata - await self.collection.create_index([(field, 1)], unique=unique) - self.verbose(f"Index created for field: {field}") + await 
self.events_collection.create_index([(field_name, 1)], unique=unique) + self.verbose(f"Index created for field: {field_name} (unique={unique})") return True @@ -55,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - else: - # Insert as a new scan if no existing scan is found - await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + # if event.type == "SCAN": + # scan_json = Scan.from_event(event).model_dump() + # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + # if existing_scan: + # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + # else: + # # Insert as a new scan if no existing scan is found + # await self.scans_collection.insert_one(event_pydantic.model_dump()) + # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 1ba970f0e7..5a6fce547c 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -6,6 +6,8 @@ def test_pydantic_models(events): + assert Event._datetime_fields() == ["inserted_at", "timestamp"] + test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ "host", @@ -20,18 +22,22 @@ def test_pydantic_models(events): "uuid", ] - # events + # convert events to pydantic and back, making sure they're exactly the same for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) - event_pydantic_dict = event_pydantic.to_json() - event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + event_pydantic_dict = event_pydantic.model_dump() + event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) + assert isinstance(event_json["timestamp"], str) + assert isinstance(e.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, datetime) + assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], str) assert isinstance(event_pydantic_dict["inserted_at"], str) assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json # TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 10a8655e81..839e46156e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -1,21 +1,58 @@ +import time +import asyncio + from .base import ModuleTestBase class TestMongo(ModuleTestBase): test_db_name = "bbot_test" 
- test_collection_name = "events_test" - config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + test_collection_prefix = "test_" + config_overrides = { + "modules": { + "mongo": { + "database": test_db_name, + "username": "bbot", + "password": "bbotislife", + "collection_prefix": test_collection_prefix, + } + } + } + + async def setup_before_prep(self, module_test): + + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-mongo", + "--rm", + "-e", + "MONGO_INITDB_ROOT_USERNAME=bbot", + "-e", + "MONGO_INITDB_ROOT_PASSWORD=bbotislife", + "-p", + "27017:27017", + "-d", + "mongo", + ) - async def setup_before_module(self): from motor.motor_asyncio import AsyncIOMotorClient - # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") - db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + # Connect to the MongoDB collection with retry logic + while True: + try: + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") + db = client[self.test_db_name] + events_collection = db.get_collection(self.test_collection_prefix + "events") + # Attempt a simple operation to confirm the connection + await events_collection.count_documents({}) + break # Exit the loop if connection is successful + except Exception as e: + print(f"Connection failed: {e}. Retrying in 5 seconds...") + time.sleep(0.5) # Check that there are no events in the collection - count = await collection.count_documents({}) + count = await events_collection.count_documents({}) assert count == 0, "There are existing events in the database" # Close the MongoDB connection @@ -30,20 +67,30 @@ async def check(self, module_test, events): events_json.sort(key=lambda x: x["timestamp"]) # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + events_collection = db.get_collection(self.test_collection_prefix + "events") # make sure the collection has all the right indexes - cursor = collection.list_indexes() + cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" # Fetch all events from the collection - cursor = collection.find({}) + cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + # we currently don't store timestamps as datetime objects because mongodb has lower precision + # assert isinstance(db_event["timestamp"], datetime) + # assert isinstance(db_event["inserted_at"], datetime) + assert isinstance(db_event["timestamp"], str) + assert isinstance(db_event["inserted_at"], str) + # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] db_events_pydantic.sort(key=lambda x: x["timestamp"]) @@ -65,17 +112,19 @@ async def check(self, module_test, events): main_event.get("reverse_host") == expected_reverse_host ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" - # Compare the sorted lists - assert len(events_json) == 
len(db_events_pydantic) - # Events don't match exactly because the mongo ones have reverse_host + # Events don't match exactly because the mongo ones have reverse_host and inserted_at assert events_json != db_events_pydantic for db_event in db_events_pydantic: db_event.pop("reverse_host") + db_event.pop("inserted_at") # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" finally: # Clean up: Delete all documents in the collection - await collection.delete_many({}) + await events_collection.delete_many({}) # Close the MongoDB connection client.close() + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-mongo", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From f34987618e0593e33e32ff6d375e190d3b373def Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 20 Nov 2024 11:54:12 -0500 Subject: [PATCH 053/147] skip distro tests --- bbot/test/test_step_2/module_tests/test_module_mongo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 839e46156e..31e7f70747 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -17,6 +17,7 @@ class TestMongo(ModuleTestBase): } } } + skip_distro_tests = True async def setup_before_prep(self, module_test): @@ -48,7 +49,7 @@ async def setup_before_prep(self, module_test): await events_collection.count_documents({}) break # Exit the loop if connection is successful except Exception as e: - print(f"Connection failed: {e}. Retrying in 5 seconds...") + print(f"Connection failed: {e}. Retrying...") time.sleep(0.5) # Check that there are no events in the collection From 2a5809ce66864606ea63d3ac1c84dfaad870ab82 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 10:35:12 -0500 Subject: [PATCH 054/147] more wip mongo --- bbot/core/event/base.py | 8 ++++++-- bbot/models/pydantic.py | 10 +++++----- bbot/modules/output/mongo.py | 20 ++++++++++---------- bbot/test/bbot_fixtures.py | 14 +++++++------- bbot/test/test_step_1/test_db_models.py | 9 +++------ bbot/test/test_step_1/test_events.py | 6 +++--- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 0d56e96e30..a0b0d50e8a 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -803,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1770,7 +1770,11 @@ def event_from_json(j, siem_friendly=False): resolved_hosts = j.get("resolved_hosts", []) event._resolved_hosts = set(resolved_hosts) - event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) + # accept both isoformat and unix timestamp + try: + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + except Exception: + event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] parent_id = j.get("parent", None) if parent_id is not None: diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 906801693a..388d85f05f 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -11,12 +11,12 @@ class 
BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, preserve_datetime=False, **kwargs): + def model_dump(self, **kwargs): ret = super().model_dump(**kwargs) - if not preserve_datetime: - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].isoformat() + # convert datetime fields to unix timestamps + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].timestamp() return ret def __hash__(self): diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bc323d7ad9..03185b169c 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -61,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - # if event.type == "SCAN": - # scan_json = Scan.from_event(event).model_dump() - # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - # if existing_scan: - # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - # else: - # # Insert as a new scan if no existing scan is found - # await self.scans_collection.insert_one(event_pydantic.model_dump()) - # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + if event.type == "SCAN": + scan_json = Scan.from_event(event).model_dump() + existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index be4e2b92bf..58038d860b 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -@pytest.fixture(scope="session", autouse=True) -def install_all_python_deps(): - deps_pip = set() - for module in DEFAULT_PRESET.module_loader.preloaded().values(): - deps_pip.update(set(module.get("deps", {}).get("pip", []))) +# @pytest.fixture(scope="session", autouse=True) +# def install_all_python_deps(): +# deps_pip = set() +# for module in DEFAULT_PRESET.module_loader.preloaded().values(): +# deps_pip.update(set(module.get("deps", {}).get("pip", []))) - constraint_file = tempwordlist(get_python_constraints()) +# constraint_file = tempwordlist(get_python_constraints()) - subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) +# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 5a6fce547c..d29e7e79a8 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -28,15 +28,12 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) - assert 
isinstance(event_json["timestamp"], str) + assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json - assert isinstance(event_pydantic_dict["timestamp"], str) - assert isinstance(event_pydantic_dict["inserted_at"], str) - assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) - assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) + assert isinstance(event_pydantic_dict["timestamp"], float) + assert isinstance(event_pydantic_dict["inserted_at"], float) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 39be4d704b..a8769cf3ba 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.isoformat() + timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) @@ -515,7 +515,7 @@ async def test_events(events, helpers): assert reconstituted_event.uuid == db_event.uuid assert reconstituted_event.parent_uuid == scan.root_event.uuid assert reconstituted_event.scope_distance == 1 - assert reconstituted_event.timestamp.isoformat() == timestamp + assert reconstituted_event.timestamp.timestamp() == timestamp assert reconstituted_event.data == "evilcorp.com:80" assert reconstituted_event.type == "OPEN_TCP_PORT" assert reconstituted_event.host == "evilcorp.com" @@ -538,7 +538,7 @@ async def test_events(events, helpers): assert json_event_siemfriendly["timestamp"] == timestamp reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.isoformat() == timestamp + assert reconstituted_event2.timestamp.timestamp() == timestamp assert reconstituted_event2.data == "evilcorp.com:80" assert reconstituted_event2.type == "OPEN_TCP_PORT" assert reconstituted_event2.host == "evilcorp.com" From 2c1c021fc52afceeba3b5e2be569f86935117d6c Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 25 Nov 2024 10:27:45 -0500 Subject: [PATCH 055/147] fix conflict --- bbot/core/event/base.py | 18 +++++++--------- bbot/models/pydantic.py | 14 ++++++++----- bbot/models/sql.py | 21 +++++++------------ bbot/modules/output/http.py | 5 +---- bbot/modules/output/json.py | 6 ++---- bbot/modules/output/mongo.py | 8 +++++++ bbot/test/test_step_1/test_events.py | 21 +++++-------------- .../module_tests/test_module_http.py | 9 -------- .../module_tests/test_module_json.py | 15 ------------- .../module_tests/test_module_mongo.py | 20 +++++++++++++----- docs/scanning/tips_and_tricks.md | 18 ---------------- 11 files changed, 55 insertions(+), 100 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index a0b0d50e8a..89d8474116 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -756,7 +756,7 @@ def __contains__(self, other): return bool(radixtarget.search(other.host)) return False - def json(self, mode="json", siem_friendly=False): + def json(self, mode="json"): """ Serializes the 
event object to a JSON-compatible dictionary. @@ -765,7 +765,6 @@ def json(self, mode="json", siem_friendly=False): Parameters: mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id". - siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. This ensures the value of "data" is always the same type (a dictionary). Returns: dict: JSON-serializable dictionary representation of the event object. @@ -782,10 +781,12 @@ def json(self, mode="json", siem_friendly=False): data = data_attr else: data = smart_decode(self.data) - if siem_friendly: - j["data"] = {self.type: data} - else: + if isinstance(data, str): j["data"] = data + elif isinstance(data, dict): + j["data_json"] = data + else: + raise ValueError(f"Invalid data type: {type(data)}") # host, dns children if self.host: j["host"] = str(self.host) @@ -1725,7 +1726,7 @@ def make_event( ) -def event_from_json(j, siem_friendly=False): +def event_from_json(j): """ Creates an event object from a JSON dictionary. @@ -1757,10 +1758,7 @@ def event_from_json(j, siem_friendly=False): "context": j.get("discovery_context", None), "dummy": True, } - if siem_friendly: - data = j["data"][event_type] - else: - data = j["data"] + data = j.get("data_json", j.get("data", None)) kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 388d85f05f..0591a93515 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -57,13 +57,13 @@ def _datetime_fields(cls): ### EVENT ### - class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] type: Annotated[str, "indexed"] scope_description: str - data: Union[dict, str] + data: Annotated[Optional[str], "indexed"] = None + data_json: Optional[dict] = None host: Annotated[Optional[str], "indexed"] = None port: Optional[int] = None netloc: Optional[str] = None @@ -75,8 +75,8 @@ class Event(BBOTBaseModel): web_spider_distance: int = 10 scope_distance: int = 10 scan: Annotated[str, "indexed"] - timestamp: Annotated[NaiveUTC, "indexed"] - inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) + timestamp: Annotated[float, "indexed"] + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -91,9 +91,13 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] + def get_data(self): + if self.data is not None: + return self.data + return self.data_json -### SCAN ### +### SCAN ### class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] diff --git a/bbot/models/sql.py b/bbot/models/sql.py index d6e7656108..9c5c8ef11a 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -67,24 +67,18 @@ def __eq__(self, other): ### EVENT ### - class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - data = self._get_data(self.data, self.type) - self.data = {self.type: data} + if self.data is None and self.data_json is None: + raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] def get_data(self): - return self._get_data(self.data, self.type) - - @staticmethod - def _get_data(data, type): - # handle SIEM-friendly format - if isinstance(data, dict) and list(data) == [type]: - 
return data[type] - return data + if self.data is not None: + return self.data + return self.data_json uuid: str = Field( primary_key=True, @@ -94,7 +88,8 @@ def _get_data(data, type): id: str = Field(index=True) type: str = Field(index=True) scope_description: str - data: dict = Field(sa_type=JSON) + data: Optional[str] = Field(default=None, index=True) + data_json: Optional[dict] = Field(default=None) host: Optional[str] port: Optional[int] netloc: Optional[str] @@ -118,7 +113,6 @@ def _get_data(data, type): ### SCAN ### - class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -133,7 +127,6 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### - class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py index 9d9241da0b..7d94148d72 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/http.py @@ -15,7 +15,6 @@ class HTTP(BaseOutputModule): "username": "", "password": "", "timeout": 10, - "siem_friendly": False, } options_desc = { "url": "Web URL", @@ -24,14 +23,12 @@ class HTTP(BaseOutputModule): "username": "Username (basic auth)", "password": "Password (basic auth)", "timeout": "HTTP timeout", - "siem_friendly": "Format JSON in a SIEM-friendly way for ingestion into Elastic, Splunk, etc.", } async def setup(self): self.url = self.config.get("url", "") self.method = self.config.get("method", "POST") self.timeout = self.config.get("timeout", 10) - self.siem_friendly = self.config.get("siem_friendly", False) self.headers = {} bearer = self.config.get("bearer", "") if bearer: @@ -56,7 +53,7 @@ async def handle_event(self, event): method=self.method, auth=self.auth, headers=self.headers, - json=event.json(siem_friendly=self.siem_friendly), + json=event.json(), ) is_success = False if response is None else response.is_success if not is_success: diff --git a/bbot/modules/output/json.py b/bbot/modules/output/json.py index a35fa6aed7..b93d1e4e3f 100644 --- a/bbot/modules/output/json.py +++ b/bbot/modules/output/json.py @@ -11,20 +11,18 @@ class JSON(BaseOutputModule): "created_date": "2022-04-07", "author": "@TheTechromancer", } - options = {"output_file": "", "siem_friendly": False} + options = {"output_file": ""} options_desc = { "output_file": "Output to file", - "siem_friendly": "Output JSON in a SIEM-friendly format for ingestion into Elastic, Splunk, etc.", } _preserve_graph = True async def setup(self): self._prep_output_dir("output.json") - self.siem_friendly = self.config.get("siem_friendly", False) return True async def handle_event(self, event): - event_json = event.json(siem_friendly=self.siem_friendly) + event_json = event.json() event_str = json.dumps(event_json) if self.file is not None: self.file.write(event_str + "\n") diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 03185b169c..5e555ab0ff 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -71,3 +71,11 @@ async def handle_event(self, event): # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + target_data = scan_json.get("target", {}) + target = Target(**target_data) + existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + if existing_target: + await self.targets_collection.replace_one({"uuid": target.uuid}, target.model_dump()) + else: + await 
self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index a8769cf3ba..a2654818f4 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -529,28 +529,17 @@ async def test_events(events, helpers): assert hostless_event_json["data"] == "asdf" assert "host" not in hostless_event_json - # SIEM-friendly serialize/deserialize - json_event_siemfriendly = db_event.json(siem_friendly=True) - assert json_event_siemfriendly["scope_distance"] == 1 - assert json_event_siemfriendly["data"] == {"OPEN_TCP_PORT": "evilcorp.com:80"} - assert json_event_siemfriendly["type"] == "OPEN_TCP_PORT" - assert json_event_siemfriendly["host"] == "evilcorp.com" - assert json_event_siemfriendly["timestamp"] == timestamp - reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) - assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.timestamp() == timestamp - assert reconstituted_event2.data == "evilcorp.com:80" - assert reconstituted_event2.type == "OPEN_TCP_PORT" - assert reconstituted_event2.host == "evilcorp.com" - assert "127.0.0.1" in reconstituted_event2.resolved_hosts - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" json_event = http_response.json(mode="graph") + assert "data" in json_event + assert "data_json" not in json_event assert isinstance(json_event["data"], str) json_event = http_response.json() - assert isinstance(json_event["data"], dict) + assert "data" not in json_event + assert "data_json" in json_event + assert isinstance(json_event["data_json"], dict) assert json_event["type"] == "HTTP_RESPONSE" assert json_event["host"] == "example.com" assert json_event["parent"] == scan.root_event.id diff --git a/bbot/test/test_step_2/module_tests/test_module_http.py b/bbot/test/test_step_2/module_tests/test_module_http.py index 2bc99f5ddf..df90b78525 100644 --- a/bbot/test/test_step_2/module_tests/test_module_http.py +++ b/bbot/test/test_step_2/module_tests/test_module_http.py @@ -52,12 +52,3 @@ def check(self, module_test, events): assert self.headers_correct is True assert self.method_correct is True assert self.url_correct is True - - -class TestHTTPSIEMFriendly(TestHTTP): - modules_overrides = ["http"] - config_overrides = {"modules": {"http": dict(TestHTTP.config_overrides["modules"]["http"])}} - config_overrides["modules"]["http"]["siem_friendly"] = True - - def verify_data(self, j): - return j["data"] == {"DNS_NAME": "blacklanternsecurity.com"} and j["type"] == "DNS_NAME" diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index 27ed5a55e0..bf79eeb13f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -53,18 +53,3 @@ def check(self, module_test, events): assert dns_reconstructed.discovery_context == context_data assert dns_reconstructed.discovery_path == [context_data] assert dns_reconstructed.parent_chain == [dns_json["uuid"]] - - -class TestJSONSIEMFriendly(ModuleTestBase): - modules_overrides = ["json"] - config_overrides = {"modules": {"json": {"siem_friendly": True}}} - - def check(self, module_test, events): - txt_file = module_test.scan.home / "output.json" - lines = 
list(module_test.scan.helpers.read_file(txt_file)) - passed = False - for line in lines: - e = json.loads(line) - if e["data"] == {"DNS_NAME": "blacklanternsecurity.com"}: - passed = True - assert passed diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 31e7f70747..fcfed7841a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -72,12 +72,16 @@ async def check(self, module_test, events): db = client[self.test_db_name] events_collection = db.get_collection(self.test_collection_prefix + "events") + ### INDEXES ### + # make sure the collection has all the right indexes cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + ### EVENTS ### + # Fetch all events from the collection cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) @@ -86,11 +90,8 @@ async def check(self, module_test, events): assert len(events_json) == len(db_events) for db_event in db_events: - # we currently don't store timestamps as datetime objects because mongodb has lower precision - # assert isinstance(db_event["timestamp"], datetime) - # assert isinstance(db_event["inserted_at"], datetime) - assert isinstance(db_event["timestamp"], str) - assert isinstance(db_event["inserted_at"], str) + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] @@ -121,6 +122,15 @@ async def check(self, module_test, events): # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" + ### SCANS ### + + # Fetch all scans from the collection + cursor = db.get_collection(self.test_collection_prefix + "scans").find({}) + db_scans = await cursor.to_list(length=None) + assert len(db_scans) == 1, "There should be exactly one scan" + db_scan = db_scans[0] + assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + finally: # Clean up: Delete all documents in the collection await events_collection.delete_many({}) diff --git a/docs/scanning/tips_and_tricks.md b/docs/scanning/tips_and_tricks.md index c5073c1d63..e13d82875e 100644 --- a/docs/scanning/tips_and_tricks.md +++ b/docs/scanning/tips_and_tricks.md @@ -108,24 +108,6 @@ config: bbot -t evilcorp.com -p skip_cdns.yml ``` -### Ingest BBOT Data Into SIEM (Elastic, Splunk) - -If your goal is to run a BBOT scan and later feed its data into a SIEM such as Elastic, be sure to enable this option when scanning: - -```bash -bbot -t evilcorp.com -c modules.json.siem_friendly=true -``` - -This ensures the `.data` event attribute is always the same type (a dictionary), by nesting it like so: -```json -{ - "type": "DNS_NAME", - "data": { - "DNS_NAME": "blacklanternsecurity.com" - } -} -``` - ### Custom HTTP Proxy Web pentesters may appreciate BBOT's ability to quickly populate Burp Suite site maps for all subdomains in a target. If your scan includes gowitness, this will capture the traffic as if you manually visited each website in your browser -- including auxiliary web resources and javascript API calls. 
To accomplish this, set the `web.http_proxy` config option like so: From 894c86cc60944cd4f9c84d10b4bbec1fd4d4c283 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:00:28 -0500 Subject: [PATCH 056/147] steady work on mongo, bbot 3.0 --- bbot/core/event/base.py | 7 +- bbot/models/helpers.py | 20 +++--- bbot/models/pydantic.py | 71 +++++++++---------- bbot/models/sql.py | 27 +++---- bbot/modules/output/mongo.py | 16 ++--- bbot/scanner/scanner.py | 12 ++-- bbot/test/bbot_fixtures.py | 14 ++-- bbot/test/test_step_1/test_db_models.py | 25 ++++++- bbot/test/test_step_1/test_events.py | 2 +- .../module_tests/test_module_mongo.py | 12 +++- .../module_tests/test_module_sqlite.py | 14 ++++ 11 files changed, 128 insertions(+), 92 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 89d8474116..611711eae8 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -12,6 +12,7 @@ from copy import copy from pathlib import Path from typing import Optional +from zoneinfo import ZoneInfo from contextlib import suppress from radixtarget import RadixTarget from urllib.parse import urljoin, parse_qs @@ -40,7 +41,7 @@ validators, get_file_extension, ) -from bbot.models.helpers import naive_datetime_validator +from bbot.models.helpers import utc_datetime_validator log = logging.getLogger("bbot.core.event") @@ -804,7 +805,7 @@ def json(self, mode="json"): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() + j["timestamp"] = utc_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1770,7 +1771,7 @@ def event_from_json(j): # accept both isoformat and unix timestamp try: - event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"], ZoneInfo("UTC")) except Exception: event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 985c845994..c7fc078a45 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,20 +1,22 @@ +from datetime import UTC from datetime import datetime from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime) -> datetime: +def utc_datetime_validator(d: datetime) -> datetime: """ - Converts all dates into UTC, then drops timezone information. - - This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. 
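The replacement validator just below normalizes to timezone-aware UTC instead of stripping `tzinfo`, which keeps the underlying instant intact when events are serialized to unix-timestamp floats. A quick standard-library sketch of the difference (the example date and zone are arbitrary):

```python
from datetime import datetime
from zoneinfo import ZoneInfo

d = datetime(2024, 11, 21, 20, 42, 49, tzinfo=ZoneInfo("America/New_York"))

# old approach: drop tzinfo -- same wall-clock reading, but the zone
# (and therefore the actual instant) is lost
naive = d.replace(tzinfo=None)
assert naive.tzinfo is None

# new approach: convert to UTC -- different wall-clock reading, same instant
utc = d.astimezone(ZoneInfo("UTC"))
assert utc.hour == (d.hour + 5) % 24  # EST is UTC-5 on this date
assert utc.timestamp() == d.timestamp()

# which is what makes the float round-trip in event serialization lossless
assert datetime.fromtimestamp(utc.timestamp(), ZoneInfo("UTC")) == utc
```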
+ Converts all dates into UTC """ - # drop timezone info - return d.replace(tzinfo=None) + if d.tzinfo is not None: + return d.astimezone(UTC) + else: + return d.replace(tzinfo=UTC) -def naive_utc_now() -> datetime: - return naive_datetime_validator(datetime.now()) +def utc_now() -> datetime: + return datetime.now(UTC) -NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] +def utc_now_timestamp() -> datetime: + return utc_now().timestamp() diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0591a93515..356ab2e44c 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,9 +1,8 @@ import logging -from datetime import datetime from pydantic import BaseModel, ConfigDict, Field -from typing import Optional, List, Union, Annotated, get_type_hints +from typing import Optional, List, Union, Annotated -from bbot.models.helpers import NaiveUTC, naive_utc_now +from bbot.models.helpers import utc_now_timestamp log = logging.getLogger("bbot_server.models") @@ -11,14 +10,6 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, **kwargs): - ret = super().model_dump(**kwargs) - # convert datetime fields to unix timestamps - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].timestamp() - return ret - def __hash__(self): return hash(self.to_json()) @@ -29,34 +20,37 @@ def __eq__(self, other): def _indexed_fields(cls): return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) - @classmethod - def _get_type_hints(cls): - """ - Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint - """ - type_hints = get_type_hints(cls) - unwrapped_type_hints = {} - for field_name in cls.model_fields: - type_hint = type_hints[field_name] - while 1: - if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): - type_hint = type_hint.__args__[0] - else: - break - unwrapped_type_hints[field_name] = type_hint - return unwrapped_type_hints - - @classmethod - def _datetime_fields(cls): - datetime_fields = [] - for field_name, type_hint in cls._get_type_hints().items(): - if type_hint == datetime: - datetime_fields.append(field_name) - return sorted(datetime_fields) + # we keep these because they were a lot of work to make and maybe someday they'll be useful again + + # @classmethod + # def _get_type_hints(cls): + # """ + # Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + # """ + # type_hints = get_type_hints(cls) + # unwrapped_type_hints = {} + # for field_name in cls.model_fields: + # type_hint = type_hints[field_name] + # while 1: + # if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + # type_hint = type_hint.__args__[0] + # else: + # break + # unwrapped_type_hints[field_name] = type_hint + # return unwrapped_type_hints + + # @classmethod + # def _datetime_fields(cls): + # datetime_fields = [] + # for field_name, type_hint in cls._get_type_hints().items(): + # if type_hint == datetime: + # datetime_fields.append(field_name) + # return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -76,7 +70,7 @@ class Event(BBOTBaseModel): scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[float, "indexed"] - inserted_at: Annotated[Optional[float], "indexed"] = 
Field(default_factory=naive_utc_now) + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=utc_now_timestamp) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -99,12 +93,13 @@ def get_data(self): ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str status: Annotated[str, "indexed"] - started_at: Annotated[NaiveUTC, "indexed"] - finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + started_at: Annotated[float, "indexed"] + finished_at: Annotated[Optional[float], "indexed"] = None duration_seconds: Optional[float] = None duration: Optional[str] = None target: dict diff --git a/bbot/models/sql.py b/bbot/models/sql.py index 9c5c8ef11a..82ccdb1f6f 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -3,13 +3,15 @@ import json import logging +from datetime import datetime from pydantic import ConfigDict from typing import List, Optional -from datetime import datetime, timezone from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime +from bbot.models.helpers import utc_now_timestamp + log = logging.getLogger("bbot_server.models") @@ -27,14 +29,6 @@ def naive_datetime_validator(d: datetime): NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] -class CustomJSONEncoder(json.JSONEncoder): - def default(self, obj): - # handle datetime - if isinstance(obj, datetime): - return obj.isoformat() - return super().default(obj) - - class BBOTBaseModel(SQLModel): model_config = ConfigDict(extra="ignore") @@ -52,7 +46,7 @@ def validated(self): return self def to_json(self, **kwargs): - return json.dumps(self.validated.model_dump(), sort_keys=True, cls=CustomJSONEncoder, **kwargs) + return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs) @classmethod def _pk_column_names(cls): @@ -67,11 +61,10 @@ def __eq__(self, other): ### EVENT ### + class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - if self.data is None and self.data_json is None: - raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] @@ -87,12 +80,12 @@ def get_data(self): ) id: str = Field(index=True) type: str = Field(index=True) - scope_description: str data: Optional[str] = Field(default=None, index=True) - data_json: Optional[dict] = Field(default=None) + data_json: Optional[dict] = Field(default=None, sa_type=JSON) host: Optional[str] port: Optional[int] netloc: Optional[str] + scope_description: str # store the host in reversed form for efficient lookups by domain reverse_host: Optional[str] = Field(default="", exclude=True, index=True) resolved_hosts: List = Field(default=[], sa_type=JSON) @@ -100,7 +93,8 @@ def get_data(self): web_spider_distance: int = 10 scope_distance: int = Field(default=10, index=True) scan: str = Field(index=True) - timestamp: NaiveUTC = Field(index=True) + timestamp: float = Field(index=True) + inserted_at: float = Field(default_factory=utc_now_timestamp) parent: str = Field(index=True) tags: List = Field(default=[], sa_type=JSON) module: str = Field(index=True) @@ -108,11 +102,11 @@ def get_data(self): discovery_context: str = "" discovery_path: List[str] = Field(default=[], sa_type=JSON) parent_chain: List[str] = Field(default=[], sa_type=JSON) - inserted_at: NaiveUTC = Field(default_factory=lambda: 
datetime.now(timezone.utc)) ### SCAN ### + class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -127,6 +121,7 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### + class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 5e555ab0ff..6ad16620f6 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -23,7 +23,7 @@ class Mongo(BaseOutputModule): "database": "The name of the database to use", "username": "The username to use to connect to the database", "password": "The password to use to connect to the database", - "collection_prefix": "Prefix each collection with this string", + "collection_prefix": "Prefix the name of each collection with this string", } deps_pip = ["motor~=3.6.0"] @@ -62,20 +62,20 @@ async def handle_event(self, event): await self.events_collection.insert_one(event_pydantic.model_dump()) if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + scan_json = Scan(**event.data_json).model_dump() + existing_scan = await self.scans_collection.find_one({"id": event_pydantic.id}) if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + await self.scans_collection.replace_one({"id": event_pydantic.id}, scan_json) + self.verbose(f"Updated scan event with ID: {event_pydantic.id}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + self.verbose(f"Inserted new scan event with ID: {event_pydantic.id}") target_data = scan_json.get("target", {}) target = Target(**target_data) - existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + existing_target = await self.targets_collection.find_one({"hash": target.hash}) if existing_target: - await self.targets_collection.replace_one({"uuid": target.uuid}, target.model_dump()) + await self.targets_collection.replace_one({"hash": target.hash}, target.model_dump()) else: await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 7e058ae6fb..1b66569a44 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,7 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime +from datetime import datetime, UTC from collections import OrderedDict from bbot import __version__ @@ -327,8 +327,8 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now() - self.root_event.data["started_at"] = self.start_time.isoformat() + self.start_time = datetime.now(UTC) + self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +436,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now() + self.end_time = datetime.now(UTC) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) @@ -1130,9 +1130,9 @@ def json(self): j["target"] = self.preset.target.json j["preset"] = self.preset.to_dict(redact_secrets=True) if 
self.start_time is not None: - j["started_at"] = self.start_time.isoformat() + j["started_at"] = self.start_time.timestamp() if self.end_time is not None: - j["finished_at"] = self.end_time.isoformat() + j["finished_at"] = self.end_time.timestamp() if self.duration is not None: j["duration_seconds"] = self.duration_seconds if self.duration_human is not None: diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 58038d860b..be4e2b92bf 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -# @pytest.fixture(scope="session", autouse=True) -# def install_all_python_deps(): -# deps_pip = set() -# for module in DEFAULT_PRESET.module_loader.preloaded().values(): -# deps_pip.update(set(module.get("deps", {}).get("pip", []))) +@pytest.fixture(scope="session", autouse=True) +def install_all_python_deps(): + deps_pip = set() + for module in DEFAULT_PRESET.module_loader.preloaded().values(): + deps_pip.update(set(module.get("deps", {}).get("pip", []))) -# constraint_file = tempwordlist(get_python_constraints()) + constraint_file = tempwordlist(get_python_constraints()) -# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) + subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index d29e7e79a8..a8088be4f2 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,11 +1,23 @@ -from datetime import datetime +from datetime import datetime, UTC +from zoneinfo import ZoneInfo from bbot.models.pydantic import Event +from bbot.core.event.base import BaseEvent +from bbot.models.helpers import utc_datetime_validator from ..bbot_fixtures import * # noqa def test_pydantic_models(events): + # test datetime helpers + now = datetime.now(ZoneInfo("America/New_York")) + utc_now = utc_datetime_validator(now) + assert now.timestamp() == utc_now.timestamp() + now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + assert now2.timestamp() == utc_now.timestamp() + utc_now2 = utc_datetime_validator(now2) + assert utc_now2.timestamp() == utc_now.timestamp() + assert Event._datetime_fields() == ["inserted_at", "timestamp"] test_event = Event(**events.ipv4.json()) @@ -23,18 +35,25 @@ def test_pydantic_models(events): ] # convert events to pydantic and back, making sure they're exactly the same - for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + for event in ("ipv4", "http_response", "finding", "vulnerability", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() + event_reconstituted = BaseEvent.from_json(event_pydantic_dict) assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json + + event_pydantic_dict = event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) + assert event_pydantic_dict == event_json + event_pydantic_dict.pop("scan") + 
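+        # (module and module_sequence, like scan, exist only on the live event;
+        # the dummy event reconstituted by from_json doesn't carry them, so they
+        # have to be stripped before the comparison below)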
event_pydantic_dict.pop("module") + event_pydantic_dict.pop("module_sequence") + assert event_reconstituted.json() == event_pydantic_dict # TODO: SQL diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index a2654818f4..6ee006217d 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() + timestamp = db_event.timestamp.timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index fcfed7841a..ac28e64e7b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -129,7 +129,17 @@ async def check(self, module_test, events): db_scans = await cursor.to_list(length=None) assert len(db_scans) == 1, "There should be exactly one scan" db_scan = db_scans[0] - assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + assert db_scan["id"] == main_event["scan"], "Scan id should match main event scan" + + ### TARGETS ### + + # Fetch all targets from the collection + cursor = db.get_collection(self.test_collection_prefix + "targets").find({}) + db_targets = await cursor.to_list(length=None) + assert len(db_targets) == 1, "There should be exactly one target" + db_target = db_targets[0] + scan_event = next(e for e in events if e.type == "SCAN") + assert db_target["hash"] == scan_event.data["target"]["hash"], "Target hash should match scan target hash" finally: # Clean up: Delete all documents in the collection diff --git a/bbot/test/test_step_2/module_tests/test_module_sqlite.py b/bbot/test/test_step_2/module_tests/test_module_sqlite.py index ec80b7555d..7970627b15 100644 --- a/bbot/test/test_step_2/module_tests/test_module_sqlite.py +++ b/bbot/test/test_step_2/module_tests/test_module_sqlite.py @@ -8,6 +8,8 @@ class TestSQLite(ModuleTestBase): def check(self, module_test, events): sqlite_output_file = module_test.scan.home / "output.sqlite" assert sqlite_output_file.exists(), "SQLite output file not found" + + # first connect with raw sqlite with sqlite3.connect(sqlite_output_file) as db: cursor = db.cursor() results = cursor.execute("SELECT * FROM event").fetchall() @@ -16,3 +18,15 @@ def check(self, module_test, events): assert len(results) == 1, "No scans found in SQLite database" results = cursor.execute("SELECT * FROM target").fetchall() assert len(results) == 1, "No targets found in SQLite database" + + # then connect with bbot models + from bbot.models.sql import Event + from sqlmodel import create_engine, Session, select + + engine = create_engine(f"sqlite:///{sqlite_output_file}") + + with Session(engine) as session: + statement = select(Event).where(Event.host == "evilcorp.com") + event = session.exec(statement).first() + assert event.host == "evilcorp.com", "Event host should match target host" + assert event.data == "evilcorp.com", "Event data should match target host" From 0200bd2dfb6dc3553d9b54da37393b6218275c7e Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:02:48 -0500 Subject: [PATCH 057/147] flaked --- 
bbot/models/helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index c7fc078a45..47959ad4ac 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,7 +1,5 @@ from datetime import UTC from datetime import datetime -from typing_extensions import Annotated -from pydantic.functional_validators import AfterValidator def utc_datetime_validator(d: datetime) -> datetime: From 70d058710c616ace8fd735790e3bc0e126fe9779 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 25 Nov 2024 10:29:16 -0500 Subject: [PATCH 058/147] fix conflict --- bbot/core/event/base.py | 7 ++++++- bbot/test/test_step_1/test_bbot_fastapi.py | 4 ++-- bbot/test/test_step_1/test_db_models.py | 8 ++++---- bbot/test/test_step_1/test_scan.py | 2 +- bbot/test/test_step_2/module_tests/test_module_json.py | 8 ++++---- bbot/test/test_step_2/module_tests/test_module_splunk.py | 2 +- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 611711eae8..76802dac81 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1759,7 +1759,12 @@ def event_from_json(j): "context": j.get("discovery_context", None), "dummy": True, } - data = j.get("data_json", j.get("data", None)) + data = j.get("data_json", None) + if data is None: + data = j.get("data", None) + if data is None: + json_pretty = json.dumps(j, indent=2) + raise ValueError(f"data or data_json must be provided. JSON: {json_pretty}") kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index add7ad099a..feaf8686da 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -28,7 +28,7 @@ def test_bbot_multiprocess(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any(e["data"] == "test@blacklanternsecurity.com" for e in events) + assert any(e.get("data", "") == "test@blacklanternsecurity.com" for e in events) def test_bbot_fastapi(bbot_httpserver): @@ -61,7 +61,7 @@ def test_bbot_fastapi(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any(e["data"] == "test@blacklanternsecurity.com" for e in events) + assert any(e.get("data", "") == "test@blacklanternsecurity.com" for e in events) finally: with suppress(Exception): diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index a8088be4f2..c29cc09a4f 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -18,12 +18,12 @@ def test_pydantic_models(events): utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() - assert Event._datetime_fields() == ["inserted_at", "timestamp"] - test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ + "data", "host", "id", + "inserted_at", "module", "parent", "parent_uuid", @@ -40,10 +40,10 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_reconstituted = BaseEvent.from_json(event_pydantic_dict) + event_reconstituted = BaseEvent.from_json(event_pydantic.model_dump(exclude_none=True)) assert isinstance(event_json["timestamp"], float) assert 
isinstance(e.timestamp, datetime) - assert isinstance(event_pydantic.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, float) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index 0102590461..fbef0ba0e0 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -144,7 +144,7 @@ async def test_python_output_matches_json(bbot_scanner): assert len(events) == 5 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert all(isinstance(e["data"]["status"], str) for e in scan_events) + assert all(isinstance(e["data_json"]["status"], str) for e in scan_events) assert len([e for e in events if e["type"] == "DNS_NAME"]) == 1 assert len([e for e in events if e["type"] == "ORG_STUB"]) == 1 assert len([e for e in events if e["type"] == "IP_ADDRESS"]) == 1 diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index bf79eeb13f..3641574213 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -23,13 +23,13 @@ def check(self, module_test, events): assert len(dns_json) == 1 dns_json = dns_json[0] scan = scan_json[0] - assert scan["data"]["name"] == module_test.scan.name - assert scan["data"]["id"] == module_test.scan.id + assert scan["data_json"]["name"] == module_test.scan.name + assert scan["data_json"]["id"] == module_test.scan.id assert scan["id"] == module_test.scan.id assert scan["uuid"] == str(module_test.scan.root_event.uuid) assert scan["parent_uuid"] == str(module_test.scan.root_event.uuid) - assert scan["data"]["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan["data"]["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["seeds"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["whitelist"] == ["blacklanternsecurity.com"] assert dns_json["data"] == dns_data assert dns_json["id"] == str(dns_event.id) assert dns_json["uuid"] == str(dns_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_splunk.py b/bbot/test/test_step_2/module_tests/test_module_splunk.py index 8366a6289b..a849055d2b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_splunk.py +++ b/bbot/test/test_step_2/module_tests/test_module_splunk.py @@ -23,7 +23,7 @@ def verify_data(self, j): if not j["index"] == "bbot_index": return False data = j["event"] - if not data["data"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": + if not data["data_json"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": return False return True From c26799470f51b8ce72219a4524644194f52a87f3 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 21:19:07 -0500 Subject: [PATCH 059/147] fix utc bug --- bbot/scanner/scanner.py | 7 ++++--- bbot/test/test_step_1/test_db_models.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 1b66569a44..2602fa776c 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,8 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime, UTC +from datetime import datetime +from zoneinfo import ZoneInfo from collections import OrderedDict 
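With `siem_friendly` gone, the contract these tests pin down is that scalar events keep a string under `data` while structured events move their dict under `data_json` -- never both. A consumer can rely on the same fallback `event_from_json` uses above; a minimal sketch (the `event_data` helper name is hypothetical):

```python
import json


def event_data(event: dict):
    # structured events (SCAN, HTTP_RESPONSE, ...) carry a dict under "data_json";
    # scalar events (DNS_NAME, OPEN_TCP_PORT, ...) carry a string under "data"
    data = event.get("data_json")
    if data is None:
        data = event.get("data")
    if data is None:
        raise ValueError(f"data or data_json must be provided. JSON: {json.dumps(event, indent=2)}")
    return data


line = '{"type": "DNS_NAME", "data": "blacklanternsecurity.com", "scope_distance": 1}'
assert event_data(json.loads(line)) == "blacklanternsecurity.com"
```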
from bbot import __version__ @@ -327,7 +328,7 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now(UTC) + self.start_time = datetime.now(ZoneInfo("UTC")) self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +437,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now(UTC) + self.end_time = datetime.now(ZoneInfo("UTC")) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index c29cc09a4f..9c71390696 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,4 +1,4 @@ -from datetime import datetime, UTC +from datetime import datetime from zoneinfo import ZoneInfo from bbot.models.pydantic import Event @@ -13,7 +13,7 @@ def test_pydantic_models(events): now = datetime.now(ZoneInfo("America/New_York")) utc_now = utc_datetime_validator(now) assert now.timestamp() == utc_now.timestamp() - now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + now2 = datetime.fromtimestamp(utc_now.timestamp(), ZoneInfo("UTC")) assert now2.timestamp() == utc_now.timestamp() utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() From f5e1695687f5f349beeec861af474e528d186cd5 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:32:32 -0500 Subject: [PATCH 060/147] fix tests --- bbot/models/helpers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 47959ad4ac..b94bc976cc 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,5 +1,5 @@ -from datetime import UTC from datetime import datetime +from zoneinfo import ZoneInfo def utc_datetime_validator(d: datetime) -> datetime: @@ -7,13 +7,13 @@ def utc_datetime_validator(d: datetime) -> datetime: Converts all dates into UTC """ if d.tzinfo is not None: - return d.astimezone(UTC) + return d.astimezone(ZoneInfo("UTC")) else: - return d.replace(tzinfo=UTC) + return d.replace(tzinfo=ZoneInfo("UTC")) def utc_now() -> datetime: - return datetime.now(UTC) + return datetime.now(ZoneInfo("UTC")) def utc_now_timestamp() -> datetime: From 44c8c6f140b5afd8fcfbf8a1702414895ca7e903 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:30:39 -0500 Subject: [PATCH 061/147] elastic module --- bbot/modules/output/elastic.py | 22 +++ bbot/modules/output/http.py | 6 +- .../module_tests/test_module_elastic.py | 130 ++++++++++++++++++ docs/scanning/output.md | 25 ++-- 4 files changed, 171 insertions(+), 12 deletions(-) create mode 100644 bbot/modules/output/elastic.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_elastic.py diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py new file mode 100644 index 0000000000..15bc023df8 --- /dev/null +++ b/bbot/modules/output/elastic.py @@ -0,0 +1,22 @@ +from .http import HTTP + + +class Elastic(HTTP): + watched_events = ["*"] + metadata = { + "description": "Send scan results to Elasticsearch", + "created_date": "2022-11-21", + "author": "@TheTechromancer", + } + options = { + "url": "", + "username": "elastic", + "password": "changeme", + "timeout": 10, + } + options_desc = { + "url": "Elastic URL (e.g. 
https://localhost:9200/<index>/_doc)",
+        "username": "Elastic username",
+        "password": "Elastic password",
+        "timeout": "HTTP timeout",
+    }
diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py
index 7d94148d72..0af65a87d2 100644
--- a/bbot/modules/output/http.py
+++ b/bbot/modules/output/http.py
@@ -1,3 +1,4 @@
+from bbot.models.pydantic import Event
 from bbot.modules.output.base import BaseOutputModule
 
 
@@ -48,12 +49,15 @@ async def setup(self):
 
     async def handle_event(self, event):
         while 1:
+            event_json = event.json()
+            event_pydantic = Event(**event_json)
+            event_json = event_pydantic.model_dump(exclude_none=True)
             response = await self.helpers.request(
                 url=self.url,
                 method=self.method,
                 auth=self.auth,
                 headers=self.headers,
-                json=event.json(),
+                json=event_json,
             )
             is_success = False if response is None else response.is_success
             if not is_success:
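The hunk above routes every outgoing event through the Pydantic `Event` model before it is POSTed, so unknown keys are dropped and unset fields are omitted from the payload. A minimal sketch of that validate-then-dump pattern, using a hypothetical stand-in model rather than the real `bbot.models.pydantic.Event` (which has more required fields):

```python
# Sketch of the serialize -> validate -> dump flow added to handle_event() above.
# EventSketch is a hypothetical stand-in, not the real BBOT Event model.
from typing import Optional
from pydantic import BaseModel, ConfigDict


class EventSketch(BaseModel):
    model_config = ConfigDict(extra="ignore")  # unknown keys are silently dropped

    type: str
    data: str
    host: Optional[str] = None  # None fields vanish with exclude_none=True


raw = {"type": "DNS_NAME", "data": "evilcorp.com", "host": None, "junk": 123}
payload = EventSketch(**raw).model_dump(exclude_none=True)
assert payload == {"type": "DNS_NAME", "data": "evilcorp.com"}
```

The payoff of the round-trip is that every output module ships the same normalized schema, regardless of which internal event class produced the data.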
Retrying...", flush=True) + time.sleep(0.5) + + # Ensure the index is empty + await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) + print("Elasticsearch index cleaned up", flush=True) + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to Elasticsearch + async with httpx.AsyncClient(verify=False) as client: + + # refresh the index + await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) + + # Fetch all events from the index + response = await client.get( + f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") + ) + response_json = response.json() + import json + + print(f"response: {json.dumps(response_json, indent=2)}") + db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] + + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert ( + main_event is not None + ), "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Events don't match exactly because the elastic ones have reverse_host and inserted_at + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + db_event.pop("inserted_at") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the index + async with httpx.AsyncClient(verify=False) as client: + response = await client.delete( + f"https://localhost:9200/bbot_test_events", + auth=("elastic", "bbotislife"), + params={"ignore": "400,404"}, + ) + print(f"Deleted documents from index", flush=True) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/docs/scanning/output.md b/docs/scanning/output.md index dd45a5c833..16cfbd3593 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -155,15 +155,20 @@ config: ### Elasticsearch -When outputting to Elastic, use the `http` output module with the following settings (replace `` with your desired index, e.g. 
From eb16a35ecbc418530def31663f966daaf4cb193b Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 25 Nov 2024 10:37:42 -0500
Subject: [PATCH 062/147] fixed conflict

---
 bbot/models/pydantic.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index 356ab2e44c..07534937a2 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -93,7 +93,6 @@ def get_data(self):
 
 ### SCAN ###
 
-
 class Scan(BBOTBaseModel):
     id: Annotated[str, "indexed", "unique"]
     name: str
@@ -117,7 +116,6 @@ def from_scan(cls, scan):
 
 ### TARGET ###
 
-
 class Target(BBOTBaseModel):
     name: str = "Default Target"
     strict_scope: bool = False

From 2b2cf74bcd228bd8e0ef25cb9216c7e4dc2b58c8 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 25 Nov 2024 10:41:51 -0500
Subject: [PATCH 063/147] fixed conflict

---
 bbot/models/pydantic.py | 1 -
 bbot/models/sql.py      | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index 07534937a2..b7c5baae9b 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -50,7 +50,6 @@ def _indexed_fields(cls):
 
 ### EVENT ###
 
-
 class Event(BBOTBaseModel):
     uuid: Annotated[str, "indexed", "unique"]
     id: Annotated[str, "indexed"]
diff --git a/bbot/models/sql.py b/bbot/models/sql.py
index 82ccdb1f6f..78465511f6 100644
--- a/bbot/models/sql.py
+++ b/bbot/models/sql.py
@@ -61,7 +61,6 @@ def __eq__(self, other):
 
 ### EVENT ###
 
-
 class Event(BBOTBaseModel, table=True):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -106,7 +105,6 @@ def get_data(self):
 
 ### SCAN ###
 
-
 class Scan(BBOTBaseModel, table=True):
     id: str = Field(primary_key=True)
     name: str
@@ -121,7 +119,6 @@ class Scan(BBOTBaseModel, table=True):
 
 ### TARGET ###
 
-
 class Target(BBOTBaseModel, table=True):
     name: str = "Default Target"
     strict_scope: bool = False

From 32386e09f23d58eaac2b0f764912bdb0a724812c Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 18:30:05 -0500
Subject: [PATCH 064/147] new module: kafka

---
 bbot/core/event/base.py                       |   2 +-
 bbot/modules/output/elastic.py                |  14 ++-
 bbot/modules/output/kafka.py                  |  42 +++++++
 bbot/scanner/scanner.py                       |   6 +-
 .../module_tests/test_module_elastic.py       |   9 +-
 .../module_tests/test_module_kafka.py         | 108 ++++++++++++++++++
 6 files changed, 167 insertions(+), 14 deletions(-)
 create mode 100644 bbot/modules/output/kafka.py
 create mode 100644 bbot/test/test_step_2/module_tests/test_module_kafka.py

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 76802dac81..05f1a91271 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -814,7 +814,7 @@ def json(self, mode="json"):
         if parent_uuid:
             j["parent_uuid"] = parent_uuid
         # tags
-        j.update({"tags": list(self.tags)})
+        j.update({"tags": sorted(self.tags)})
         # parent module
         if self.module:
             j.update({"module": str(self.module)})
diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py
index 15bc023df8..42c331c516 100644
--- a/bbot/modules/output/elastic.py
+++ b/bbot/modules/output/elastic.py
@@ -2,6 +2,10 @@ class Elastic(HTTP):
 
 
 class Elastic(HTTP):
+    """
+    docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0
+    """
+
     watched_events = ["*"]
     metadata = {
         "description": "Send scan results to Elasticsearch",
@@ -9,9 +13,9 @@ class Elastic(HTTP):
         "author": "@TheTechromancer",
     }
     options = {
-        "url": "",
+        "url": "https://localhost:9200/bbot_events/_doc",
         "username": "elastic",
-        "password": "changeme",
+        "password": "bbotislife",
         "timeout": 10,
     }
     options_desc = {
@@ -20,3 +24,9 @@ class Elastic(HTTP):
         "password": "Elastic password",
         "timeout": "HTTP timeout",
     }
+
+    async def cleanup(self):
+        # refresh the index
+        doc_regex = self.helpers.re.compile(r"/[^/]+$")
+        refresh_url = doc_regex.sub("/_refresh", self.url)
+        await self.helpers.request(refresh_url, auth=self.auth)
diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py
new file mode 100644
index 0000000000..5b2db13d60
--- /dev/null
+++ b/bbot/modules/output/kafka.py
@@ -0,0 +1,42 @@
+import json
+from aiokafka import AIOKafkaProducer
+
+from bbot.modules.output.base import BaseOutputModule
+
+
+class Kafka(BaseOutputModule):
+    watched_events = ["*"]
+    meta = {
+        "description": "Output scan data to a Kafka topic",
+        "created_date": "2024-11-17",
+        "author": "@TheTechromancer",
+    }
+    options = {
+        "bootstrap_servers": "localhost:9092",
+        "topic": "bbot_events",
+    }
+    options_desc = {
+        "bootstrap_servers": "A comma-separated list of Kafka server addresses",
+        "topic": "The Kafka topic to publish events to",
+    }
+    deps_pip = ["aiokafka~=0.12.0"]
+
+    async def setup(self):
+        self.bootstrap_servers = self.config.get("bootstrap_servers", "localhost:9092")
+        self.topic = self.config.get("topic", "bbot_events")
+        self.producer = AIOKafkaProducer(bootstrap_servers=self.bootstrap_servers)
+
+        # Start the producer
+        await self.producer.start()
+        self.verbose("Kafka producer started successfully")
+        return True
+
+    async def handle_event(self, event):
+        event_json = event.json()
+        event_data = json.dumps(event_json).encode("utf-8")
+        await self.producer.send_and_wait(self.topic, event_data)
+
+    async def cleanup(self):
+        # Stop the producer
+        await self.producer.stop()
+        self.verbose("Kafka producer stopped successfully")
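On the receiving end, events published by this module can be consumed with the same library the module and its test use. A minimal consumer sketch, assuming the default bootstrap server and topic from the module options above:

```python
# Minimal consumer for events published by the kafka output module.
# Defaults (localhost:9092, "bbot_events") mirror the module options above.
import json
import asyncio

from aiokafka import AIOKafkaConsumer


async def consume():
    consumer = AIOKafkaConsumer("bbot_events", bootstrap_servers="localhost:9092")
    await consumer.start()
    try:
        # each message value is one JSON-serialized BBOT event
        async for msg in consumer:
            event = json.loads(msg.value.decode("utf-8"))
            print(event["type"], event.get("data"))
    finally:
        await consumer.stop()


asyncio.run(consume())
```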
diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py
index 2602fa776c..8e99f104dd 100644
--- a/bbot/scanner/scanner.py
+++ b/bbot/scanner/scanner.py
@@ -865,15 +865,15 @@ async def _cleanup(self):
         if not self._cleanedup:
             self._cleanedup = True
             self.status = "CLEANING_UP"
+            # clean up modules
+            for mod in self.modules.values():
+                await mod._cleanup()
             # clean up dns engine
             if self.helpers._dns is not None:
                 await self.helpers.dns.shutdown()
             # clean up web engine
             if self.helpers._web is not None:
                 await self.helpers.web.shutdown()
-            # clean
up modules - for mod in self.modules.values(): - await mod._cleanup() with contextlib.suppress(Exception): self.home.rmdir() self.helpers.clean_old_scans() diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 710c22e0f0..2f8891a640 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,12 +48,11 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - print(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. Retrying...", flush=True) time.sleep(0.5) # Ensure the index is empty await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) - print("Elasticsearch index cleaned up", flush=True) async def check(self, module_test, events): try: @@ -65,17 +64,11 @@ async def check(self, module_test, events): # Connect to Elasticsearch async with httpx.AsyncClient(verify=False) as client: - # refresh the index - await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) - # Fetch all events from the index response = await client.get( f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") ) response_json = response.json() - import json - - print(f"response: {json.dumps(response_json, indent=2)}") db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] # make sure we have the same number of events diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py new file mode 100644 index 0000000000..6a81173561 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -0,0 +1,108 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestKafka(ModuleTestBase): + config_overrides = { + "modules": { + "kafka": { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Zookeeper + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-zookeeper", "-p", "2181:2181", "zookeeper:3.9" + ) + + # Wait for Zookeeper to be ready + while True: + try: + # Attempt to connect to Zookeeper with a timeout + reader, writer = await asyncio.wait_for(asyncio.open_connection("localhost", 2181), timeout=0.5) + break # Exit the loop if the connection is successful + except Exception as e: + self.log.verbose(f"Waiting for Zookeeper to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + finally: + with suppress(Exception): + writer.close() + await writer.wait_closed() + + # Start Kafka using wurstmeister/kafka + await asyncio.create_subprocess_exec( + "docker", + "run", + "-d", + "--rm", + "--name", + "bbot-test-kafka", + "--link", + "bbot-test-zookeeper:zookeeper", + "-e", + "KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181", + "-e", + "KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092", + "-e", + "KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092", + "-e", + "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1", + "-p", + "9092:9092", + "wurstmeister/kafka", + ) + + from aiokafka import AIOKafkaConsumer + + # Wait for Kafka to be ready + while True: + try: + self.consumer = AIOKafkaConsumer( + "bbot_events", + bootstrap_servers="localhost:9092", + 
group_id="test_group", + ) + await self.consumer.start() + break # Exit the loop if the consumer starts successfully + except Exception as e: + self.log.verbose(f"Waiting for Kafka to be ready: {e}") + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + await asyncio.sleep(0.5) # Wait a bit before retrying + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from Kafka + kafka_events = [] + async for msg in self.consumer: + event_data = json.loads(msg.value.decode("utf-8")) + kafka_events.append(event_data) + if len(kafka_events) >= len(events_json): + break + + kafka_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == kafka_events, "Events do not match" + + finally: + # Clean up: Stop the Kafka consumer + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + # Stop Kafka and Zookeeper containers + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-kafka", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-zookeeper", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 2f1cf400f41a4f9367371edb08373de9827ca215 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:56:45 -0500 Subject: [PATCH 065/147] fix elastic tests --- bbot/test/test_step_2/module_tests/test_module_elastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 2f8891a640..db9f2359f7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,7 +48,7 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - self.log.verbose(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. 
Retrying...") time.sleep(0.5) # Ensure the index is empty @@ -117,7 +117,7 @@ async def check(self, module_test, events): auth=("elastic", "bbotislife"), params={"ignore": "400,404"}, ) - print(f"Deleted documents from index", flush=True) + self.log.verbose(f"Deleted documents from index") await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) From baa58ad50b9de0732b491cb5df94f89b5d08ae1b Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:54:29 -0500 Subject: [PATCH 066/147] better error handling in module --- bbot/modules/output/kafka.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 5b2db13d60..0c28075450 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -34,7 +34,12 @@ async def setup(self): async def handle_event(self, event): event_json = event.json() event_data = json.dumps(event_json).encode("utf-8") - await self.producer.send_and_wait(self.topic, event_data) + while 1: + try: + await self.producer.send_and_wait(self.topic, event_data) + except Exception as e: + self.warning(f"Error sending event to Kafka: {e}, retrying...") + await self.helpers.sleep(1) async def cleanup(self): # Stop the producer From e757636c9953fb62a350a07f9f5186a5b34bccc5 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:54:39 -0500 Subject: [PATCH 067/147] better error handling in module --- bbot/modules/output/kafka.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 0c28075450..0a31e0be12 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -37,6 +37,7 @@ async def handle_event(self, event): while 1: try: await self.producer.send_and_wait(self.topic, event_data) + break except Exception as e: self.warning(f"Error sending event to Kafka: {e}, retrying...") await self.helpers.sleep(1) From 260e9cacdd3edfd1b9be3ae98079de1d7f456bbf Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:58:06 -0500 Subject: [PATCH 068/147] better mongo error handling --- bbot/modules/output/mongo.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 6ad16620f6..118ca82378 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -59,7 +59,13 @@ async def setup(self): async def handle_event(self, event): event_json = event.json() event_pydantic = Event(**event_json) - await self.events_collection.insert_one(event_pydantic.model_dump()) + while 1: + try: + await self.events_collection.insert_one(event_pydantic.model_dump()) + break + except Exception as e: + self.warning(f"Error inserting event into MongoDB: {e}, retrying...") + await self.helpers.sleep(1) if event.type == "SCAN": scan_json = Scan(**event.data_json).model_dump() From 9823dee95c28e6e10c6df141d92569de97939bf5 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 25 Nov 2024 11:06:47 -0500 Subject: [PATCH 069/147] fix sql tests --- bbot/test/test_step_2/module_tests/test_module_mysql.py | 2 +- bbot/test/test_step_2/module_tests/test_module_postgres.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_mysql.py b/bbot/test/test_step_2/module_tests/test_module_mysql.py index 709b3ca287..de30c58f9f 100644 --- 
a/bbot/test/test_step_2/module_tests/test_module_mysql.py +++ b/bbot/test/test_step_2/module_tests/test_module_mysql.py @@ -28,7 +28,7 @@ async def setup_before_prep(self, module_test): stdout, stderr = await process.communicate() # wait for the container to start - await self.wait_for_port(3306) + await self.wait_for_port_open(3306) if process.returncode != 0: self.log.error(f"Failed to start MySQL server: {stderr.decode()}") diff --git a/bbot/test/test_step_2/module_tests/test_module_postgres.py b/bbot/test/test_step_2/module_tests/test_module_postgres.py index c1d7b102cb..8c52eabebe 100644 --- a/bbot/test/test_step_2/module_tests/test_module_postgres.py +++ b/bbot/test/test_step_2/module_tests/test_module_postgres.py @@ -25,7 +25,7 @@ async def setup_before_prep(self, module_test): ) # wait for the container to start - await self.wait_for_port(5432) + await self.wait_for_port_open(5432) if process.returncode != 0: self.log.error("Failed to start PostgreSQL server") From 9955c5f097579acc9c2bd16f3ded7876c946d905 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 25 Nov 2024 12:07:52 -0500 Subject: [PATCH 070/147] allow extra second for port to come online --- bbot/test/test_step_2/module_tests/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index 697dc2a235..251cd6ad1a 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ b/bbot/test/test_step_2/module_tests/base.py @@ -158,6 +158,8 @@ async def wait_for_port_open(self, port): while not await self.is_port_open("localhost", port): self.log.verbose(f"Waiting for port {port} to be open...") await asyncio.sleep(0.5) + # allow an extra second for things to settle + await asyncio.sleep(1) async def is_port_open(self, host, port): try: From 1b5ff6058181cf4218601b76b3fdb64ec9d39b0e Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 23 Dec 2024 16:07:01 -0500 Subject: [PATCH 071/147] fix Event.hash() --- bbot/models/pydantic.py | 4 ++++ docs/scanning/configuration.md | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index b7c5baae9b..d9f8cde643 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,3 +1,4 @@ +import json import logging from pydantic import BaseModel, ConfigDict, Field from typing import Optional, List, Union, Annotated @@ -10,6 +11,9 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") + def to_json(self, **kwargs): + return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs) + def __hash__(self): return hash(self.to_json()) diff --git a/docs/scanning/configuration.md b/docs/scanning/configuration.md index 4ce1a92212..a7c3449a8c 100644 --- a/docs/scanning/configuration.md +++ b/docs/scanning/configuration.md @@ -30,7 +30,7 @@ You can specify config options either via the command line or the config. 
For example:
 
 ```bash
 bbot -t evilcorp.com -c http_proxy=http://127.0.0.1:8080
 ```
 
-Or, in `~/.config/bbot/config.yml`:
+Or, in `~/.config/bbot/bbot.yml`:
 
 ```yaml title="~/.bbot/config/bbot.yml"
 http_proxy: http://127.0.0.1:8080

From 48b2086a9050c3a80fa72850661d58db2877f9fa Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 23 Dec 2024 16:11:46 -0500
Subject: [PATCH 072/147] fix Event.hash()

---
 bbot/models/pydantic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index d9f8cde643..a8c983c236 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -12,7 +12,7 @@ class BBOTBaseModel(BaseModel):
     model_config = ConfigDict(extra="ignore")
 
     def to_json(self, **kwargs):
-        return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs)
+        return json.dumps(self.model_dump(), sort_keys=True, **kwargs)
 
     def __hash__(self):
         return hash(self.to_json())

From 07a6c48d241d93afccf328cc96fe195b7a3a76b1 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Wed, 22 Jan 2025 11:44:34 -0500
Subject: [PATCH 073/147] reverse host

---
 bbot/models/pydantic.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index a8c983c236..5d766cbeba 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -11,6 +11,11 @@ class BBOTBaseModel(BaseModel):
     model_config = ConfigDict(extra="ignore")
 
+    def __init__(self, **data):
+        super().__init__(**data)
+        if getattr(self, "host", ""):
+            self.reverse_host = self.host[::-1]
+
     def to_json(self, **kwargs):
         return json.dumps(self.model_dump(), sort_keys=True, **kwargs)
 
@@ -83,11 +88,6 @@ class Event(BBOTBaseModel):
     discovery_path: List[str] = []
     parent_chain: List[str] = []
 
-    def __init__(self, **data):
-        super().__init__(**data)
-        if self.host:
-            self.reverse_host = self.host[::-1]
-
     def get_data(self):
         if self.data is not None:
             return self.data

From 09e861d71384d5bdf700d9f207c8d1cf67a7eb04 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 8 Feb 2025 12:21:53 -0500
Subject: [PATCH 074/147] archived

---
 bbot/models/pydantic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index 5d766cbeba..4524885ff2 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -87,6 +87,7 @@ class Event(BBOTBaseModel):
     discovery_context: str = ""
     discovery_path: List[str] = []
     parent_chain: List[str] = []
+    archived: bool = False
 
     def get_data(self):
         if self.data is not None:

From cd45037561e24b24c341d66fc00ecaac9310fc3a Mon Sep 17 00:00:00 2001
From: github-actions
Date: Wed, 5 Mar 2025 09:51:25 -0500
Subject: [PATCH 075/147] fix conflict

---
 pyproject.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c9112028b6..9d2579b998 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,6 @@
 [tool.poetry]
 name = "bbot"
-<<<<<<< HEAD
 version = "3.0.0"
-=======
-version = "2.4.0"
->>>>>>> dev
 description = "OSINT automation for hackers."
authors = [ "TheTechromancer", From a5959a6bdeb5c78cf3fc03c220732edeb15683bf Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 18 Mar 2025 11:15:59 -0400 Subject: [PATCH 076/147] add hashing to pydantic model --- bbot/models/pydantic.py | 3 +++ bbot/test/test_step_1/test_db_models.py | 36 +++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 4524885ff2..499ee36c88 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -94,6 +94,9 @@ def get_data(self): return self.data return self.data_json + def __hash__(self): + return hash(self.id) + ### SCAN ### diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 9c71390696..23187cae42 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -7,7 +7,7 @@ from ..bbot_fixtures import * # noqa -def test_pydantic_models(events): +def test_pydantic_models(events, bbot_scanner): # test datetime helpers now = datetime.now(ZoneInfo("America/New_York")) @@ -48,12 +48,44 @@ def test_pydantic_models(events): assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) - event_pydantic_dict = event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) + event_pydantic_dict = event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at", "archived"]) assert event_pydantic_dict == event_json event_pydantic_dict.pop("scan") event_pydantic_dict.pop("module") event_pydantic_dict.pop("module_sequence") assert event_reconstituted.json() == event_pydantic_dict + # make sure we can dedupe events by their id + scan = bbot_scanner() + event1 = scan.make_event("1.2.3.4", parent=scan.root_event) + event2 = scan.make_event("1.2.3.4", parent=scan.root_event) + event3 = scan.make_event("evilcorp.com", parent=scan.root_event) + event4 = scan.make_event("evilcorp.com", parent=scan.root_event) + # first two events are IPS + assert event1.uuid != event2.uuid + assert event1.id == event2.id + # second two are DNS + assert event2.uuid != event3.uuid + assert event2.id != event3.id + assert event3.uuid != event4.uuid + assert event3.id == event4.id + + event_set_bbot = { + event1, + event2, + event3, + event4, + } + assert len(event_set_bbot) == 2 + assert set([e.type for e in event_set_bbot]) == {"IP_ADDRESS", "DNS_NAME"} + + event_set_pydantic = { + Event(**event1.json()), + Event(**event2.json()), + Event(**event3.json()), + Event(**event4.json()), + } + assert len(event_set_pydantic) == 2 + assert set([e.type for e in event_set_pydantic]) == {"IP_ADDRESS", "DNS_NAME"} # TODO: SQL From 6557fb2df46f05f69192493e28ceb48e8deb0349 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 18 Mar 2025 11:17:06 -0400 Subject: [PATCH 077/147] ruffed --- bbot/models/pydantic.py | 3 +++ bbot/models/sql.py | 3 +++ bbot/test/bbot_fixtures.py | 1 - bbot/test/test_step_1/test_db_models.py | 6 ++++-- .../module_tests/test_module_elastic.py | 21 +++++++++---------- .../module_tests/test_module_mongo.py | 7 +++---- 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 499ee36c88..eed14356d8 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -59,6 +59,7 @@ def _indexed_fields(cls): ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -100,6 +101,7 
@@ def __hash__(self): ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str @@ -123,6 +125,7 @@ def from_scan(cls, scan): ### TARGET ### + class Target(BBOTBaseModel): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/models/sql.py b/bbot/models/sql.py index 78465511f6..82ccdb1f6f 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -61,6 +61,7 @@ def __eq__(self, other): ### EVENT ### + class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -105,6 +106,7 @@ def get_data(self): ### SCAN ### + class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -119,6 +121,7 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### + class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 58038d860b..b12654e41e 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -147,7 +147,6 @@ def helpers(scan): @pytest.fixture def events(scan): - dummy_module = scan._make_dummy_module("dummy_module") class bbot_events: diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 23187cae42..0e175f3418 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -8,7 +8,6 @@ def test_pydantic_models(events, bbot_scanner): - # test datetime helpers now = datetime.now(ZoneInfo("America/New_York")) utc_now = utc_datetime_validator(now) @@ -48,7 +47,9 @@ def test_pydantic_models(events, bbot_scanner): assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) - event_pydantic_dict = event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at", "archived"]) + event_pydantic_dict = event_pydantic.model_dump( + exclude_none=True, exclude=["reverse_host", "inserted_at", "archived"] + ) assert event_pydantic_dict == event_json event_pydantic_dict.pop("scan") event_pydantic_dict.pop("module") @@ -88,4 +89,5 @@ def test_pydantic_models(events, bbot_scanner): assert len(event_set_pydantic) == 2 assert set([e.type for e in event_set_pydantic]) == {"IP_ADDRESS", "DNS_NAME"} + # TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index db9f2359f7..550371f70f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -52,7 +52,7 @@ async def setup_before_prep(self, module_test): time.sleep(0.5) # Ensure the index is empty - await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) + await client.delete("https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) async def check(self, module_test, events): try: @@ -63,10 +63,9 @@ async def check(self, module_test, events): # Connect to Elasticsearch async with httpx.AsyncClient(verify=False) as client: - # Fetch all events from the index response = await client.get( - f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") + "https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") ) response_json = response.json() db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] @@ -91,15 +90,15 @@ async def check(self, module_test, events): 
), None, ) - assert ( - main_event is not None - ), "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + assert main_event is not None, ( + "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + ) # Ensure it has the reverse_host attribute expected_reverse_host = "blacklanternsecurity.com"[::-1] - assert ( - main_event.get("reverse_host") == expected_reverse_host - ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + assert main_event.get("reverse_host") == expected_reverse_host, ( + f"reverse_host attribute is not correct, expected {expected_reverse_host}" + ) # Events don't match exactly because the elastic ones have reverse_host and inserted_at assert events_json != db_events_pydantic @@ -113,11 +112,11 @@ async def check(self, module_test, events): # Clean up: Delete all documents in the index async with httpx.AsyncClient(verify=False) as client: response = await client.delete( - f"https://localhost:9200/bbot_test_events", + "https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife"), params={"ignore": "400,404"}, ) - self.log.verbose(f"Deleted documents from index") + self.log.verbose("Deleted documents from index") await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index ac28e64e7b..3a85e1ee56 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -20,7 +20,6 @@ class TestMongo(ModuleTestBase): skip_distro_tests = True async def setup_before_prep(self, module_test): - await asyncio.create_subprocess_exec( "docker", "run", @@ -110,9 +109,9 @@ async def check(self, module_test, events): # Ensure it has the reverse_host attribute expected_reverse_host = "blacklanternsecurity.com"[::-1] - assert ( - main_event.get("reverse_host") == expected_reverse_host - ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + assert main_event.get("reverse_host") == expected_reverse_host, ( + f"reverse_host attribute is not correct, expected {expected_reverse_host}" + ) # Events don't match exactly because the mongo ones have reverse_host and inserted_at assert events_json != db_events_pydantic From dcf70f687e4043e0a82b05892e059e2f791ea7ae Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 18 Mar 2025 11:43:42 -0400 Subject: [PATCH 078/147] fix elastic --- bbot/modules/output/elastic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py index 42c331c516..064c00af7c 100644 --- a/bbot/modules/output/elastic.py +++ b/bbot/modules/output/elastic.py @@ -7,7 +7,7 @@ class Elastic(HTTP): """ watched_events = ["*"] - metadata = { + meta = { "description": "Send scan results to Elasticsearch", "created_date": "2022-11-21", "author": "@TheTechromancer", From f3f5fecba038aba34d0505b57ec602d772d2959f Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 18 Mar 2025 12:54:57 -0400 Subject: [PATCH 079/147] timezone things --- bbot/modules/output/nmap_xml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/modules/output/nmap_xml.py b/bbot/modules/output/nmap_xml.py index 52698e0de8..9a0cee27eb 100644 --- a/bbot/modules/output/nmap_xml.py +++ b/bbot/modules/output/nmap_xml.py @@ -1,6 +1,7 @@ import sys 
from xml.dom import minidom from datetime import datetime +from zoneinfo import ZoneInfo from xml.etree.ElementTree import Element, SubElement, tostring from bbot import __version__ @@ -76,7 +77,7 @@ async def handle_event(self, event): async def report(self): scan_start_time = str(int(self.scan.start_time.timestamp())) scan_start_time_str = self.scan.start_time.strftime("%a %b %d %H:%M:%S %Y") - scan_end_time = datetime.now() + scan_end_time = datetime.now(ZoneInfo("UTC")) scan_end_time_str = scan_end_time.strftime("%a %b %d %H:%M:%S %Y") scan_end_time_timestamp = str(scan_end_time.timestamp()) scan_duration = scan_end_time - self.scan.start_time From 03eb3cab374629aee6558f9448cde59967065d4d Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 19 Mar 2025 13:36:14 -0400 Subject: [PATCH 080/147] fix elastic --- bbot/modules/subdomaincenter.py | 1 + bbot/test/test_step_2/module_tests/test_module_elastic.py | 1 + docs/scanning/output.md | 6 +++--- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/bbot/modules/subdomaincenter.py b/bbot/modules/subdomaincenter.py index 077ccf1a6c..c58a0779df 100644 --- a/bbot/modules/subdomaincenter.py +++ b/bbot/modules/subdomaincenter.py @@ -1,6 +1,7 @@ from bbot.modules.templates.subdomain_enum import subdomain_enum + class subdomaincenter(subdomain_enum): flags = ["subdomain-enum", "passive", "safe"] watched_events = ["DNS_NAME"] diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 550371f70f..b3af6e694a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -105,6 +105,7 @@ async def check(self, module_test, events): for db_event in db_events_pydantic: db_event.pop("reverse_host") db_event.pop("inserted_at") + db_event.pop("archived") # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" diff --git a/docs/scanning/output.md b/docs/scanning/output.md index f064dab242..6063c0a893 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -165,9 +165,9 @@ docker run -d -p 9200:9200 --name=bbot-elastic --v "$(pwd)/elastic_data:/usr/sha ```bash # send scan results directly to elasticsearch -# note: you can replace "bbot_events" with your own index name +# note: you can replace "bbot" with your own index name bbot -t evilcorp.com -om elastic -c \ - modules.elastic.url=https://localhost:9200/bbot_events/_doc \ + modules.elastic.url=https://localhost:9200/bbot/_doc \ modules.elastic.password=bbotislife ``` @@ -177,7 +177,7 @@ Alternatively, via a preset: config: modules: elastic: - url: http://localhost:9200/bbot_events/_doc + url: http://localhost:9200/bbot/_doc password: bbotislife ``` From a6576691733b9b8ae6aaa5c271f9daa48d0a6b55 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 19 Mar 2025 14:27:36 -0400 Subject: [PATCH 081/147] ruffed --- bbot/modules/subdomaincenter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bbot/modules/subdomaincenter.py b/bbot/modules/subdomaincenter.py index c58a0779df..077ccf1a6c 100644 --- a/bbot/modules/subdomaincenter.py +++ b/bbot/modules/subdomaincenter.py @@ -1,7 +1,6 @@ from bbot.modules.templates.subdomain_enum import subdomain_enum - class subdomaincenter(subdomain_enum): flags = ["subdomain-enum", "passive", "safe"] watched_events = ["DNS_NAME"] From 2e835febe13ec05b47faca51df666e40b90ad231 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 20 Mar 2025 
11:13:20 -0400 Subject: [PATCH 082/147] delete generic ssrf --- bbot/modules/generic_ssrf.py | 262 ------------------ .../module_tests/test_module_generic_ssrf.py | 88 ------ 2 files changed, 350 deletions(-) delete mode 100644 bbot/modules/generic_ssrf.py delete mode 100644 bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py diff --git a/bbot/modules/generic_ssrf.py b/bbot/modules/generic_ssrf.py deleted file mode 100644 index 6ccde510b9..0000000000 --- a/bbot/modules/generic_ssrf.py +++ /dev/null @@ -1,262 +0,0 @@ -from bbot.errors import InteractshError -from bbot.modules.base import BaseModule - - -ssrf_params = [ - "Dest", - "Redirect", - "URI", - "Path", - "Continue", - "URL", - "Window", - "Next", - "Data", - "Reference", - "Site", - "HTML", - "Val", - "Validate", - "Domain", - "Callback", - "Return", - "Page", - "Feed", - "Host", - "Port", - "To", - "Out", - "View", - "Dir", - "Show", - "Navigation", - "Open", -] - - -class BaseSubmodule: - technique_description = "base technique description" - severity = "INFO" - paths = [] - - def __init__(self, generic_ssrf): - self.generic_ssrf = generic_ssrf - self.test_paths = self.create_paths() - - def set_base_url(self, event): - return f"{event.parsed_url.scheme}://{event.parsed_url.netloc}" - - def create_paths(self): - return self.paths - - async def test(self, event): - base_url = self.set_base_url(event) - for test_path_result in self.test_paths: - for lower in [True, False]: - test_path = test_path_result[0] - if lower: - test_path = test_path.lower() - subdomain_tag = test_path_result[1] - test_url = f"{base_url}{test_path}" - self.generic_ssrf.debug(f"Sending request to URL: {test_url}") - r = await self.generic_ssrf.helpers.curl(url=test_url) - if r: - self.process(event, r, subdomain_tag) - - def process(self, event, r, subdomain_tag): - response_token = self.generic_ssrf.interactsh_domain.split(".")[0][::-1] - if response_token in r: - echoed_response = True - else: - echoed_response = False - - self.generic_ssrf.interactsh_subdomain_tags[subdomain_tag] = ( - event, - self.technique_description, - self.severity, - echoed_response, - ) - - -class Generic_SSRF(BaseSubmodule): - technique_description = "Generic SSRF (GET)" - severity = "HIGH" - - def set_base_url(self, event): - return event.data - - def create_paths(self): - test_paths = [] - for param in ssrf_params: - query_string = "" - subdomain_tag = self.generic_ssrf.helpers.rand_string(4) - ssrf_canary = f"{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" - self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param - query_string += f"{param}=http://{ssrf_canary}&" - test_paths.append((f"?{query_string.rstrip('&')}", subdomain_tag)) - return test_paths - - -class Generic_SSRF_POST(BaseSubmodule): - technique_description = "Generic SSRF (POST)" - severity = "HIGH" - - def set_base_url(self, event): - return event.data - - async def test(self, event): - test_url = f"{event.data}" - - post_data = {} - for param in ssrf_params: - subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False) - self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param - post_data[param] = f"http://{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" - - subdomain_tag_lower = self.generic_ssrf.helpers.rand_string(4, digits=False) - post_data_lower = { - k.lower(): f"http://{subdomain_tag_lower}.{self.generic_ssrf.interactsh_domain}" - for k, v in post_data.items() - } - - post_data_list = [(subdomain_tag, post_data), (subdomain_tag_lower, 
post_data_lower)] - - for tag, pd in post_data_list: - r = await self.generic_ssrf.helpers.curl(url=test_url, method="POST", post_data=pd) - self.process(event, r, tag) - - -class Generic_XXE(BaseSubmodule): - technique_description = "Generic XXE" - severity = "HIGH" - paths = None - - async def test(self, event): - rand_entity = self.generic_ssrf.helpers.rand_string(4, digits=False) - subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False) - - post_body = f""" - - -]> -&{rand_entity};""" - test_url = event.parsed_url.geturl() - r = await self.generic_ssrf.helpers.curl( - url=test_url, method="POST", raw_body=post_body, headers={"Content-type": "application/xml"} - ) - if r: - self.process(event, r, subdomain_tag) - - -class generic_ssrf(BaseModule): - watched_events = ["URL"] - produced_events = ["VULNERABILITY"] - flags = ["active", "aggressive", "web-thorough"] - meta = {"description": "Check for generic SSRFs", "created_date": "2022-07-30", "author": "@liquidsec"} - options = { - "skip_dns_interaction": False, - } - options_desc = { - "skip_dns_interaction": "Do not report DNS interactions (only HTTP interaction)", - } - in_scope_only = True - - deps_apt = ["curl"] - - async def setup(self): - self.submodules = {} - self.interactsh_subdomain_tags = {} - self.parameter_subdomain_tags_map = {} - self.severity = None - self.skip_dns_interaction = self.config.get("skip_dns_interaction", False) - - if self.scan.config.get("interactsh_disable", False) is False: - try: - self.interactsh_instance = self.helpers.interactsh() - self.interactsh_domain = await self.interactsh_instance.register(callback=self.interactsh_callback) - except InteractshError as e: - self.warning(f"Interactsh failure: {e}") - return False - else: - self.warning( - "The generic_ssrf module is completely dependent on interactsh to function, but it is disabled globally. Aborting." - ) - return None - - # instantiate submodules - for m in BaseSubmodule.__subclasses__(): - if m.__name__.startswith("Generic_"): - self.verbose(f"Starting generic_ssrf submodule: {m.__name__}") - self.submodules[m.__name__] = m(self) - - return True - - async def handle_event(self, event): - for s in self.submodules.values(): - await s.test(event) - - async def interactsh_callback(self, r): - protocol = r.get("protocol").upper() - if protocol == "DNS" and self.skip_dns_interaction: - return - - full_id = r.get("full-id", None) - subdomain_tag = full_id.split(".")[0] - - if full_id: - if "." 
in full_id: - match = self.interactsh_subdomain_tags.get(subdomain_tag) - if not match: - return - matched_event = match[0] - matched_technique = match[1] - matched_severity = match[2] - matched_echoed_response = str(match[3]) - - triggering_param = self.parameter_subdomain_tags_map.get(subdomain_tag, None) - description = f"Out-of-band interaction: [{matched_technique}]" - if triggering_param: - self.debug(f"Found triggering parameter: {triggering_param}") - description += f" [Triggering Parameter: {triggering_param}]" - description += f" [{protocol}] Echoed Response: {matched_echoed_response}" - - self.debug(f"Emitting event with description: {description}") # Debug the final description - - event_type = "VULNERABILITY" if protocol == "HTTP" else "FINDING" - event_data = { - "host": str(matched_event.host), - "url": matched_event.data, - "description": description, - } - if protocol == "HTTP": - event_data["severity"] = matched_severity - - await self.emit_event( - event_data, - event_type, - matched_event, - context=f"{{module}} scanned {matched_event.data} and detected {{event.type}}: {matched_technique}", - ) - else: - # this is likely caused by something trying to resolve the base domain first and can be ignored - self.debug("skipping result because subdomain tag was missing") - - async def cleanup(self): - if self.scan.config.get("interactsh_disable", False) is False: - try: - await self.interactsh_instance.deregister() - self.debug( - f"successfully deregistered interactsh session with correlation_id {self.interactsh_instance.correlation_id}" - ) - except InteractshError as e: - self.warning(f"Interactsh failure: {e}") - - async def finish(self): - if self.scan.config.get("interactsh_disable", False) is False: - await self.helpers.sleep(5) - try: - for r in await self.interactsh_instance.poll(): - await self.interactsh_callback(r) - except InteractshError as e: - self.debug(f"Error in interact.sh: {e}") diff --git a/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py b/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py deleted file mode 100644 index c0911fd661..0000000000 --- a/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py +++ /dev/null @@ -1,88 +0,0 @@ -import re -import asyncio -from werkzeug.wrappers import Response - -from .base import ModuleTestBase - - -def extract_subdomain_tag(data): - pattern = r"http://([a-z0-9]{4})\.fakedomain\.fakeinteractsh\.com" - match = re.search(pattern, data) - if match: - return match.group(1) - - -class TestGeneric_SSRF(ModuleTestBase): - targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "generic_ssrf"] - - def request_handler(self, request): - subdomain_tag = None - - if request.method == "GET": - subdomain_tag = extract_subdomain_tag(request.full_path) - elif request.method == "POST": - subdomain_tag = extract_subdomain_tag(request.data.decode()) - if subdomain_tag: - asyncio.run( - self.interactsh_mock_instance.mock_interaction( - subdomain_tag, msg=f"{request.method}: {request.data.decode()}" - ) - ) - - return Response("alive", status=200) - - async def setup_before_prep(self, module_test): - self.interactsh_mock_instance = module_test.mock_interactsh("generic_ssrf") - module_test.monkeypatch.setattr( - module_test.scan.helpers, "interactsh", lambda *args, **kwargs: self.interactsh_mock_instance - ) - - async def setup_after_prep(self, module_test): - expect_args = re.compile("/") - module_test.set_expect_requests_handler(expect_args=expect_args, request_handler=self.request_handler) 
- - def check(self, module_test, events): - total_vulnerabilities = 0 - total_findings = 0 - - for e in events: - if e.type == "VULNERABILITY": - total_vulnerabilities += 1 - elif e.type == "FINDING": - total_findings += 1 - - assert total_vulnerabilities == 30, "Incorrect number of vulnerabilities detected" - assert total_findings == 30, "Incorrect number of findings detected" - - assert any( - e.type == "VULNERABILITY" - and "Out-of-band interaction: [Generic SSRF (GET)]" - and "[Triggering Parameter: Dest]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (GET)" - assert any( - e.type == "VULNERABILITY" and "Out-of-band interaction: [Generic SSRF (POST)]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (POST)" - assert any( - e.type == "VULNERABILITY" and "Out-of-band interaction: [Generic XXE] [HTTP]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (XXE)" - - -class TestGeneric_SSRF_httponly(TestGeneric_SSRF): - config_overrides = {"modules": {"generic_ssrf": {"skip_dns_interaction": True}}} - - def check(self, module_test, events): - total_vulnerabilities = 0 - total_findings = 0 - - for e in events: - if e.type == "VULNERABILITY": - total_vulnerabilities += 1 - elif e.type == "FINDING": - total_findings += 1 - - assert total_vulnerabilities == 30, "Incorrect number of vulnerabilities detected" - assert total_findings == 0, "Incorrect number of findings detected" From 288caba044150366e4889241dcf9bcf6eb5f7e54 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 16 Apr 2025 11:00:44 -0400 Subject: [PATCH 083/147] strict dns scope --- bbot/cli.py | 2 +- bbot/defaults.yml | 6 +++--- bbot/models/pydantic.py | 2 +- bbot/models/sql.py | 2 +- bbot/scanner/preset/args.py | 4 ++-- bbot/scanner/preset/preset.py | 6 +++--- bbot/scanner/target.py | 10 +++++----- bbot/test/test_step_1/test_presets.py | 2 +- bbot/test/test_step_1/test_target.py | 8 ++++---- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/bbot/cli.py b/bbot/cli.py index 333ab8c202..779a5ab557 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -198,7 +198,7 @@ async def _main(): if sys.stdin.isatty(): # warn if any targets belong directly to a cloud provider - if not scan.preset.strict_scope: + if not scan.preset.strict_dns_scope: for event in scan.target.seeds.event_seeds: if event.type == "DNS_NAME": cloudcheck_result = scan.helpers.cloudcheck(event.host) diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 1a1aa62bb0..165974bdb4 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -14,9 +14,9 @@ folder_blobs: false ### SCOPE ### scope: - # strict scope means only exact DNS names are considered in-scope - # subdomains are not included unless they are explicitly provided in the target list - strict: false + # strict DNS scope means only exact DNS names are considered in-scope + # subdomains are not included unless they are explicitly whitelisted + strict_dns: false # Filter by scope distance which events are displayed in the output # 0 == show only in-scope events (affiliates are always shown) # 1 == show all events up to distance-1 (1 hop from target) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index eed14356d8..816a10b0cd 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -128,7 +128,7 @@ def from_scan(cls, scan): class Target(BBOTBaseModel): name: str = "Default Target" - strict_scope: bool = False + strict_dns_scope: bool = False seeds: List = [] whitelist: List = [] blacklist: 
List = [] diff --git a/bbot/models/sql.py b/bbot/models/sql.py index 82ccdb1f6f..8e5c12bc15 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -124,7 +124,7 @@ class Scan(BBOTBaseModel, table=True): class Target(BBOTBaseModel, table=True): name: str = "Default Target" - strict_scope: bool = False + strict_dns_scope: bool = False seeds: List = Field(default=[], sa_type=JSON) whitelist: List = Field(default=None, sa_type=JSON) blacklist: List = Field(default=[], sa_type=JSON) diff --git a/bbot/scanner/preset/args.py b/bbot/scanner/preset/args.py index 2d07daafad..5ea6950bfe 100644 --- a/bbot/scanner/preset/args.py +++ b/bbot/scanner/preset/args.py @@ -197,8 +197,8 @@ def preset_from_args(self): raise BBOTArgumentError(f'Error parsing command-line config option: "{config_arg}": {e}') # strict scope - if self.parsed.strict_scope: - args_preset.core.merge_custom({"scope": {"strict": True}}) + if self.parsed.strict_dns_scope: + args_preset.core.merge_custom({"scope": {"strict_dns": True}}) return args_preset diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 5f40b93dd9..83df3bd940 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -483,7 +483,7 @@ def bake(self, scan=None): from bbot.scanner.target import BBOTTarget baked_preset._target = BBOTTarget( - *list(self._seeds), whitelist=self._whitelist, blacklist=self._blacklist, strict_scope=self.strict_scope + *list(self._seeds), whitelist=self._whitelist, blacklist=self._blacklist, strict_dns_scope=self.strict_dns_scope ) # evaluate conditions @@ -558,8 +558,8 @@ def scope_config(self): return self.config.get("scope", {}) @property - def strict_scope(self): - return self.scope_config.get("strict", False) + def strict_dns_scope(self): + return self.scope_config.get("strict_dns", False) def apply_log_level(self, apply_core=False): # silent takes precedence diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index e7a3679ea3..6c58f363d7 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -214,12 +214,12 @@ class BBOTTarget: Provides high-level functions like in_scope(), which includes both whitelist and blacklist checks. 
""" - def __init__(self, *seeds, whitelist=None, blacklist=None, strict_scope=False): - self.strict_scope = strict_scope - self.seeds = ScanSeeds(*seeds, strict_dns_scope=strict_scope) + def __init__(self, *seeds, whitelist=None, blacklist=None, strict_dns_scope=False): + self.strict_dns_scope = strict_dns_scope + self.seeds = ScanSeeds(*seeds, strict_dns_scope=strict_dns_scope) if whitelist is None: whitelist = self.seeds.hosts - self.whitelist = ScanWhitelist(*whitelist, strict_dns_scope=strict_scope) + self.whitelist = ScanWhitelist(*whitelist, strict_dns_scope=strict_dns_scope) if blacklist is None: blacklist = [] self.blacklist = ScanBlacklist(*blacklist) @@ -230,7 +230,7 @@ def json(self): "seeds": sorted(self.seeds.inputs), "whitelist": sorted(self.whitelist.inputs), "blacklist": sorted(self.blacklist.inputs), - "strict_scope": self.strict_scope, + "strict_dns_scope": self.strict_dns_scope, "hash": self.hash.hex(), "seed_hash": self.seeds.hash.hex(), "whitelist_hash": self.whitelist.hash.hex(), diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 20ef9c1694..d5714dfb98 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -180,7 +180,7 @@ def test_preset_scope(): blank_preset = blank_preset.bake() assert not blank_preset.target.seeds assert not blank_preset.target.whitelist - assert blank_preset.strict_scope is False + assert blank_preset.strict_dns_scope is False preset1 = Preset( "evilcorp.com", diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index a368718048..e2de5fb6f3 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -172,7 +172,7 @@ async def test_target_basic(bbot_scanner): bbottarget3 = BBOTTarget("evilcorp.com", whitelist=["1.2.3.4/24"], blacklist=["1.2.3.4"]) bbottarget5 = BBOTTarget("evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"]) bbottarget6 = BBOTTarget( - "evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"], strict_scope=True + "evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"], strict_dns_scope=True ) bbottarget8 = BBOTTarget("1.2.3.0/24", whitelist=["evilcorp.com", "evilcorp.net"], blacklist=["1.2.3.4"]) bbottarget9 = BBOTTarget("evilcorp.com", "evilcorp.net", whitelist=["1.2.3.0/24"], blacklist=["1.2.3.4"]) @@ -203,7 +203,7 @@ async def test_target_basic(bbot_scanner): assert bbottarget1 != bbottarget2 assert bbottarget2 != bbottarget1 - # make sure strict_scope is considered in hash + # make sure strict_dns_scope is considered in hash assert bbottarget5 != bbottarget6 assert bbottarget6 != bbottarget5 @@ -304,7 +304,7 @@ async def test_target_basic(bbot_scanner): assert target_dict["seeds"] == ["1.2.3.0/24", "bob@fdsa.evilcorp.net", "http://www.evilcorp.net/"] assert target_dict["whitelist"] == ["bob@www.evilcorp.com", "evilcorp.com", "evilcorp.net"] assert target_dict["blacklist"] == ["1.2.3.4", "4.3.2.0/24", "bob@asdf.evilcorp.net", "http://1.2.3.4/"] - assert target_dict["strict_scope"] is False + assert target_dict["strict_dns_scope"] is False assert target_dict["hash"] == "b36955a8238a71842fc5f23b11110c26ea07d451" assert target_dict["seed_hash"] == "560af51d1f3d69bc5c156fc270b28497fe52dec1" assert target_dict["whitelist_hash"] == "8ed0a7368e6d34630e1cfd419d2a73767debc4c4" @@ -327,7 +327,7 @@ async def test_target_basic(bbot_scanner): target = RadixTarget("www.evilcorp.com", "evilcorp.com", 
acl_mode=True) assert set(target) == {"evilcorp.com"} - # make sure strict_scope doesn't mess us up + # make sure strict_dns_scope doesn't mess us up target = RadixTarget("evilcorp.co.uk", "www.evilcorp.co.uk", acl_mode=True, strict_dns_scope=True) assert set(target.hosts) == {"evilcorp.co.uk", "www.evilcorp.co.uk"} assert "evilcorp.co.uk" in target From 7f3af826ac13639106b659e2c04061f6867a88e7 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 16 Apr 2025 11:02:26 -0400 Subject: [PATCH 084/147] ruffed --- bbot/scanner/preset/preset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 83df3bd940..b9b45607f0 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -483,7 +483,10 @@ def bake(self, scan=None): from bbot.scanner.target import BBOTTarget baked_preset._target = BBOTTarget( - *list(self._seeds), whitelist=self._whitelist, blacklist=self._blacklist, strict_dns_scope=self.strict_dns_scope + *list(self._seeds), + whitelist=self._whitelist, + blacklist=self._blacklist, + strict_dns_scope=self.strict_dns_scope, ) # evaluate conditions From 275d6cd20dcc5deb00b3836ab06a5183822627a5 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 16 Apr 2025 11:50:18 -0400 Subject: [PATCH 085/147] strict dns scope --- bbot/test/test_step_1/test_cli.py | 2 +- bbot/test/test_step_1/test_presets.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index c663810289..c38b2fc652 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -698,7 +698,7 @@ def test_cli_presets(monkeypatch, capsys, caplog): stdout_preset.pop("description") assert stdout_preset == { "config": { - "scope": {"strict": True}, + "scope": {"strict_dns": True}, "dns": {"minimal": True}, "modules": {"speculate": {"essential_only": True}}, }, diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index d5714dfb98..61807a8651 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -214,7 +214,7 @@ def test_preset_scope(): "evilcorp.org", whitelist=["evilcorp.de"], blacklist=["test.www.evilcorp.de"], - config={"scope": {"strict": True}}, + config={"scope": {"strict_dns": True}}, ) preset1.merge(preset3) From 42933e5cfcaadcbf829aec4f2525204005d54aed Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 16 Apr 2025 12:30:50 -0400 Subject: [PATCH 086/147] fix cli tests --- bbot/scanner/preset/args.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/scanner/preset/args.py b/bbot/scanner/preset/args.py index 5ea6950bfe..6368b8d41e 100644 --- a/bbot/scanner/preset/args.py +++ b/bbot/scanner/preset/args.py @@ -197,7 +197,7 @@ def preset_from_args(self): raise BBOTArgumentError(f'Error parsing command-line config option: "{config_arg}": {e}') # strict scope - if self.parsed.strict_dns_scope: + if self.parsed.strict_scope: args_preset.core.merge_custom({"scope": {"strict_dns": True}}) return args_preset @@ -227,7 +227,7 @@ def create_parser(self, *args, **kwargs): target.add_argument( "--strict-scope", action="store_true", - help="Don't consider subdomains of target/whitelist to be in-scope", + help="Don't consider subdomains of target/whitelist to be in-scope - exact matches only", ) presets = p.add_argument_group(title="Presets") presets.add_argument( From 5788382a1c553675d2f3cf6adc4716d3156f805e Mon 
Sep 17 00:00:00 2001 From: github-actions Date: Wed, 16 Apr 2025 13:09:12 -0400 Subject: [PATCH 087/147] whitelist should be null if not specified --- bbot/scanner/target.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 6c58f363d7..95ca6ac3d3 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -217,6 +217,7 @@ class BBOTTarget: def __init__(self, *seeds, whitelist=None, blacklist=None, strict_dns_scope=False): self.strict_dns_scope = strict_dns_scope self.seeds = ScanSeeds(*seeds, strict_dns_scope=strict_dns_scope) + self._orig_whitelist = whitelist if whitelist is None: whitelist = self.seeds.hosts self.whitelist = ScanWhitelist(*whitelist, strict_dns_scope=strict_dns_scope) @@ -228,7 +229,7 @@ def __init__(self, *seeds, whitelist=None, blacklist=None, strict_dns_scope=Fals def json(self): return { "seeds": sorted(self.seeds.inputs), - "whitelist": sorted(self.whitelist.inputs), + "whitelist": (None if not self._orig_whitelist else sorted(self.whitelist.inputs)), "blacklist": sorted(self.blacklist.inputs), "strict_dns_scope": self.strict_dns_scope, "hash": self.hash.hex(), From 2ea6cb6960618512787d24869c1b3f3551f22945 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 16 Apr 2025 17:25:34 -0400 Subject: [PATCH 088/147] fix tests --- bbot/scanner/preset/args.py | 2 +- bbot/scanner/preset/preset.py | 2 +- bbot/test/test_step_1/test_cli.py | 2 +- bbot/test/test_step_1/test_presets.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bbot/scanner/preset/args.py b/bbot/scanner/preset/args.py index 6368b8d41e..a34847004b 100644 --- a/bbot/scanner/preset/args.py +++ b/bbot/scanner/preset/args.py @@ -198,7 +198,7 @@ def preset_from_args(self): # strict scope if self.parsed.strict_scope: - args_preset.core.merge_custom({"scope": {"strict_dns": True}}) + args_preset.core.merge_custom({"scope": {"strict": True}}) return args_preset diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index b9b45607f0..268d2af545 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -562,7 +562,7 @@ def scope_config(self): @property def strict_dns_scope(self): - return self.scope_config.get("strict_dns", False) + return self.scope_config.get("strict", False) def apply_log_level(self, apply_core=False): # silent takes precedence diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index c38b2fc652..c663810289 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -698,7 +698,7 @@ def test_cli_presets(monkeypatch, capsys, caplog): stdout_preset.pop("description") assert stdout_preset == { "config": { - "scope": {"strict_dns": True}, + "scope": {"strict": True}, "dns": {"minimal": True}, "modules": {"speculate": {"essential_only": True}}, }, diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 61807a8651..d5714dfb98 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -214,7 +214,7 @@ def test_preset_scope(): "evilcorp.org", whitelist=["evilcorp.de"], blacklist=["test.www.evilcorp.de"], - config={"scope": {"strict_dns": True}}, + config={"scope": {"strict": True}}, ) preset1.merge(preset3) From 5288bb02d38be6e47304373776e9d3caae858e43 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 10:22:25 -0400 Subject: [PATCH 089/147] better error handling in modules --- bbot/modules/base.py | 53 
++++++++++++++++++++++------------------- bbot/scanner/scanner.py | 2 +- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index b3aebf867c..18bfa91bcf 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -655,8 +655,8 @@ async def _worker(self): - Each event is subject to a post-check via '_event_postcheck()' to decide whether it should be handled. - Special 'FINISHED' events trigger the 'finish()' method of the module. """ - async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): - try: + try: + async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): while not self.scan.stopping and not self.errored: # hold the reigns if our outgoing queue is full if self._qsize > 0 and self.outgoing_event_queue.qsize() >= self._qsize: @@ -696,16 +696,18 @@ async def _worker(self): self.debug(f"Finished handling {event}") else: self.debug(f"Not accepting {event} because {reason}") - except asyncio.CancelledError: - # this trace was used for debugging leaked CancelledErrors from inside httpx - # self.log.trace("Worker cancelled") - raise - except BaseException as e: - if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): - self.scan.stop() - else: - self.error(f"Critical failure in module {self.name}: {e}") - self.error(traceback.format_exc()) + except asyncio.CancelledError: + # this trace was used for debugging leaked CancelledErrors from inside httpx + # self.log.trace("Worker cancelled") + raise + except RuntimeError as e: + self.trace(f"RuntimeError in module {self.name}: {e}") + except BaseException as e: + if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): + self.scan.stop() + else: + self.error(f"Critical failure in module {self.name}: {e}") + self.error(traceback.format_exc()) self.log.trace("Worker stopped") @property @@ -1653,8 +1655,8 @@ class BaseInterceptModule(BaseModule): _intercept = True async def _worker(self): - async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): - try: + try: + async with self.scan._acatch(context=self._worker, unhandled_is_critical=True): while not self.scan.stopping and not self.errored: try: if self.incoming_event_queue is not False: @@ -1710,16 +1712,19 @@ async def _worker(self): self.debug(f"Forwarding {event}") await self.forward_event(event, kwargs) - except asyncio.CancelledError: - # this trace was used for debugging leaked CancelledErrors from inside httpx - # self.log.trace("Worker cancelled") - raise - except BaseException as e: - if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): - self.scan.stop() - else: - self.critical(f"Critical failure in intercept module {self.name}: {e}") - self.critical(traceback.format_exc()) + except asyncio.CancelledError: + # this trace was used for debugging leaked CancelledErrors from inside httpx + # self.log.trace("Worker cancelled") + raise + except RuntimeError as e: + self.trace(f"RuntimeError in intercept module {self.name}: {e}") + except BaseException as e: + if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): + self.scan.stop() + else: + self.critical(f"Critical failure in intercept module {self.name}: {e}") + self.critical(traceback.format_exc()) + self.log.trace("Worker stopped") async def get_incoming_event(self): diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 4b8109e862..1f0bd3e739 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -312,7 +312,7 @@ async def _prep(self): self._fail_setup(msg) 
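# Illustrative aside, not part of the patch: the _worker() rework in
# base.py above moves the try/except outside scan._acatch() and adds a
# dedicated RuntimeError branch, so benign shutdown races are traced
# instead of being reported as critical failures. A minimal sketch of
# the pattern, with placeholder names:
#
#     try:
#         async with scan._acatch(context=worker, unhandled_is_critical=True):
#             await process_events()
#     except asyncio.CancelledError:
#         raise                        # cancellation must propagate
#     except RuntimeError as e:
#         trace(f"RuntimeError: {e}")  # e.g. a queue touched mid-shutdown
#     except BaseException as e:
#         error(f"Critical failure: {e}")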
total_modules = total_failed + len(self.modules) - success_msg = f"Setup succeeded for {len(self.modules):,}/{total_modules:,} modules." + success_msg = f"Setup succeeded for {len(self.modules) - 2:,}/{total_modules - 2:,} modules." self.success(success_msg) self._prepped = True From f5ce513cd1674529ee8e54cff18276dd71497584 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 10:38:27 -0400 Subject: [PATCH 090/147] allow detailed status --- bbot/scanner/scanner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 1f0bd3e739..d84037ea6a 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -637,7 +637,7 @@ def num_queued_events(self): total += len(q._queue) return total - def modules_status(self, _log=False): + def modules_status(self, _log=False, detailed=False): finished = True status = {"modules": {}} @@ -707,7 +707,7 @@ def modules_status(self, _log=False): f"{self.name}: No events in queue ({self.stats.speedometer.speed:,} processed in the past {self.status_frequency} seconds)" ) - if self.log_level <= logging.DEBUG: + if detailed or self.log_level <= logging.DEBUG: # status debugging scan_active_status = [] scan_active_status.append(f"scan._finished_init: {self._finished_init}") From 61234c4f326ca81f952100678033e9d4ba9d4d31 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 11:02:28 -0400 Subject: [PATCH 091/147] fix scan tests --- bbot/test/test_step_1/test_scan.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index 814d4a5248..a0bf0b1008 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -35,9 +35,9 @@ async def test_scan( assert not scan0.in_scope("www.evilcorp.co.uk") j = scan0.json assert set(j["target"]["seeds"]) == {"1.1.1.0", "1.1.1.0/31", "evilcorp.com", "test.evilcorp.com"} - # we preserve the original whitelist inputs - assert set(j["target"]["whitelist"]) == {"1.1.1.0/32", "1.1.1.0/31", "evilcorp.com", "test.evilcorp.com"} - # but in the background they are collapsed + # no whitelist was set + assert j["target"]["whitelist"] is None + # but functionally it was copied from the seeds, and collapsed assert scan0.target.whitelist.hosts == {ip_network("1.1.1.0/31"), "evilcorp.com"} assert set(j["target"]["blacklist"]) == {"1.1.1.0/28", "www.evilcorp.com"} assert "ipneighbor" in j["preset"]["modules"] From f5f3049c8a3f099a00c5148c9a7470d79f73281e Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 11:33:08 -0400 Subject: [PATCH 092/147] none whitelist --- bbot/test/test_step_2/module_tests/test_module_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index 3641574213..de37354d0f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -29,7 +29,7 @@ def check(self, module_test, events): assert scan["uuid"] == str(module_test.scan.root_event.uuid) assert scan["parent_uuid"] == str(module_test.scan.root_event.uuid) assert scan["data_json"]["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan["data_json"]["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["whitelist"] is None assert dns_json["data"] == dns_data assert dns_json["id"] == 
str(dns_event.id) assert dns_json["uuid"] == str(dns_event.uuid) @@ -46,7 +46,7 @@ def check(self, module_test, events): assert scan_reconstructed.uuid == scan_event.uuid assert scan_reconstructed.parent_uuid == scan_event.uuid assert scan_reconstructed.data["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan_reconstructed.data["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan_reconstructed.data["target"]["whitelist"] is None assert dns_reconstructed.data == dns_data assert dns_reconstructed.uuid == dns_event.uuid assert dns_reconstructed.parent_uuid == module_test.scan.root_event.uuid From 5c93543415ece636adb9430f631c1e85600ae55a Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 12:39:47 -0400 Subject: [PATCH 093/147] fix sqlite test --- bbot/models/pydantic.py | 2 +- bbot/models/sql.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 816a10b0cd..45e74a0af8 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -130,7 +130,7 @@ class Target(BBOTBaseModel): name: str = "Default Target" strict_dns_scope: bool = False seeds: List = [] - whitelist: List = [] + whitelist: Optional[List] = None blacklist: List = [] hash: Annotated[str, "indexed", "unique"] scope_hash: Annotated[str, "indexed"] diff --git a/bbot/models/sql.py b/bbot/models/sql.py index 8e5c12bc15..d58034ccf6 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -126,7 +126,7 @@ class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_dns_scope: bool = False seeds: List = Field(default=[], sa_type=JSON) - whitelist: List = Field(default=None, sa_type=JSON) + whitelist: Optional[List] = Field(default=None, sa_type=JSON) blacklist: List = Field(default=[], sa_type=JSON) hash: str = Field(sa_column=Column("hash", String(length=255), unique=True, primary_key=True, index=True)) scope_hash: str = Field(sa_column=Column("scope_hash", String(length=255), index=True)) From 6b715b1ba3ec2a1827ad4da55340c9a4a8ea89b9 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 14:19:03 -0400 Subject: [PATCH 094/147] fix mongo? 
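
The pydantic Event model carries a few database-side fields (reverse_host,
inserted_at, and now archived) that the raw event JSON does not, so the
test strips them before comparing. Illustrative sketch of that
normalization, with the field list taken from the test below:

    for db_event in db_events_pydantic:
        for db_only_field in ("reverse_host", "inserted_at", "archived"):
            db_event.pop(db_only_field, None)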
--- bbot/test/test_step_2/module_tests/test_module_mongo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 3a85e1ee56..978d79c4b3 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -118,6 +118,7 @@ async def check(self, module_test, events): for db_event in db_events_pydantic: db_event.pop("reverse_host") db_event.pop("inserted_at") + db_event.pop("archived") # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" From d99f0b85aa6d62a43d2f33bb155c5679faac0273 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 14:39:53 -0400 Subject: [PATCH 095/147] fix kafka --- bbot/test/test_step_2/module_tests/test_module_kafka.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py index ee4c3a65f4..43d2eb4053 100644 --- a/bbot/test/test_step_2/module_tests/test_module_kafka.py +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -50,6 +50,9 @@ async def setup_before_prep(self, module_test): # Wait for Kafka to be ready await self.wait_for_port_open(9092) + await asyncio.sleep(1) + + async def check(self, module_test, events): from aiokafka import AIOKafkaConsumer self.consumer = AIOKafkaConsumer( @@ -59,7 +62,6 @@ async def setup_before_prep(self, module_test): ) await self.consumer.start() - async def check(self, module_test, events): try: events_json = [e.json() for e in events] events_json.sort(key=lambda x: x["timestamp"]) From 89c4fbd5c45c2e027ebde87040ef51d2661bfa51 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 17 Apr 2025 15:45:50 -0400 Subject: [PATCH 096/147] mark scan as finished --- bbot/scanner/scanner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index d84037ea6a..02f81933da 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -884,6 +884,7 @@ async def _cleanup(self): with contextlib.suppress(Exception): self.home.rmdir() self.helpers.clean_old_scans() + self.status = "FINISHED" def in_scope(self, *args, **kwargs): return self.preset.in_scope(*args, **kwargs) From 35cb4916bb71d912a0373d2fac200c6a2e813206 Mon Sep 17 00:00:00 2001 From: github-actions Date: Sat, 19 Apr 2025 10:58:10 -0400 Subject: [PATCH 097/147] fix rabbitmq tests --- .../module_tests/test_module_rabbitmq.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py index d05808c2da..0a050220aa 100644 --- a/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py +++ b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py @@ -1,5 +1,6 @@ import json import asyncio +import aio_pika from contextlib import suppress from .base import ModuleTestBase @@ -17,8 +18,6 @@ class TestRabbitMQ(ModuleTestBase): skip_distro_tests = True async def setup_before_prep(self, module_test): - import aio_pika - # Start RabbitMQ await asyncio.create_subprocess_exec( "docker", "run", "-d", "--rm", "--name", "bbot-test-rabbitmq", "-p", "5672:5672", "rabbitmq:3-management" @@ -36,18 +35,18 @@ async def setup_before_prep(self, module_test): self.log.verbose(f"Waiting for RabbitMQ to be ready: {e}") await 
asyncio.sleep(0.5) # Wait a bit before retrying - self.connection = connection - self.channel = await self.connection.channel() - self.queue = await self.channel.declare_queue("bbot_events", durable=True) - async def check(self, module_test, events): + connection = await aio_pika.connect_robust("amqp://guest:guest@localhost/") + channel = await connection.channel() + queue = await channel.declare_queue("bbot_events", durable=True) + try: events_json = [e.json() for e in events] events_json.sort(key=lambda x: x["timestamp"]) # Collect events from RabbitMQ rabbitmq_events = [] - async with self.queue.iterator() as queue_iter: + async with queue.iterator() as queue_iter: async for message in queue_iter: async with message.process(): event_data = json.loads(message.body.decode("utf-8")) @@ -62,7 +61,7 @@ async def check(self, module_test, events): finally: # Clean up: Close the RabbitMQ connection - await self.connection.close() + await connection.close() # Stop RabbitMQ container await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-rabbitmq", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE From 0c6c9d6b23aded0a76fa5da1c3debb5abad33e2f Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 21 Apr 2025 10:10:45 -0400 Subject: [PATCH 098/147] keep dispatcher alive til end --- bbot/scanner/scanner.py | 8 +++++-- bbot/test/test_step_1/test_presets.py | 33 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 02f81933da..3791f053cf 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -425,7 +425,11 @@ async def async_start(self): await self._report() await self._cleanup() + # report on final scan status and shut down dispatcher await self.dispatcher.on_finish(self) + self.dispatcher_task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await self.dispatcher_task.wait() self._stop_log_handlers() @@ -828,8 +832,8 @@ def _cancel_tasks(self): # ticker if self.ticker_task: tasks.append(self.ticker_task) - # dispatcher - tasks += self.dispatcher_tasks + # we don't cancel the dispatcher task because it still needs to report on the final scan status + # tasks += self.dispatcher_tasks # manager worker loops tasks += self._manager_worker_loop_tasks self.helpers.cancel_tasks_sync(tasks) diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index d5714dfb98..80056dc7f3 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -568,6 +568,39 @@ def test_preset_module_resolution(clean_default_config): } +@pytest.mark.asyncio +async def test_custom_module_dir(): + custom_module_dir = bbot_test_dir / "custom_modules" + custom_module_dir.mkdir(parents=True, exist_ok=True) + + custom_module = custom_module_dir / "testmodule.py" + with open(custom_module, "w") as f: + f.write( + """ +from bbot.modules.base import BaseModule + +class TestModule(BaseModule): + watched_events = ["SCAN"] + + async def handle_event(self, event): + await self.emit_event("127.0.0.2", parent=event) +""" + ) + + preset = { + "module_dirs": [str(custom_module_dir)], + "modules": ["testmodule"], + } + preset = Preset.from_dict(preset) + + scan = Scanner("127.0.0.0/24", preset=preset) + events = [e async for e in scan.async_start()] + event_data = [(str(e.data), str(e.module)) for e in events] + assert ("127.0.0.2", "testmodule") in event_data + + shutil.rmtree(custom_module_dir) + + @pytest.mark.asyncio async def 
test_preset_module_loader(): custom_module_dir = bbot_test_dir / "custom_module_dir" From 1148376d167c8f83b692436098fcd685d6eea467 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 21 Apr 2025 10:14:03 -0400 Subject: [PATCH 099/147] fix dispatcher --- bbot/scanner/scanner.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 3791f053cf..ea037d408c 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -425,11 +425,8 @@ async def async_start(self): await self._report() await self._cleanup() - # report on final scan status and shut down dispatcher + # report on final scan status await self.dispatcher.on_finish(self) - self.dispatcher_task.cancel() - with contextlib.suppress(asyncio.CancelledError): - await self.dispatcher_task.wait() self._stop_log_handlers() @@ -974,6 +971,10 @@ def status(self, status): if status != self._status: self._status = status self._status_code = self._status_codes[status] + # clean out old dispatcher tasks + for task in list(self.dispatcher_tasks): + if task.done(): + self.dispatcher_tasks.remove(task) self.dispatcher_tasks.append( asyncio.create_task( self.dispatcher.catch(self.dispatcher.on_status, self._status, self.id), From 27bd324faccc7604be175f8f7c98b403d36a7cb1 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 21 Apr 2025 10:29:41 -0400 Subject: [PATCH 100/147] better status handling --- bbot/scanner/scanner.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index ea037d408c..feb56a0b4e 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -965,26 +965,27 @@ def status(self, status): """ status = str(status).strip().upper() if status in self._status_codes: - if self.status == "ABORTING" and not status == "ABORTED": - self.debug(f'Attempt to set invalid status "{status}" on aborted scan') - else: - if status != self._status: - self._status = status - self._status_code = self._status_codes[status] - # clean out old dispatcher tasks - for task in list(self.dispatcher_tasks): - if task.done(): - self.dispatcher_tasks.remove(task) - self.dispatcher_tasks.append( - asyncio.create_task( - self.dispatcher.catch(self.dispatcher.on_status, self._status, self.id), - name=f"{self.name}.dispatcher.on_status({status})", - ) - ) - else: - self.debug(f'Scan status is already "{status}"') + # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again + if self._status_codes[status] >= self.status_codes["ABORTED"]: + self.debug(f'Attempt to set invalid status "{status}" on already finished scan') + return + if status == self._status: + self.debug(f'Scan status is already "{status}"') + return + self._status = status + self._status_code = self._status_codes[status] + # clean out old dispatcher tasks + for task in list(self.dispatcher_tasks): + if task.done(): + self.dispatcher_tasks.remove(task) + self.dispatcher_tasks.append( + asyncio.create_task( + self.dispatcher.catch(self.dispatcher.on_status, self._status, self.id), + name=f"{self.name}.dispatcher.on_status({status})", + ) + ) else: - self.debug(f'Attempt to set invalid status "{status}" on scan') + self.warning(f'Attempt to set invalid status "{status}" on scan') def make_event(self, *args, **kwargs): kwargs["scan"] = self From 343b47f20bc5a41b51d33ad23dcc9e8ec882b670 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 21 Apr 2025 10:34:19 
-0400 Subject: [PATCH 101/147] better finishing --- bbot/scanner/scanner.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index feb56a0b4e..d044611a5c 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -127,6 +127,7 @@ def __init__( self._success = False self._scan_finish_status_message = None + self._marked_finished = False if scan_id is not None: self.id = str(scan_id) @@ -390,8 +391,6 @@ async def async_start(self): new_activity = await self.finish() if not new_activity: self._success = True - scan_finish_event = await self._mark_finished() - yield scan_finish_event break await asyncio.sleep(0.1) @@ -415,6 +414,8 @@ async def async_start(self): self.critical(f"Unexpected error during scan:\n{traceback.format_exc()}") finally: + scan_finish_event = await self._mark_finished() + yield scan_finish_event tasks = self._cancel_tasks() self.debug(f"Awaiting {len(tasks):,} tasks") for task in tasks: @@ -439,6 +440,11 @@ async def async_start(self): log_fn(self._scan_finish_status_message) async def _mark_finished(self): + if self._marked_finished: + return + + self._marked_finished = True + if self.status == "ABORTING": status = "ABORTED" elif not self._success: @@ -759,6 +765,7 @@ def stop(self): self._drain_queues() self.helpers.kill_children() self.debug("Finished aborting scan") + self.status = "ABORTED" async def finish(self): """Finalizes the scan by invoking the `finished()` method on all active modules if new activity is detected. @@ -966,7 +973,7 @@ def status(self, status): status = str(status).strip().upper() if status in self._status_codes: # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again - if self._status_codes[status] >= self.status_codes["ABORTED"]: + if self._status_codes[status] >= self._status_codes["ABORTED"]: self.debug(f'Attempt to set invalid status "{status}" on already finished scan') return if status == self._status: From 4f2acba23bd08df5c0418dab7c782e3aefac0c49 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 21 Apr 2025 11:00:34 -0400 Subject: [PATCH 102/147] fixed ctrl+c --- bbot/scanner/scanner.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index d044611a5c..3753dc8f83 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -425,7 +425,6 @@ async def async_start(self): self.debug(f"Awaited {len(tasks):,} tasks") await self._report() await self._cleanup() - # report on final scan status await self.dispatcher.on_finish(self) @@ -457,20 +456,23 @@ async def _mark_finished(self): self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) - self._scan_finish_status_message = f"Scan {self.name} completed in {self.duration_human} with status {status}" + self._scan_finish_status_message = ( + f"Scan {self.name} completed in {self.duration_human} with status {self.status}" + ) scan_finish_event = self.finish_event(self._scan_finish_status_message, status) - # queue final scan event with output modules - output_modules = [m for m in self.modules.values() if m._type == "output" and m.name != "python"] - for m in output_modules: - await m.queue_event(scan_finish_event) - # wait until output modules are flushed - while 1: - modules_finished = all(m.finished for m in output_modules) - if modules_finished: - break - await asyncio.sleep(0.05) + if 
not self._stopping: + # queue final scan event with output modules + output_modules = [m for m in self.modules.values() if m._type == "output" and m.name != "python"] + for m in output_modules: + await m.queue_event(scan_finish_event) + # wait until output modules are flushed + while 1: + modules_finished = all([m.finished for m in output_modules]) + if modules_finished: + break + await asyncio.sleep(0.05) self.status = status return scan_finish_event @@ -971,9 +973,10 @@ def status(self, status): Block setting after status has been aborted """ status = str(status).strip().upper() + self.debug(f"Setting scan status from {self.status} to {status}") if status in self._status_codes: # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again - if self._status_codes[status] >= self._status_codes["ABORTED"]: + if self._status_code >= self._status_codes["ABORTED"]: self.debug(f'Attempt to set invalid status "{status}" on already finished scan') return if status == self._status: From 2d9d49515d6374fd5151b25becbd27c3b9931314 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 21 Apr 2025 11:56:21 -0400 Subject: [PATCH 103/147] remove cleaning up status --- bbot/scanner/scanner.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 3753dc8f83..14d3edca5a 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -55,7 +55,6 @@ class Scanner: - "STARTING" (1): Status when the scan is initializing. - "RUNNING" (2): Status when the scan is in progress. - "FINISHING" (3): Status when the scan is in the process of finalizing. - - "CLEANING_UP" (4): Status when the scan is cleaning up resources. - "ABORTING" (5): Status when the scan is in the process of being aborted. - "ABORTED" (6): Status when the scan has been aborted. - "FAILED" (7): Status when the scan has encountered a failure. @@ -90,7 +89,6 @@ class Scanner: "STARTING": 1, "RUNNING": 2, "FINISHING": 3, - "CLEANING_UP": 4, "ABORTING": 5, "ABORTED": 6, "FAILED": 7, @@ -870,7 +868,8 @@ async def _cleanup(self): This method is called once at the end of the scan to perform resource cleanup tasks. It is executed regardless of whether the scan was aborted or completed - successfully. The scan status is set to "CLEANING_UP" during the execution. + successfully. + After calling the `cleanup()` method for each module, it performs additional cleanup tasks such as removing the scan's home directory if empty and cleaning old scans. 
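# Illustrative aside, not part of the diff: with CLEANING_UP gone and the
# setter guard from the previous patches, the status lifecycle is monotonic
# once a terminal code is reached. A minimal sketch of that gate, reusing
# the _status_codes mapping:
#
#     def can_set_status(current_code, new_status, codes):
#         if current_code >= codes["ABORTED"]:
#             return False  # ABORTED/FAILED/FINISHED are final
#         return new_status is not None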
@@ -881,7 +880,6 @@ async def _cleanup(self): # clean up self if not self._cleanedup: self._cleanedup = True - self.status = "CLEANING_UP" # clean up modules for mod in self.modules.values(): await mod._cleanup() @@ -894,7 +892,6 @@ async def _cleanup(self): with contextlib.suppress(Exception): self.home.rmdir() self.helpers.clean_old_scans() - self.status = "FINISHED" def in_scope(self, *args, **kwargs): return self.preset.in_scope(*args, **kwargs) From fec51cf1fe2d79a0e16395a3938afe6523bc1885 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 23 Apr 2025 14:59:18 -0400 Subject: [PATCH 104/147] pika imports --- bbot/test/test_step_2/module_tests/test_module_rabbitmq.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py index 0a050220aa..c272e0b86c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py +++ b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py @@ -1,6 +1,5 @@ import json import asyncio -import aio_pika from contextlib import suppress from .base import ModuleTestBase @@ -18,6 +17,8 @@ class TestRabbitMQ(ModuleTestBase): skip_distro_tests = True async def setup_before_prep(self, module_test): + import aio_pika + # Start RabbitMQ await asyncio.create_subprocess_exec( "docker", "run", "-d", "--rm", "--name", "bbot-test-rabbitmq", "-p", "5672:5672", "rabbitmq:3-management" @@ -36,6 +37,8 @@ async def setup_before_prep(self, module_test): await asyncio.sleep(0.5) # Wait a bit before retrying async def check(self, module_test, events): + import aio_pika + connection = await aio_pika.connect_robust("amqp://guest:guest@localhost/") channel = await connection.channel() queue = await channel.declare_queue("bbot_events", durable=True) From 7730ae82aed7122c137cd1ded32e24ef416522cd Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 24 Apr 2025 14:24:53 -0400 Subject: [PATCH 105/147] keep strict_scope in preset for backwards compatibility --- bbot/defaults.yml | 6 +++--- bbot/scanner/preset/preset.py | 2 +- bbot/test/test_step_1/test_presets.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 165974bdb4..b9f118b6b4 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -14,9 +14,9 @@ folder_blobs: false ### SCOPE ### scope: - # strict DNS scope means only exact DNS names are considered in-scope - # subdomains are not included unless they are explicitly whitelisted - strict_dns: false + # strict scope means only exact DNS names are considered in-scope + # their subdomains are not included unless explicitly whitelisted + strict: false # Filter by scope distance which events are displayed in the output # 0 == show only in-scope events (affiliates are always shown) # 1 == show all events up to distance-1 (1 hop from target) diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 268d2af545..4d0e97b167 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -486,7 +486,7 @@ def bake(self, scan=None): *list(self._seeds), whitelist=self._whitelist, blacklist=self._blacklist, - strict_dns_scope=self.strict_dns_scope, + strict_dns_scope=self.strict_scope, ) # evaluate conditions diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 80056dc7f3..c9efd1dbb9 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -180,7 +180,7 @@ 
def test_preset_scope(): blank_preset = blank_preset.bake() assert not blank_preset.target.seeds assert not blank_preset.target.whitelist - assert blank_preset.strict_dns_scope is False + assert blank_preset.strict_scope is False preset1 = Preset( "evilcorp.com", From 8813b78a532f484b6c037bde2dba24a3493677da Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 24 Apr 2025 15:37:15 -0400 Subject: [PATCH 106/147] fix preset --- bbot/scanner/preset/preset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 4d0e97b167..34b4058e62 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -561,7 +561,7 @@ def scope_config(self): return self.config.get("scope", {}) @property - def strict_dns_scope(self): + def strict_scope(self): return self.scope_config.get("strict", False) def apply_log_level(self, apply_core=False): From 6a0e5ade7089ad145efc66f5492fa57eb54de98b Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 24 Apr 2025 16:14:05 -0400 Subject: [PATCH 107/147] fix preset again --- bbot/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/cli.py b/bbot/cli.py index 779a5ab557..333ab8c202 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -198,7 +198,7 @@ async def _main(): if sys.stdin.isatty(): # warn if any targets belong directly to a cloud provider - if not scan.preset.strict_dns_scope: + if not scan.preset.strict_scope: for event in scan.target.seeds.event_seeds: if event.type == "DNS_NAME": cloudcheck_result = scan.helpers.cloudcheck(event.host) From f09109e6107327666809c3e6287dd0db7ba0a312 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 25 Apr 2025 11:27:18 -0400 Subject: [PATCH 108/147] union -> optional --- bbot/models/pydantic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 45e74a0af8..eb35bc721f 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,7 +1,7 @@ import json import logging from pydantic import BaseModel, ConfigDict, Field -from typing import Optional, List, Union, Annotated +from typing import Optional, List, Annotated, Optional from bbot.models.helpers import utc_now_timestamp @@ -73,8 +73,8 @@ class Event(BBOTBaseModel): # we store the host in reverse to allow for instant subdomain queries # this works because indexes are left-anchored, but we need to search starting from the right side reverse_host: Annotated[Optional[str], "indexed"] = "" - resolved_hosts: Union[List, None] = None - dns_children: Union[dict, None] = None + resolved_hosts: Optional[List] = None + dns_children: Optional[dict] = None web_spider_distance: int = 10 scope_distance: int = 10 scan: Annotated[str, "indexed"] From d49014616ba94732215a5fda195113eb4da2cc95 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 25 Apr 2025 14:30:00 -0400 Subject: [PATCH 109/147] ruffed --- bbot/models/pydantic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index eb35bc721f..4ec7a3dc65 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,7 +1,7 @@ import json import logging +from typing import Optional, List, Annotated from pydantic import BaseModel, ConfigDict, Field -from typing import Optional, List, Annotated, Optional from bbot.models.helpers import utc_now_timestamp From c64fdea3622302073cb65ac3618045e294f81280 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 
Apr 2025 12:47:16 -0400 Subject: [PATCH 110/147] don't import scanner or preset in bbot __init__.py --- .github/workflows/tests.yml | 2 +- bbot/__init__.py | 3 --- bbot/scripts/docs.py | 2 +- bbot/test/fastapi_test.py | 2 +- bbot/test/test_step_1/test__module__tests.py | 2 +- bbot/test/test_step_1/test_bbot_fastapi.py | 2 +- bbot/test/test_step_1/test_python_api.py | 2 +- bbot/test/test_step_1/test_regexes.py | 2 +- bbot/test/test_step_1/test_scan.py | 2 +- 9 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 95c9e3cd8f..915b86d9b8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -72,7 +72,7 @@ jobs: run: python -m build - name: Publish Pypi package if: github.ref == 'refs/heads/stable' || github.ref == 'refs/heads/dev' - uses: pypa/gh-action-pypi-publish@release/v1.12 + uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} - name: Get BBOT version diff --git a/bbot/__init__.py b/bbot/__init__.py index 914c45ff4b..dcf055264c 100644 --- a/bbot/__init__.py +++ b/bbot/__init__.py @@ -1,6 +1,3 @@ # version placeholder (replaced by poetry-dynamic-versioning) __version__ = "v0.0.0" -from .scanner import Scanner, Preset - -__all__ = ["Scanner", "Preset"] diff --git a/bbot/scripts/docs.py b/bbot/scripts/docs.py index ef0187ed9c..6ccc88f542 100755 --- a/bbot/scripts/docs.py +++ b/bbot/scripts/docs.py @@ -6,7 +6,7 @@ import yaml from pathlib import Path -from bbot import Preset +from bbot.scanner import Preset from bbot.core.modules import MODULE_LOADER diff --git a/bbot/test/fastapi_test.py b/bbot/test/fastapi_test.py index f0c7b2d789..a4a1d57107 100644 --- a/bbot/test/fastapi_test.py +++ b/bbot/test/fastapi_test.py @@ -1,5 +1,5 @@ from typing import List -from bbot import Scanner +from bbot.scanner import Scanner from fastapi import FastAPI, Query app = FastAPI() diff --git a/bbot/test/test_step_1/test__module__tests.py b/bbot/test/test_step_1/test__module__tests.py index 6221b61490..b68ad50a5d 100644 --- a/bbot/test/test_step_1/test__module__tests.py +++ b/bbot/test/test_step_1/test__module__tests.py @@ -2,7 +2,7 @@ import importlib from pathlib import Path -from bbot import Preset +from bbot.scanner import Preset from ..test_step_2.module_tests.base import ModuleTestBase log = logging.getLogger("bbot.test.modules") diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index 9f54e4a881..98f97e514c 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -9,7 +9,7 @@ def run_bbot_multiprocess(queue): - from bbot import Scanner + from bbot.scanner import Scanner scan = Scanner("http://127.0.0.1:8888", "blacklanternsecurity.com", modules=["httpx"]) events = [e.json() for e in scan.start()] diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index 89532b180c..9cd7e1c974 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -3,7 +3,7 @@ @pytest.mark.asyncio async def test_python_api(): - from bbot import Scanner + from bbot.scanner import Scanner # make sure events are properly yielded scan1 = Scanner("127.0.0.1") diff --git a/bbot/test/test_step_1/test_regexes.py b/bbot/test/test_step_1/test_regexes.py index c68fd8e13b..6558326341 100644 --- a/bbot/test/test_step_1/test_regexes.py +++ b/bbot/test/test_step_1/test_regexes.py @@ -334,7 +334,7 @@ def test_url_regexes(): 
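# Migration note (illustrative, not part of the patch): with the re-exports
# gone from bbot/__init__.py, downstream code imports from the subpackage
# directly, as every call site touched by this patch now does:
#
#     from bbot import Scanner, Preset          # old, no longer works
#     from bbot.scanner import Scanner, Preset  # new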
@pytest.mark.asyncio async def test_regex_helper(): - from bbot import Scanner + from bbot.scanner import Scanner scan = Scanner("evilcorp.com", "evilcorp.org", "evilcorp.net", "evilcorp.co.uk") diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index a0bf0b1008..b366948b45 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -173,7 +173,7 @@ async def test_huge_target_list(bbot_scanner, monkeypatch): async def test_exclude_cdn(bbot_scanner, monkeypatch): # test that CDN exclusion works - from bbot import Preset + from bbot.scanner import Preset dns_mock = { "evilcorp.com": {"A": ["127.0.0.1"]}, From 9b7475cc43c19faff568edd255c193858b18af06 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 Apr 2025 12:47:27 -0400 Subject: [PATCH 111/147] ruffed --- bbot/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bbot/__init__.py b/bbot/__init__.py index dcf055264c..8e016095f4 100644 --- a/bbot/__init__.py +++ b/bbot/__init__.py @@ -1,3 +1,2 @@ # version placeholder (replaced by poetry-dynamic-versioning) __version__ = "v0.0.0" - From 18de9a21d261f916e52c2003f11d8948e8a8b1fa Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 Apr 2025 15:55:35 -0400 Subject: [PATCH 112/147] vuln rework --- bbot/core/event/base.py | 4 ++++ bbot/modules/ajaxpro.py | 4 +++- bbot/modules/baddns.py | 2 ++ bbot/modules/baddns_direct.py | 1 + bbot/modules/badsecrets.py | 2 ++ bbot/modules/bypass403.py | 8 +++++++- bbot/modules/dastardly.py | 2 ++ bbot/modules/dotnetnuke.py | 6 ++++++ bbot/modules/git.py | 7 ++++++- bbot/modules/gitlab.py | 6 +++++- bbot/modules/host_header.py | 3 +++ bbot/modules/hunt.py | 6 +++++- bbot/modules/iis_shortnames.py | 8 +++++++- bbot/modules/lightfuzz/lightfuzz.py | 1 + bbot/modules/lightfuzz/submodules/cmdi.py | 1 + bbot/modules/lightfuzz/submodules/crypto.py | 3 +++ bbot/modules/lightfuzz/submodules/nosqli.py | 2 ++ bbot/modules/lightfuzz/submodules/path.py | 2 ++ bbot/modules/lightfuzz/submodules/serial.py | 2 ++ bbot/modules/lightfuzz/submodules/sqli.py | 3 +++ bbot/modules/lightfuzz/submodules/ssti.py | 1 + bbot/modules/lightfuzz/submodules/xss.py | 1 + bbot/modules/newsletters.py | 7 ++++++- bbot/modules/ntlm.py | 1 + bbot/modules/nuclei.py | 2 ++ bbot/modules/oauth.py | 2 ++ bbot/modules/reflected_parameters.py | 18 ++++++++++++------ bbot/modules/shodan_idb.py | 9 +++++++-- bbot/modules/smuggler.py | 7 ++++++- bbot/modules/telerik.py | 19 +++++++++++++++++-- bbot/modules/trufflehog.py | 1 + bbot/modules/url_manipulation.py | 9 +++++++-- bbot/modules/wpscan.py | 10 +++++++++- 33 files changed, 139 insertions(+), 21 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 4c6288b3f1..f49a287774 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1552,6 +1552,7 @@ class VULNERABILITY(ClosestHostEvent): "HIGH": "🟥", "MEDIUM": "🟧", "LOW": "🟨", + "INFO": "🟦", "UNKNOWN": "⬜", } @@ -1562,9 +1563,11 @@ def sanitize_data(self, data): class _data_validator(BaseModel): host: Optional[str] = None severity: str + name: str description: str url: Optional[str] = None path: Optional[str] = None + cves: Optional[list[str]] = None _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) _validate_severity = field_validator("severity")(validators.validate_severity) @@ -1579,6 +1582,7 @@ class FINDING(ClosestHostEvent): class _data_validator(BaseModel): host: Optional[str] = None + name: str 
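# Illustrative aside, not part of the patch: FINDING data now requires a
# short human-readable "name" next to the description. A hypothetical emit
# call under the reworked schema (url and event are placeholders):
#
#     await self.emit_event(
#         {
#             "host": str(event.host),
#             "url": url,
#             "name": "Example Finding",        # new required field
#             "description": "what was observed",
#         },
#         "FINDING",
#         event,
#     )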
description: str url: Optional[str] = None path: Optional[str] = None diff --git a/bbot/modules/ajaxpro.py b/bbot/modules/ajaxpro.py index 1df424ebcc..c1ff413915 100644 --- a/bbot/modules/ajaxpro.py +++ b/bbot/modules/ajaxpro.py @@ -10,7 +10,7 @@ class ajaxpro(BaseModule): ajaxpro_regex = re.compile(r' collapse_threshold: await self.emit_event( { + "name": "Possible 403 Bypass", "description": f"403 Bypass MULTIPLE SIGNATURES (exceeded threshold {str(collapse_threshold)})", "host": str(event.host), "url": event.data, @@ -152,7 +153,12 @@ async def handle_event(self, event): else: for description in results: await self.emit_event( - {"description": description, "host": str(event.host), "url": event.data}, + { + "name": "Possible 403 Bypass", + "description": description, + "host": str(event.host), + "url": event.data, + }, "FINDING", parent=event, context=f"{{module}} discovered potential 403 bypass ({{event.type}}) for {event.data}", diff --git a/bbot/modules/dastardly.py b/bbot/modules/dastardly.py index b9dd772a73..5b81bc14b1 100644 --- a/bbot/modules/dastardly.py +++ b/bbot/modules/dastardly.py @@ -49,6 +49,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": failure.instance, + "name": "Dastardly - Possible Vulnerability", }, "FINDING", event, @@ -61,6 +62,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": failure.instance, + "name": "Dastardly - Possible Vulnerability", }, "VULNERABILITY", event, diff --git a/bbot/modules/dotnetnuke.py b/bbot/modules/dotnetnuke.py index 7e8b4d3d4e..0f7301b33a 100644 --- a/bbot/modules/dotnetnuke.py +++ b/bbot/modules/dotnetnuke.py @@ -55,6 +55,8 @@ async def interactsh_callback(self, r): "host": str(event.host), "url": url, "description": description, + "cves": ["CVE-2017-0929"], + "name": "DotNetNuke Blind-SSRF", }, "VULNERABILITY", event, @@ -106,6 +108,7 @@ async def handle_event(self, event): "description": description, "host": str(event.host), "url": probe_url, + "name": "DotNetNuke Cookie Deserialization", }, "VULNERABILITY", event, @@ -124,6 +127,7 @@ async def handle_event(self, event): { "severity": "CRITICAL", "description": description, + "name": "DotNetNuke Arbitrary File Read", "host": str(event.host), "url": f"{event.data['url']}/DesktopModules/dnnUI_NewsArticlesSlider/ImageHandler.ashx", }, @@ -143,6 +147,7 @@ async def handle_event(self, event): { "severity": "CRITICAL", "description": description, + "name": "DotNetNuke Arbitrary File Read", "host": str(event.host), "url": f"{event.data['url']}/Desktopmodules/DNNArticle/GetCSS.ashx/?CP=%2fweb.config", }, @@ -164,6 +169,7 @@ async def handle_event(self, event): { "severity": "CRITICAL", "description": description, + "name": "DotNetNuke Privilege Escalation", "host": str(event.host), "url": f"{event.data['url']}/Install/InstallWizard.aspx", }, diff --git a/bbot/modules/git.py b/bbot/modules/git.py index 569aa0e489..0c069bd958 100644 --- a/bbot/modules/git.py +++ b/bbot/modules/git.py @@ -32,7 +32,12 @@ async def handle_event(self, event): if getattr(response, "status_code", 0) == 200 and "[core]" in text and not self.fp_regex.match(text): description = f"Exposed .git config at {url}" await self.emit_event( - {"host": str(event.host), "url": url, "description": description}, + { + "host": str(event.host), + "url": url, + "description": description, + "name": "Exposed .git config", + }, "FINDING", event, context="{module} detected {event.type}: {description}", diff --git a/bbot/modules/gitlab.py 
b/bbot/modules/gitlab.py index e1ba3850ee..f2379b303a 100644 --- a/bbot/modules/gitlab.py +++ b/bbot/modules/gitlab.py @@ -57,7 +57,11 @@ async def handle_http_response(self, event): ) description = f"GitLab server at {event.host}" await self.emit_event( - {"host": str(event.host), "description": description}, + { + "host": str(event.host), + "description": description, + "name": "GitLab Server", + }, "FINDING", parent=event, context=f"{{module}} detected {{event.type}}: {description}", diff --git a/bbot/modules/host_header.py b/bbot/modules/host_header.py index a60967b8b4..2d664f42d0 100644 --- a/bbot/modules/host_header.py +++ b/bbot/modules/host_header.py @@ -49,6 +49,7 @@ async def interactsh_callback(self, r): { "host": str(matched_event.host), "url": matched_event.data["url"], + "name": "Host Header Spoofing", "description": f"Spoofed Host header ({matched_technique}) [{protocol}] interaction", }, "FINDING", @@ -142,6 +143,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": description, + "name": "Duplicate Host Header Tolerated", }, "FINDING", event, @@ -184,6 +186,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": description, + "name": "Possible Host Header Injection", }, "FINDING", event, diff --git a/bbot/modules/hunt.py b/bbot/modules/hunt.py index 6f3e619965..01ba2cf0e7 100644 --- a/bbot/modules/hunt.py +++ b/bbot/modules/hunt.py @@ -305,7 +305,11 @@ async def handle_event(self, event): f" Original Value: [{self.helpers.truncate_string(str(event.data['original_value']), 200)}]" ) - data = {"host": str(event.host), "description": description} + data = { + "host": str(event.host), + "description": description, + "name": "Potentially Interesting Parameter", + } url = event.data.get("url", "") if url: data["url"] = url diff --git a/bbot/modules/iis_shortnames.py b/bbot/modules/iis_shortnames.py index 48860445f5..0db398b815 100644 --- a/bbot/modules/iis_shortnames.py +++ b/bbot/modules/iis_shortnames.py @@ -235,7 +235,13 @@ class safety_counter_obj: description = f"IIS Shortname Vulnerability Detected. Potentially Vulnerable Method/Techniques: [{','.join(technique_strings)}]" await self.emit_event( - {"severity": "LOW", "host": str(event.host), "url": normalized_url, "description": description}, + { + "severity": "LOW", + "host": str(event.host), + "url": normalized_url, + "description": description, + "name": "IIS Shortnames", + }, "VULNERABILITY", event, context="{module} detected low {event.type}: IIS shortname enumeration", diff --git a/bbot/modules/lightfuzz/lightfuzz.py b/bbot/modules/lightfuzz/lightfuzz.py index 754ff7dd61..e178399e35 100644 --- a/bbot/modules/lightfuzz/lightfuzz.py +++ b/bbot/modules/lightfuzz/lightfuzz.py @@ -73,6 +73,7 @@ async def interactsh_callback(self, r): "severity": "CRITICAL", "host": str(details["event"].host), "url": details["event"].data["url"], + "name": "Lightfuzz - OS Command Injection", "description": f"OS Command Injection (OOB Interaction) Type: [{details['type']}] Parameter Name: [{details['name']}] Probe: [{details['probe']}]", }, "VULNERABILITY", diff --git a/bbot/modules/lightfuzz/submodules/cmdi.py b/bbot/modules/lightfuzz/submodules/cmdi.py index 11576f1dc5..51b256c08f 100644 --- a/bbot/modules/lightfuzz/submodules/cmdi.py +++ b/bbot/modules/lightfuzz/submodules/cmdi.py @@ -75,6 +75,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Command Injection", "description": f"POSSIBLE OS Command Injection. 
{self.metadata()} Detection Method: [echo canary] CMD Probe Delimeters: [{' '.join(positive_detections)}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/crypto.py b/bbot/modules/lightfuzz/submodules/crypto.py index 1134d95af9..36b8ac18c6 100644 --- a/bbot/modules/lightfuzz/submodules/crypto.py +++ b/bbot/modules/lightfuzz/submodules/crypto.py @@ -320,6 +320,7 @@ async def error_string_search(self, text_dict, baseline_text): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Cryptographic Error", "description": f"Possible Cryptographic Error. {self.metadata()} Strings: [{','.join(unique_matches)}] Detection Technique(s): [{','.join(matching_techniques)}]", "context": context, } @@ -414,6 +415,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Probable Cryptographic Parameter", "description": f"Probable Cryptographic Parameter. {self.metadata()} Detection Technique(s): [{', '.join(confirmed_techniques)}]", "context": context, } @@ -468,6 +470,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Length Extension Attack", "description": f"Possible {self.event.data['type']} parameter with {hash_instance.name.upper()} Hash as value. {self.metadata()}, linked to additional parameter [{additional_param_name}]", "context": context, } diff --git a/bbot/modules/lightfuzz/submodules/nosqli.py b/bbot/modules/lightfuzz/submodules/nosqli.py index 0155ec3c24..90f8af4c7c 100644 --- a/bbot/modules/lightfuzz/submodules/nosqli.py +++ b/bbot/modules/lightfuzz/submodules/nosqli.py @@ -112,6 +112,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible NoSQL Injection", "description": f"Possible NoSQL Injection. {self.metadata()} Detection Method: [Quote/Escaped Quote + Conditional Affect] Differences: [{'.'.join(confirmation_probe_false_diff_reasons)}]", } ) @@ -176,6 +177,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible NoSQL Injection", "description": f"Possible NoSQL Injection. {self.metadata()} Detection Method: [Parameter Name Operator Injection - Negation ([$ne])] Differences: [{'.'.join(nosqli_negate_diff_reasons)}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/path.py b/bbot/modules/lightfuzz/submodules/path.py index 44047e2907..39bdd9c7c3 100644 --- a/bbot/modules/lightfuzz/submodules/path.py +++ b/bbot/modules/lightfuzz/submodules/path.py @@ -122,6 +122,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Path Traversal", "description": f"POSSIBLE Path Traversal. {self.metadata()} Detection Method: [{path_technique}]", } ) @@ -149,6 +150,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Path Traversal", "description": f"POSSIBLE Path Traversal. {self.metadata()} Detection Method: [Absolute Path: {path}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/serial.py b/bbot/modules/lightfuzz/submodules/serial.py index aa74fd9d08..68bff7b6d6 100644 --- a/bbot/modules/lightfuzz/submodules/serial.py +++ b/bbot/modules/lightfuzz/submodules/serial.py @@ -156,6 +156,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Unsafe Deserialization", "description": f"POSSIBLE Unsafe Deserialization. 
{self.metadata()} Technique: [Error Resolution] Serialization Payload: [{type}]", } ) @@ -173,6 +174,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Unsafe Deserialization", "description": f"POSSIBLE Unsafe Deserialization. {self.metadata()} Technique: [Differential Error Analysis] Error-String: [{serialization_error}] Payload: [{type}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/sqli.py b/bbot/modules/lightfuzz/submodules/sqli.py index 17afaf9903..5448f1abbd 100644 --- a/bbot/modules/lightfuzz/submodules/sqli.py +++ b/bbot/modules/lightfuzz/submodules/sqli.py @@ -100,6 +100,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible SQL Injection", "description": f"Possible SQL Injection. {self.metadata()} Detection Method: [SQL Error Detection] Detected String: [{sqli_error_string}]", } ) @@ -119,6 +120,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible SQL Injection", "description": f"Possible SQL Injection. {self.metadata()} Detection Method: [Single Quote/Two Single Quote, Code Change ({http_compare.baseline.status_code}->{single_quote[3].status_code}->{double_single_quote[3].status_code})]", } ) @@ -179,6 +181,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Blind SQL Injection", "description": f"Possible Blind SQL Injection. {self.metadata()} Detection Method: [Delay Probe ({p})]", } ) diff --git a/bbot/modules/lightfuzz/submodules/ssti.py b/bbot/modules/lightfuzz/submodules/ssti.py index 544b10b103..d871ec03a1 100644 --- a/bbot/modules/lightfuzz/submodules/ssti.py +++ b/bbot/modules/lightfuzz/submodules/ssti.py @@ -33,6 +33,7 @@ async def fuzz(self): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Server-side Template Injection", "description": f"POSSIBLE Server-side Template Injection. {self.metadata()} Detection Method: [Integer Multiplication] Payload: [{probe_value}]", } ) diff --git a/bbot/modules/lightfuzz/submodules/xss.py b/bbot/modules/lightfuzz/submodules/xss.py index 3fc02550e1..13c192915b 100644 --- a/bbot/modules/lightfuzz/submodules/xss.py +++ b/bbot/modules/lightfuzz/submodules/xss.py @@ -91,6 +91,7 @@ async def check_probe(self, cookies, probe, match, context): self.results.append( { "type": "FINDING", + "name": "Lightfuzz - Possible Reflected XSS", "description": f"Possible Reflected XSS. 
Parameter: [{self.event.data['name']}] Context: [{context}] Parameter Type: [{self.event.data['type']}]", } ) diff --git a/bbot/modules/newsletters.py b/bbot/modules/newsletters.py index 114f7d66fd..3b23307952 100644 --- a/bbot/modules/newsletters.py +++ b/bbot/modules/newsletters.py @@ -51,7 +51,12 @@ async def handle_event(self, event): result = self.find_type(soup) if result: description = "Found a Newsletter Submission Form that could be used for email bombing attacks" - data = {"host": str(_event.host), "description": description, "url": _event.data["url"]} + data = { + "host": str(_event.host), + "description": description, + "url": _event.data["url"], + "name": "Newsletter Submission Form", + } await self.emit_event( data, "FINDING", diff --git a/bbot/modules/ntlm.py b/bbot/modules/ntlm.py index 67268616de..90c5a986c2 100644 --- a/bbot/modules/ntlm.py +++ b/bbot/modules/ntlm.py @@ -120,6 +120,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": f"NTLM AUTH: {ntlm_resp_decoded}", + "name": "NTLM Authentication", }, "FINDING", parent=event, diff --git a/bbot/modules/nuclei.py b/bbot/modules/nuclei.py index 23e091601a..ee59bd9b61 100644 --- a/bbot/modules/nuclei.py +++ b/bbot/modules/nuclei.py @@ -173,6 +173,7 @@ async def handle_batch(self, *events): "host": str(parent_event.host), "url": url, "description": description_string, + "name": f"Nuclei Vuln - {name}", }, "FINDING", parent_event, @@ -185,6 +186,7 @@ async def handle_batch(self, *events): "host": str(parent_event.host), "url": url, "description": description_string, + "name": f"Nuclei Vuln - {name}", }, "VULNERABILITY", parent_event, diff --git a/bbot/modules/oauth.py b/bbot/modules/oauth.py index 58c0507c09..559dea9313 100644 --- a/bbot/modules/oauth.py +++ b/bbot/modules/oauth.py @@ -62,6 +62,7 @@ async def handle_event(self, event): if token_endpoint: finding_event = self.make_event( { + "name": "OpenID Connect Endpoint", "description": f"OpenID Connect Endpoint (domain: {source_domain}) found at {url}", "host": event.host, "url": url, @@ -101,6 +102,7 @@ async def handle_event(self, event): description = f"Potentially Sprayable OAUTH Endpoint (domain: {source_domain}) at {url}" oauth_finding = self.make_event( { + "name": "Potentially Sprayable OAUTH Endpoint", "description": description, "host": event.host, "url": url, diff --git a/bbot/modules/reflected_parameters.py b/bbot/modules/reflected_parameters.py index f7e17e57e6..a942edd473 100644 --- a/bbot/modules/reflected_parameters.py +++ b/bbot/modules/reflected_parameters.py @@ -25,7 +25,12 @@ async def handle_event(self, event): description += ( f" Original Value: [{self.helpers.truncate_string(str(event.data['original_value']), 200)}]" ) - data = {"host": str(event.host), "description": description, "url": url} + data = { + "host": str(event.host), + "description": description, + "url": url, + "name": "Reflected Parameter", + } await self.emit_event(data, "FINDING", event) async def detect_reflection(self, event, url): @@ -56,17 +61,18 @@ async def send_probe_with_canary(self, event, parameter_name, parameter_value, c data = None json_data = None params = {parameter_name: parameter_value, "c4n4ry": canary_value} + param_type = event.data["type"] - if event.data["type"] == "GETPARAM": + if param_type == "GETPARAM": url = f"{url}?{parameter_name}={parameter_value}&c4n4ry={canary_value}" - elif event.data["type"] == "COOKIE": + elif param_type == "COOKIE": cookies.update(params) - elif event.data["type"] == "HEADER": + elif 
param_type == "HEADER": headers.update(params) - elif event.data["type"] == "POSTPARAM": + elif param_type == "POSTPARAM": method = "POST" data = params - elif event.data["type"] == "BODYJSON": + elif param_type == "BODYJSON": method = "POST" json_data = params diff --git a/bbot/modules/shodan_idb.py b/bbot/modules/shodan_idb.py index 4a3e2b214a..72fadfaf08 100644 --- a/bbot/modules/shodan_idb.py +++ b/bbot/modules/shodan_idb.py @@ -40,7 +40,7 @@ class shodan_idb(BaseModule): """ watched_events = ["IP_ADDRESS", "DNS_NAME"] - produced_events = ["TECHNOLOGY", "VULNERABILITY", "FINDING", "OPEN_TCP_PORT", "DNS_NAME"] + produced_events = ["TECHNOLOGY", "FINDING", "OPEN_TCP_PORT", "DNS_NAME"] flags = ["passive", "safe", "portscan", "subdomain-enum"] meta = { "description": "Query Shodan's InternetDB for open ports, hostnames, technologies, and vulnerabilities", @@ -143,7 +143,12 @@ async def _parse_response(self, data: dict, event, ip): if vulns: vulns_str = ", ".join([str(v) for v in vulns]) await self.emit_event( - {"description": f"Shodan reported possible vulnerabilities: {vulns_str}", "host": str(event.host)}, + { + "description": f"Shodan reported possible vulnerabilities: {vulns_str}", + "host": str(event.host), + "cves": vulns, + "name": "Shodan - Possible Vulnerabilities", + }, "FINDING", parent=event, context=f'{{module}} queried Shodan\'s InternetDB API for "{query_host}" and found potential {{event.type}}: {vulns_str}', diff --git a/bbot/modules/smuggler.py b/bbot/modules/smuggler.py index 357fec1885..4c7a78cf9e 100644 --- a/bbot/modules/smuggler.py +++ b/bbot/modules/smuggler.py @@ -40,7 +40,12 @@ async def handle_event(self, event): text = f.split(":")[1].split("-")[0].strip() description = f"[HTTP SMUGGLER] [{text}] Technique: {technique}" await self.emit_event( - {"host": str(event.host), "url": event.data, "description": description}, + { + "host": str(event.host), + "url": event.data, + "description": description, + "name": "Possible HTTP Smuggling", + }, "FINDING", parent=event, context=f"{{module}} scanned {event.data} and found HTTP smuggling ({{event.type}}): {text}", diff --git a/bbot/modules/telerik.py b/bbot/modules/telerik.py index cc5e901fd3..f0775b56eb 100644 --- a/bbot/modules/telerik.py +++ b/bbot/modules/telerik.py @@ -242,7 +242,12 @@ async def handle_event(self, event): description = f"Telerik RAU AXD Handler detected. 
Verbose Errors Enabled: [{str(verbose_errors)}] Version Guess: [{version}]" await self.emit_event( - {"host": str(event.host), "url": f"{base_url}{webresource}", "description": description}, + { + "host": str(event.host), + "url": f"{base_url}{webresource}", + "description": description, + "name": "Telerik Handler", + }, "FINDING", event, context=f"{{module}} scanned {base_url} and identified {{event.type}}: Telerik RAU AXD Handler", @@ -273,6 +278,7 @@ async def handle_event(self, event): "description": description, "host": str(event.host), "url": f"{base_url}{webresource}", + "name": "Telerik RCE", }, "VULNERABILITY", event, @@ -302,7 +308,12 @@ async def handle_event(self, event): self.debug(f"Detected Telerik UI instance ({dh})") description = "Telerik DialogHandler detected" await self.emit_event( - {"host": str(event.host), "url": f"{base_url}{dh}", "description": description}, + { + "host": str(event.host), + "url": f"{base_url}{dh}", + "description": description, + "name": "Telerik Handler", + }, "FINDING", event, ) @@ -326,6 +337,7 @@ async def handle_event(self, event): "host": str(event.host), "url": f"{base_url}{spellcheckhandler}", "description": description, + "name": "Telerik Handler", }, "FINDING", event, @@ -345,6 +357,7 @@ async def handle_event(self, event): "host": str(event.host), "url": f"{base_url}{chartimagehandler}", "description": "Telerik ChartImage AXD Handler Detected", + "name": "Telerik Handler", }, "FINDING", event, @@ -361,6 +374,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": "Telerik DialogHandler [SerializedParameters] Detected in HTTP Response", + "name": "Telerik Handler", }, "FINDING", event, @@ -372,6 +386,7 @@ async def handle_event(self, event): "host": str(event.host), "url": url, "description": "Telerik AsyncUpload [serializedConfiguration] Detected in HTTP Response", + "name": "Telerik AsyncUpload", }, "FINDING", event, diff --git a/bbot/modules/trufflehog.py b/bbot/modules/trufflehog.py index d300d276d8..72e283fd24 100644 --- a/bbot/modules/trufflehog.py +++ b/bbot/modules/trufflehog.py @@ -123,6 +123,7 @@ async def handle_event(self, event): verified_str = "Verified" if verified else "Possible" finding_type = "VULNERABILITY" if verified else "FINDING" data = { + "name": f"TruffleHog - {detector_name}", "description": f"{verified_str} Secret Found. 
Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", } if host: diff --git a/bbot/modules/url_manipulation.py b/bbot/modules/url_manipulation.py index c36b7c39d5..e7863f2308 100644 --- a/bbot/modules/url_manipulation.py +++ b/bbot/modules/url_manipulation.py @@ -77,9 +77,14 @@ async def handle_event(self, event): if str(subject_response.status_code).startswith("2"): if "body" in reasons: reported_signature = f"Modified URL: {sig[1]}" - description = f"Url Manipulation: [{','.join(reasons)}] Sig: [{reported_signature}]" + description = f"URL Manipulation: [{','.join(reasons)}] Sig: [{reported_signature}]" await self.emit_event( - {"description": description, "host": str(event.host), "url": event.data}, + { + "description": description, + "host": str(event.host), + "url": event.data, + "name": "URL Manipulation", + }, "FINDING", parent=event, context=f"{{module}} probed {event.data} and identified {{event.type}}: {description}", diff --git a/bbot/modules/wpscan.py b/bbot/modules/wpscan.py index 4f1a63a1b5..6b128e77c8 100644 --- a/bbot/modules/wpscan.py +++ b/bbot/modules/wpscan.py @@ -174,7 +174,12 @@ def parse_wp_misc(self, interesting_json, base_url, source_event): if url_event: yield url_event yield self.make_event( - {"description": description_string, "url": url, "host": str(source_event.host)}, + { + "description": description_string, + "url": url, + "host": str(source_event.host), + "name": "WPScan - Possible Vulnerability", + }, "FINDING", source_event, ) @@ -197,6 +202,7 @@ def parse_wp_version(self, version_json, url, source_event): "host": str(source_event.host), "url": url, "description": self.vulnerability_to_s(wp_vuln), + "name": "WPScan - Possible Vulnerability", }, "VULNERABILITY", source_event, @@ -222,6 +228,7 @@ def parse_wp_themes(self, theme_json, url, source_event): "host": str(source_event.host), "url": url, "description": self.vulnerability_to_s(theme_vuln), + "name": "WPScan - Possible Vulnerability", }, "VULNERABILITY", source_event, @@ -251,6 +258,7 @@ def parse_wp_plugins(self, plugins_json, base_url, source_event): "host": str(source_event.host), "url": url, "description": self.vulnerability_to_s(vuln), + "name": "WPScan - Possible Vulnerability", }, "VULNERABILITY", source_event, From 9a4e7fc1fbafa4a8b3014f76d12a27956f3cc242 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 Apr 2025 15:56:20 -0400 Subject: [PATCH 113/147] tests --- bbot/test/bbot_fixtures.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 105b6f97e1..48c5a91bbd 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -198,13 +198,16 @@ class bbot_events: "https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url, module=dummy_module ) vulnerability = scan.make_event( - {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, + {"host": "evilcorp.com", "severity": "INFO", "description": "asdf", "name": "Vulnerability"}, "VULNERABILITY", parent=scan.root_event, module=dummy_module, ) finding = scan.make_event( - {"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event, module=dummy_module + {"host": "evilcorp.com", "description": "asdf", "name": "Finding"}, + "FINDING", + parent=scan.root_event, + module=dummy_module, ) vhost = scan.make_event( {"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event, module=dummy_module From baf6e6219386196107048e8a31a6f1902c311734 
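# A minimal sketch of the emit_event() call shape these patches converge on:
# every FINDING/VULNERABILITY data dict now carries a short "name" title
# alongside the long-form "description". The module and values below are
# hypothetical; only the call signature mirrors the diffs above.
from bbot.modules.base import BaseModule


class example_module(BaseModule):
    watched_events = ["URL"]
    produced_events = ["FINDING"]

    async def handle_event(self, event):
        await self.emit_event(
            {
                "host": str(event.host),
                "url": event.data,
                "description": "Example issue discovered on the target",
                "name": "Example Issue",  # short title, new in this series
            },
            "FINDING",
            parent=event,
        )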
Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 29 Apr 2025 16:13:23 -0400 Subject: [PATCH 114/147] tests --- bbot/test/test_step_1/test_events.py | 63 ++++++++++++++----- .../test_manager_scope_accuracy.py | 6 +- bbot/test/test_step_1/test_modules_basic.py | 9 ++- 3 files changed, 62 insertions(+), 16 deletions(-) diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 894bfeb804..3e1099d4bf 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -335,24 +335,44 @@ async def test_events(events, helpers): assert "affiliate" in corrected_event4.tags test_vuln = scan.make_event( - {"host": "EVILcorp.com", "severity": "iNfo ", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "EVILcorp.com", "severity": "iNfo ", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, ) assert test_vuln.data["host"] == "evilcorp.com" assert test_vuln.data["severity"] == "INFO" test_vuln2 = scan.make_event( - {"host": "192.168.1.1", "severity": "iNfo ", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "192.168.1.1", "severity": "iNfo ", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, ) assert json.loads(test_vuln2.data_human)["severity"] == "INFO" assert test_vuln2.host.is_private + # must have severity with pytest.raises(ValidationError, match=".*validation error.*\nseverity\n.*Field required.*"): - test_vuln = scan.make_event({"host": "evilcorp.com", "description": "asdf"}, "VULNERABILITY", dummy=True) + test_vuln = scan.make_event( + {"host": "evilcorp.com", "description": "asdf", "name": "Vulnerability"}, "VULNERABILITY", dummy=True + ) + # invalid host with pytest.raises(ValidationError, match=".*host.*\n.*Invalid host.*"): test_vuln = scan.make_event( - {"host": "!@#$", "severity": "INFO", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "!@#$", "severity": "INFO", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, ) + # invalid severity with pytest.raises(ValidationError, match=".*severity.*\n.*Invalid severity.*"): test_vuln = scan.make_event( - {"host": "evilcorp.com", "severity": "WACK", "description": "asdf"}, "VULNERABILITY", dummy=True + {"host": "evilcorp.com", "severity": "WACK", "description": "asdf", "name": "Vulnerability"}, + "VULNERABILITY", + dummy=True, + ) + # must have name + with pytest.raises(ValidationError, match=".*name.*\n.*Field required.*"): + test_vuln = scan.make_event( + {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, + "VULNERABILITY", + dummy=True, ) # test tagging @@ -941,13 +961,15 @@ def test_event_closest_host(): event3 = scan.make_event({"path": "/tmp/asdf.txt"}, "FILESYSTEM", parent=event2) assert not event3.host # finding automatically uses the host from the second event - finding = scan.make_event({"description": "test"}, "FINDING", parent=event3) + finding = scan.make_event({"description": "test", "name": "Finding"}, "FINDING", parent=event3) assert finding.data["host"] == "www.evilcorp.com" assert finding.data["url"] == "http://www.evilcorp.com/asdf" assert finding.data["path"] == "/tmp/asdf.txt" assert finding.host == "www.evilcorp.com" # same with vuln - vuln = scan.make_event({"description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3) + vuln = scan.make_event( + {"description": "test", "severity": "HIGH", "name": "Vulnerability"}, "VULNERABILITY", parent=event3 + ) assert vuln.data["host"] == 
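# A hedged sketch of the validation behavior the tests above pin down: building
# a VULNERABILITY event without "name" now fails pydantic validation. Import
# paths follow the imports visible elsewhere in this series.
from bbot.errors import ValidationError
from bbot.scanner import Scanner

scan = Scanner()
try:
    scan.make_event(
        {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"},  # no "name"
        "VULNERABILITY",
        dummy=True,
    )
except ValidationError:
    print('"name" is now a required field on VULNERABILITY events')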
"www.evilcorp.com" assert vuln.data["url"] == "http://www.evilcorp.com/asdf" assert vuln.data["path"] == "/tmp/asdf.txt" @@ -957,19 +979,29 @@ def test_event_closest_host(): event3 = scan.make_event("wat", "ASDF", parent=scan.root_event) assert not event3.host with pytest.raises(ValueError): - finding = scan.make_event({"description": "test"}, "FINDING", parent=event3) - finding = scan.make_event({"path": "/tmp/asdf.txt", "description": "test"}, "FINDING", parent=event3) + finding = scan.make_event({"description": "test", "name": "Finding"}, "FINDING", parent=event3) + finding = scan.make_event( + {"path": "/tmp/asdf.txt", "description": "test", "name": "Finding"}, "FINDING", parent=event3 + ) assert finding is not None - finding = scan.make_event({"host": "evilcorp.com", "description": "test"}, "FINDING", parent=event3) + finding = scan.make_event( + {"host": "evilcorp.com", "description": "test", "name": "Finding"}, "FINDING", parent=event3 + ) assert finding is not None with pytest.raises(ValueError): - vuln = scan.make_event({"description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3) + vuln = scan.make_event( + {"description": "test", "severity": "HIGH", "name": "Vulnerability"}, "VULNERABILITY", parent=event3 + ) vuln = scan.make_event( - {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3 + {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH", "name": "Vulnerability"}, + "VULNERABILITY", + parent=event3, ) assert vuln is not None vuln = scan.make_event( - {"host": "evilcorp.com", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3 + {"host": "evilcorp.com", "description": "test", "severity": "HIGH", "name": "Vulnerability"}, + "VULNERABILITY", + parent=event3, ) assert vuln is not None @@ -1059,21 +1091,24 @@ def test_event_hashing(): url_event = scan.make_event("https://api.example.com/", "URL_UNVERIFIED", parent=scan.root_event) host_event_1 = scan.make_event("www.example.com", "DNS_NAME", parent=url_event) host_event_2 = scan.make_event("test.example.com", "DNS_NAME", parent=url_event) - finding_data = {"description": "Custom Yara Rule [find_string] Matched via identifier [str1]"} + finding_data = {"description": "Custom Yara Rule [find_string] Matched via identifier [str1]", "name": "Finding"} finding1 = scan.make_event(finding_data, "FINDING", parent=host_event_1) finding2 = scan.make_event(finding_data, "FINDING", parent=host_event_2) finding3 = scan.make_event(finding_data, "FINDING", parent=host_event_2) assert finding1.data == { "description": "Custom Yara Rule [find_string] Matched via identifier [str1]", + "name": "Finding", "host": "www.example.com", } assert finding2.data == { "description": "Custom Yara Rule [find_string] Matched via identifier [str1]", + "name": "Finding", "host": "test.example.com", } assert finding3.data == { "description": "Custom Yara Rule [find_string] Matched via identifier [str1]", + "name": "Finding", "host": "test.example.com", } assert finding1.id != finding2.id diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index f012b0e3e0..a976498f15 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -115,6 +115,8 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) if scan_callback is not None: scan_callback(scan) output_events = [e async for e in scan.async_start()] + # let 
modules initialize + await asyncio.sleep(0.5) return ( output_events, dummy_module.events, @@ -268,7 +270,9 @@ async def filter_event(self, event): async def handle_event(self, event): await self.emit_event( - {"host": str(event.host), "description": "yep", "severity": "CRITICAL"}, "VULNERABILITY", parent=event + {"host": str(event.host), "description": "yep", "severity": "CRITICAL", "name": "Vulnerability"}, + "VULNERABILITY", + parent=event, ) def custom_setup(scan): diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 07b4f6692d..27f4983bce 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -354,7 +354,14 @@ async def handle_event(self, event): # quick emit events like FINDINGS behave differently than normal ones # hosts are not speculated from them await self.emit_event( - {"host": "www.evilcorp.com", "url": "http://www.evilcorp.com", "description": "asdf"}, "FINDING", event + { + "host": "www.evilcorp.com", + "url": "http://www.evilcorp.com", + "description": "asdf", + "name": "Finding", + }, + "FINDING", + event, ) await self.emit_event("https://asdf.evilcorp.com", "URL", event, tags=["status-200"]) From 4ae4d5d7e68b2b87e6ee9bfa0c0749e7db3185b7 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 30 Apr 2025 16:35:02 -0400 Subject: [PATCH 115/147] fix tests --- bbot/defaults.yml | 9 +++++++ bbot/modules/internal/excavate.py | 24 ++++++++++++++----- bbot/modules/lightfuzz/lightfuzz.py | 7 +++++- bbot/modules/templates/bucket.py | 7 +++++- .../test_module_url_manipulation.py | 2 +- 5 files changed, 40 insertions(+), 9 deletions(-) diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 95d3eed0ec..319406410b 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -1,5 +1,14 @@ ### BASIC OPTIONS ### +# NOTE: If used in a preset, these options must be nested underneath "config:" like so: +# config: +# home: ~/.bbot +# keep_scans: 20 +# scope: +# strict: true +# dns: +# minimal: true + # BBOT working directory home: ~/.bbot # How many scan results to keep before cleaning up the older ones diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 1a7851e463..992fa5040d 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -194,7 +194,10 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte """ for results in yara_results.values(): for result in results: - event_data = {"description": f"{discovery_context} {yara_rule_settings.description}"} + event_data = { + "name": f"{discovery_context} {yara_rule_settings.description}", + "description": f"{discovery_context} {yara_rule_settings.description}", + } if yara_rule_settings.emit_match: event_data["description"] += f" [{result}]" await self.report(event_data, event, yara_rule_settings, discovery_context) @@ -261,7 +264,8 @@ async def report( # If a description is not set and is needed, provide a basic one if event_type == "FINDING" and "description" not in event_data.keys(): - event_data["description"] = f"{discovery_context} {yara_rule_settings['self.description']}" + event_data["name"] = f"{discovery_context} {yara_rule_settings.description}" + event_data["description"] = f"{discovery_context} {yara_rule_settings.description}" subject = "" if isinstance(event_data, str): subject = f" {event_data}" @@ -281,7 +285,9 @@ def __init__(self, excavate): async def process(self, yara_results, event, yara_rule_settings, 
discovery_context): for identifier, results in yara_results.items(): for result in results: - event_data = {} + event_data = { + "name": f"Custom Yara Rule [{self.name}]", + } description_string = ( f" with description: [{yara_rule_settings.description}]" if yara_rule_settings.description else "" ) @@ -718,7 +724,8 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte for identifier in yara_results.keys(): for findings in yara_results[identifier]: event_data = { - "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})" + "name": "Possible Verbose Error Message", + "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})", } await self.report(event_data, event, yara_rule_settings, discovery_context, event_type="FINDING") @@ -749,7 +756,8 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte for identifier in yara_results.keys(): for findings in yara_results[identifier]: event_data = { - "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})" + "name": "Possible Serialized Object", + "description": f"{discovery_context} {yara_rule_settings.description} ({identifier})", } await self.report(event_data, event, yara_rule_settings, discovery_context, event_type="FINDING") @@ -795,7 +803,11 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte def abort_if(e): return e.scope_distance > 0 - finding_data = {"host": str(host), "description": f"Non-HTTP URI: {parsed_url.geturl()}"} + finding_data = { + "host": str(host), + "name": "Non-HTTP URI", + "description": f"Non-HTTP URI: {parsed_url.geturl()}", + } await self.report(finding_data, event, yara_rule_settings, discovery_context, abort_if=abort_if) protocol_data = {"protocol": parsed_url.scheme, "host": str(host)} if port: diff --git a/bbot/modules/lightfuzz/lightfuzz.py b/bbot/modules/lightfuzz/lightfuzz.py index e178399e35..ced1ce277d 100644 --- a/bbot/modules/lightfuzz/lightfuzz.py +++ b/bbot/modules/lightfuzz/lightfuzz.py @@ -100,7 +100,12 @@ async def run_submodule(self, submodule, event): await submodule_instance.fuzz() if len(submodule_instance.results) > 0: for r in submodule_instance.results: - event_data = {"host": str(event.host), "url": event.data["url"], "description": r["description"]} + event_data = { + "host": str(event.host), + "url": event.data["url"], + "name": r["name"], + "description": r["description"], + } envelopes = getattr(event, "envelopes", None) envelope_summary = getattr(envelopes, "summary", None) diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py index 3cd899d71c..537910dbcc 100644 --- a/bbot/modules/templates/bucket.py +++ b/bbot/modules/templates/bucket.py @@ -67,7 +67,12 @@ async def handle_storage_bucket(self, event): if self.supports_open_check: description, tags = await self._check_bucket_open(bucket_name, url) if description: - event_data = {"host": event.host, "url": url, "description": description} + event_data = { + "host": event.host, + "url": url, + "description": description, + "name": "Open Storage Bucket", + } await self.emit_event( event_data, "FINDING", diff --git a/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py b/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py index 725a96fecf..1961b50ce8 100644 --- a/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py +++ b/bbot/test/test_step_2/module_tests/test_module_url_manipulation.py @@ -34,6 +34,6 @@ 
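# Sketch of the hand-off shown above: a lightfuzz submodule appends plain dicts
# to self.results, and the parent module copies "name" and "description" onto
# the emitted event data. Field names come from the diffs; values are made up.
result = {
    "type": "FINDING",
    "name": "Lightfuzz - Possible SQL Injection",
    "description": "Possible SQL Injection. Detection Method: [SQL Error Detection]",
}
event_data = {
    "host": "evilcorp.com",
    "url": "http://evilcorp.com/page?id=1",
    "name": result["name"],
    "description": result["description"],
}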
def check(self, module_test, events): assert any( e.type == "FINDING" and e.data["description"] - == f"Url Manipulation: [body] Sig: [Modified URL: http://127.0.0.1:8888/?{module_test.module.rand_string}=.xml]" + == f"URL Manipulation: [body] Sig: [Modified URL: http://127.0.0.1:8888/?{module_test.module.rand_string}=.xml]" for e in events ) From 6bdace9c576b9121c60c1666ee5167f7a2a5ada7 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 30 Apr 2025 16:37:45 -0400 Subject: [PATCH 116/147] padding oracle --- bbot/modules/lightfuzz/submodules/crypto.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/modules/lightfuzz/submodules/crypto.py b/bbot/modules/lightfuzz/submodules/crypto.py index 36b8ac18c6..b5bc846f64 100644 --- a/bbot/modules/lightfuzz/submodules/crypto.py +++ b/bbot/modules/lightfuzz/submodules/crypto.py @@ -286,6 +286,7 @@ async def padding_oracle(self, probe_value, cookies): { "type": "VULNERABILITY", "severity": "HIGH", + "name": "Padding Oracle Vulnerability", "description": f"Padding Oracle Vulnerability. Block size: [{str(block_size)}] {self.metadata()}", "context": context, } From 18c40a07c49c96487796b5e089094831210c3633 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 1 May 2025 15:39:52 -0400 Subject: [PATCH 117/147] fix vuln bug --- bbot/core/event/base.py | 5 ++++- bbot/test/test_step_1/test_events.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index f49a287774..44b74c5b9e 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1086,9 +1086,10 @@ def __init__(self, *args, **kwargs): parent_path = parent.data.get("path", None) if parent_path is not None: self.data["path"] = parent_path - # inherit closest host + # inherit closest host+port if parent.host: self.data["host"] = str(parent.host) + self.port = parent.port # we do this to refresh the hash self.data = self.data break @@ -1099,6 +1100,7 @@ def __init__(self, *args, **kwargs): class DictPathEvent(DictEvent): def sanitize_data(self, data): + data = super().sanitize_data(data) new_data = dict(data) new_data["path"] = str(new_data["path"]) file_blobs = getattr(self.scan, "_file_blobs", False) @@ -1557,6 +1559,7 @@ class VULNERABILITY(ClosestHostEvent): } def sanitize_data(self, data): + data = super().sanitize_data(data) self.add_tag(data["severity"].lower()) return data diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 3e1099d4bf..6292793e46 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -12,6 +12,16 @@ async def test_events(events, helpers): scan = Scanner() await scan._prep() + # port and netloc derived from URL + test_vuln = scan.make_event( + {"host": "evilcorp.com", "name": "test", "severity": "INFO", "description": "asdf", "url": "http://evilcorp.com/test"}, + "VULNERABILITY", + dummy=True, + ) + assert test_vuln.host == "evilcorp.com" + assert test_vuln.port == 80 + assert test_vuln.netloc == "evilcorp.com:80" + assert events.ipv4.type == "IP_ADDRESS" assert events.ipv4.netloc == "8.8.8.8" assert events.ipv4.port is None From ee5ef0866980ac25e9c8b854a08e3a959cde7faf Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 1 May 2025 15:40:22 -0400 Subject: [PATCH 118/147] ruffed --- bbot/test/test_step_1/test_events.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 
6292793e46..e1e59b644b 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -12,16 +12,6 @@ async def test_events(events, helpers): scan = Scanner() await scan._prep() - # port and netloc derived from URL - test_vuln = scan.make_event( - {"host": "evilcorp.com", "name": "test", "severity": "INFO", "description": "asdf", "url": "http://evilcorp.com/test"}, - "VULNERABILITY", - dummy=True, - ) - assert test_vuln.host == "evilcorp.com" - assert test_vuln.port == 80 - assert test_vuln.netloc == "evilcorp.com:80" - assert events.ipv4.type == "IP_ADDRESS" assert events.ipv4.netloc == "8.8.8.8" assert events.ipv4.port is None @@ -385,6 +375,22 @@ async def test_events(events, helpers): dummy=True, ) + # port and netloc should be derived from URL + test_vuln = scan.make_event( + { + "host": "evilcorp.com", + "name": "test", + "severity": "INFO", + "description": "asdf", + "url": "http://evilcorp.com/test", + }, + "VULNERABILITY", + dummy=True, + ) + assert test_vuln.host == "evilcorp.com" + assert test_vuln.port == 80 + assert test_vuln.netloc == "evilcorp.com:80" + # test tagging ip_event_1 = scan.make_event("8.8.8.8", dummy=True) assert "private-ip" not in ip_event_1.tags From 355ca15c905a43607775185181e20dd86aa996a4 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 1 May 2025 17:04:20 -0400 Subject: [PATCH 119/147] fix finding bug --- bbot/core/event/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 44b74c5b9e..9f9fa32e0b 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1089,7 +1089,7 @@ def __init__(self, *args, **kwargs): # inherit closest host+port if parent.host: self.data["host"] = str(parent.host) - self.port = parent.port + self._port = parent.port # we do this to refresh the hash self.data = self.data break From 382c8b34113bb17222c4e8816b14ecfcc177e143 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 2 May 2025 16:47:49 -0400 Subject: [PATCH 120/147] lowercase technology --- bbot/core/event/base.py | 4 ++++ bbot/test/test_step_1/test_events.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 9f9fa32e0b..8d28493a78 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1604,6 +1604,10 @@ class _data_validator(BaseModel): _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) + def _sanitize_data(self, data): + data["technology"] = data["technology"].lower() + return data + def _data_id(self): # dedupe by host+port+tech tech = self.data.get("technology", "") diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index e1e59b644b..f602af415c 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -391,6 +391,14 @@ async def test_events(events, helpers): assert test_vuln.port == 80 assert test_vuln.netloc == "evilcorp.com:80" + # technology should be lowercased + tech_event = scan.make_event( + {"host": "evilcorp.com", "technology": "HTTP", "url": "http://evilcorp.com/test"}, + "TECHNOLOGY", + dummy=True, + ) + assert tech_event.data["technology"] == "http" + # test tagging ip_event_1 = scan.make_event("8.8.8.8", dummy=True) assert "private-ip" not in ip_event_1.tags From 9418e0566c1287e545287dd272d5517de09716fa Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 May 2025 16:30:22 -0400 Subject: 
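# Rough sketch of the closest-host inheritance being fixed here: a FINDING with
# no host of its own walks up its parent chain and copies host (and now port)
# from the nearest ancestor that has one. Assumes a freshly constructed
# Scanner, as in the tests above.
from bbot.scanner import Scanner

scan = Scanner()
url_event = scan.make_event("http://www.evilcorp.com/asdf", "URL_UNVERIFIED", parent=scan.root_event)
finding = scan.make_event({"description": "test", "name": "Finding"}, "FINDING", parent=url_event)
assert finding.data["host"] == "www.evilcorp.com"
assert finding.port == 80  # inherited alongside host (http -> 80)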
[PATCH 121/147] bbot constants --- bbot/constants.py | 31 +++++++++++++++++++++++++++++++ bbot/scanner/scanner.py | 18 ++++-------------- 2 files changed, 35 insertions(+), 14 deletions(-) create mode 100644 bbot/constants.py diff --git a/bbot/constants.py b/bbot/constants.py new file mode 100644 index 0000000000..a5ce0bde90 --- /dev/null +++ b/bbot/constants.py @@ -0,0 +1,31 @@ + + +SCAN_STATUSES = { + "NOT_STARTED": 0, + "STARTING": 1, + "RUNNING": 2, + "FINISHING": 3, + "ABORTING": 5, + "ABORTED": 6, + "FAILED": 7, + "FINISHED": 8, +} + +SCAN_STATUS_CODES = {v: k for k, v in SCAN_STATUSES.items()} + + +def get_scan_status_code(status): + """ + Convert a scan status string to a numeric status code + """ + try: + if isinstance(status, int): + if not status in SCAN_STATUS_CODES: + raise ValueError(f"Invalid scan status code: {status}") + return status + elif isinstance(status, str): + return SCAN_STATUSES[status] + else: + raise ValueError(f"Invalid scan status: {status} (must be int or str)") + except KeyError: + raise ValueError(f"Invalid scan status: {status}") diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index e4edddacf4..36e310c96b 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -12,6 +12,7 @@ from bbot import __version__ from bbot.core.event import make_event +from bbot.constants import SCAN_STATUSES from .manager import ScanIngress, ScanEgress from bbot.core.helpers.misc import sha1, rand_string from bbot.core.helpers.names_generator import random_name @@ -84,17 +85,6 @@ class Scanner: - Setting a status will trigger the `on_status` event in the dispatcher. """ - _status_codes = { - "NOT_STARTED": 0, - "STARTING": 1, - "RUNNING": 2, - "FINISHING": 3, - "ABORTING": 5, - "ABORTED": 6, - "FAILED": 7, - "FINISHED": 8, - } - def __init__( self, *targets, @@ -977,16 +967,16 @@ def status(self, status): """ status = str(status).strip().upper() self.debug(f"Setting scan status from {self.status} to {status}") - if status in self._status_codes: + if status in SCAN_STATUSES: # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again - if self._status_code >= self._status_codes["ABORTED"]: + if self._status_code >= SCAN_STATUSES["ABORTED"]: self.debug(f'Attempt to set invalid status "{status}" on already finished scan') return if status == self._status: self.debug(f'Scan status is already "{status}"') return self._status = status - self._status_code = self._status_codes[status] + self._status_code = SCAN_STATUSES[status] # clean out old dispatcher tasks for task in list(self.dispatcher_tasks): if task.done(): From ffdf28dd1accb8d9c68ece399352e1c18498afcc Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 May 2025 16:30:31 -0400 Subject: [PATCH 122/147] ruffed --- bbot/constants.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bbot/constants.py b/bbot/constants.py index a5ce0bde90..9da7fb839a 100644 --- a/bbot/constants.py +++ b/bbot/constants.py @@ -1,5 +1,3 @@ - - SCAN_STATUSES = { "NOT_STARTED": 0, "STARTING": 1, From 4e0f0f9faa4f9692e2a94f735967883c2336819b Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 May 2025 16:41:48 -0400 Subject: [PATCH 123/147] ruffed --- bbot/constants.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/bbot/constants.py b/bbot/constants.py index 9da7fb839a..587b43752d 100644 --- a/bbot/constants.py +++ b/bbot/constants.py @@ -12,13 +12,44 @@ SCAN_STATUS_CODES = {v: k for k, v in SCAN_STATUSES.items()} +def 
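# Quick sketch of the round trip the new constants module provides. Hard-coded
# codes are avoided on purpose, since later patches in this series renumber them.
from bbot.constants import SCAN_STATUSES, SCAN_STATUS_CODES, get_scan_status_code

code = get_scan_status_code("RUNNING")       # name -> numeric code
assert SCAN_STATUS_CODES[code] == "RUNNING"  # code -> name
assert get_scan_status_code(code) == code    # valid ints pass straight through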
is_valid_scan_status(status): + """ + Check if a status is a valid scan status + """ + return status in SCAN_STATUSES + + +def is_valid_scan_status_code(status): + """ + Check if a status is a valid scan status code + """ + return status in SCAN_STATUS_CODES + + +def get_scan_status_name(status): + """ + Convert a numeric scan status code to a string status name + """ + try: + if isinstance(status, str): + if not is_valid_scan_status(status): + raise ValueError(f"Invalid scan status: {status}") + return SCAN_STATUS_CODES[status] + elif isinstance(status, int): + return SCAN_STATUSES[status] + else: + raise ValueError(f"Invalid scan status: {status} (must be int or str)") + except KeyError: + raise ValueError(f"Invalid scan status: {status}") + + def get_scan_status_code(status): """ Convert a scan status string to a numeric status code """ try: if isinstance(status, int): - if not status in SCAN_STATUS_CODES: + if not is_valid_scan_status_code(status): raise ValueError(f"Invalid scan status code: {status}") return status elif isinstance(status, str): From 25c20ff2bd057c6f187b80c950e1d8d3e02c4636 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 May 2025 16:48:10 -0400 Subject: [PATCH 124/147] include status code --- bbot/scanner/scanner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 36e310c96b..2f126b9957 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -1019,6 +1019,7 @@ def root_event(self): if self._root_event is None: self._root_event = self.make_root_event(f"Scan {self.name} started at {self.start_time}") self._root_event.data["status"] = self.status + self._root_event.data["status_code"] = self._status_code return self._root_event def finish_event(self, context=None, status=None): From 1ca1f55d508f454972476e04179ea89bb698a335 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 May 2025 17:03:22 -0400 Subject: [PATCH 125/147] fix bug --- bbot/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/constants.py b/bbot/constants.py index 587b43752d..aefc534edc 100644 --- a/bbot/constants.py +++ b/bbot/constants.py @@ -34,9 +34,9 @@ def get_scan_status_name(status): if isinstance(status, str): if not is_valid_scan_status(status): raise ValueError(f"Invalid scan status: {status}") - return SCAN_STATUS_CODES[status] + return status elif isinstance(status, int): - return SCAN_STATUSES[status] + return SCAN_STATUS_CODES[status] else: raise ValueError(f"Invalid scan status: {status} (must be int or str)") except KeyError: From 1b08de12d0c3e43c90600a2d57d4f4080a6f4e2f Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 May 2025 17:56:11 -0400 Subject: [PATCH 126/147] scan status rework --- bbot/constants.py | 28 +++++++++---- bbot/scanner/scanner.py | 91 +++++++++++++++++++++++------------------ 2 files changed, 72 insertions(+), 47 deletions(-) diff --git a/bbot/constants.py b/bbot/constants.py index aefc534edc..e3de41db1f 100644 --- a/bbot/constants.py +++ b/bbot/constants.py @@ -1,12 +1,24 @@ +SCAN_STATUS_QUEUED = 0 +SCAN_STATUS_NOT_STARTED = 1 +SCAN_STATUS_STARTING = 2 +SCAN_STATUS_RUNNING = 3 +SCAN_STATUS_FINISHING = 4 +SCAN_STATUS_ABORTING = 5 +SCAN_STATUS_ABORTED = 6 +SCAN_STATUS_FAILED = 7 +SCAN_STATUS_FINISHED = 8 + + SCAN_STATUSES = { - "NOT_STARTED": 0, - "STARTING": 1, - "RUNNING": 2, - "FINISHING": 3, - "ABORTING": 5, - "ABORTED": 6, - "FAILED": 7, - "FINISHED": 8, + "QUEUED": SCAN_STATUS_QUEUED, + "NOT_STARTED": SCAN_STATUS_NOT_STARTED, + 
"STARTING": SCAN_STATUS_STARTING, + "RUNNING": SCAN_STATUS_RUNNING, + "FINISHING": SCAN_STATUS_FINISHING, + "ABORTING": SCAN_STATUS_ABORTING, + "ABORTED": SCAN_STATUS_ABORTED, + "FAILED": SCAN_STATUS_FAILED, + "FINISHED": SCAN_STATUS_FINISHED, } SCAN_STATUS_CODES = {v: k for k, v in SCAN_STATUSES.items()} diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 2f126b9957..ff50302f94 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -12,7 +12,6 @@ from bbot import __version__ from bbot.core.event import make_event -from bbot.constants import SCAN_STATUSES from .manager import ScanIngress, ScanEgress from bbot.core.helpers.misc import sha1, rand_string from bbot.core.helpers.names_generator import random_name @@ -20,6 +19,18 @@ from bbot.core.multiprocess import SHARED_INTERPRETER_STATE from bbot.core.helpers.async_helpers import async_to_sync_gen from bbot.errors import BBOTError, ScanError, ValidationError +from bbot.constants import ( + get_scan_status_code, + get_scan_status_name, + SCAN_STATUS_NOT_STARTED, + SCAN_STATUS_STARTING, + SCAN_STATUS_RUNNING, + SCAN_STATUS_FINISHING, + SCAN_STATUS_ABORTING, + SCAN_STATUS_ABORTED, + SCAN_STATUS_FAILED, + SCAN_STATUS_FINISHED, +) log = logging.getLogger("bbot.scanner") @@ -165,8 +176,7 @@ def __init__( else: self.home = self.preset.bbot_home / "scans" / self.name - self._status = "NOT_STARTED" - self._status_code = 0 + self._status_code = SCAN_STATUS_NOT_STARTED self.modules = OrderedDict({}) self._modules_loaded = False @@ -344,18 +354,18 @@ async def async_start(self): self._status_ticker(self.status_frequency), name=f"{self.name}._status_ticker()" ) - self.status = "STARTING" + self.status = SCAN_STATUS_STARTING if not self.modules: self.error("No modules loaded") - self.status = "FAILED" + self.status = SCAN_STATUS_FAILED return else: self.hugesuccess(f"Starting scan {self.name}") await self.dispatcher.on_start(self) - self.status = "RUNNING" + self.status = SCAN_STATUS_RUNNING self._start_modules() self.verbose(f"{len(self.modules):,} modules started") @@ -438,12 +448,14 @@ async def _mark_finished(self): self._marked_finished = True - if self.status == "ABORTING": - status = "ABORTED" + if self._status_code == SCAN_STATUS_ABORTING: + status_code = SCAN_STATUS_ABORTED elif not self._success: - status = "FAILED" + status_code = SCAN_STATUS_FAILED else: - status = "FINISHED" + status_code = SCAN_STATUS_FINISHED + + status = get_scan_status_name(status_code) self.end_time = datetime.now(ZoneInfo("UTC")) self.duration = self.end_time - self.start_time @@ -940,19 +952,19 @@ def stopping(self): @property def stopped(self): - return self._status_code > 5 + return self._status_code >= SCAN_STATUS_ABORTED @property def running(self): - return 0 < self._status_code < 4 + return SCAN_STATUS_STARTING <= self._status_code <= SCAN_STATUS_FINISHING @property def aborting(self): - return 5 <= self._status_code <= 6 + return SCAN_STATUS_ABORTING <= self._status_code <= SCAN_STATUS_ABORTED @property def status(self): - return self._status + return get_scan_status_name(self._status_code) @property def omitted_event_types(self): @@ -965,30 +977,28 @@ def status(self, status): """ Block setting after status has been aborted """ - status = str(status).strip().upper() + try: + status_code = get_scan_status_code(status) + status = get_scan_status_name(status_code) + except ValueError: + self.warning(f'Attempt to set invalid status "{status}" on scan') + self.debug(f"Setting scan status from {self.status} to {status}") - if status 
in SCAN_STATUSES: - # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again - if self._status_code >= SCAN_STATUSES["ABORTED"]: - self.debug(f'Attempt to set invalid status "{status}" on already finished scan') - return - if status == self._status: - self.debug(f'Scan status is already "{status}"') - return - self._status = status - self._status_code = SCAN_STATUSES[status] - # clean out old dispatcher tasks - for task in list(self.dispatcher_tasks): - if task.done(): - self.dispatcher_tasks.remove(task) - self.dispatcher_tasks.append( - asyncio.create_task( - self.dispatcher.catch(self.dispatcher.on_status, self._status, self.id), - name=f"{self.name}.dispatcher.on_status({status})", - ) + # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again + if status_code < self._status_code: + self.debug(f'Attempt to set invalid status "{status}" on scan with status "{self.status}"') + return + self._status_code = status_code + # clean out old dispatcher tasks + for task in list(self.dispatcher_tasks): + if task.done(): + self.dispatcher_tasks.remove(task) + self.dispatcher_tasks.append( + asyncio.create_task( + self.dispatcher.catch(self.dispatcher.on_status, self.status, self.id), + name=f"{self.name}.dispatcher.on_status({status})", ) - else: - self.warning(f'Attempt to set invalid status "{status}" on scan') + ) def make_event(self, *args, **kwargs): kwargs["scan"] = self @@ -1022,12 +1032,15 @@ def root_event(self): self._root_event.data["status_code"] = self._status_code return self._root_event - def finish_event(self, context=None, status=None): + def finish_event(self, context=None, status_code=None): if self._finish_event is None: - if context is None or status is None: - raise ValueError("Must specify context and status") + if context is None or status_code is None: + raise ValueError("Must specify context and status_code") self._finish_event = self.make_root_event(context) + status_code = get_scan_status_code(status_code) + status = get_scan_status_name(status_code) self._finish_event.data["status"] = status + self._finish_event.data["status_code"] = status_code return self._finish_event def make_root_event(self, context): From ea8b6fd0d33c0bf213db7ea020b0bbd1b1eaf80f Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 May 2025 18:01:44 -0400 Subject: [PATCH 127/147] status code --- bbot/scanner/scanner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index ff50302f94..3dc51ac901 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -985,7 +985,7 @@ def status(self, status): self.debug(f"Setting scan status from {self.status} to {status}") # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again - if status_code < self._status_code: + if status_code <= self._status_code: self.debug(f'Attempt to set invalid status "{status}" on scan with status "{self.status}"') return self._status_code = status_code From 72ae23f9297af36df9d86490c654c8efa79d42a8 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 6 May 2025 02:04:27 -0400 Subject: [PATCH 128/147] cancellation --- bbot/core/config/logger.py | 6 +++++- bbot/modules/base.py | 4 ++-- bbot/scanner/dispatcher.py | 13 +++++++------ bbot/scanner/scanner.py | 36 ++++++++++++++---------------------- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/bbot/core/config/logger.py b/bbot/core/config/logger.py index 
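# Sketch of the forward-only rule enforced above: _set_status() drops any
# transition whose numeric code is <= the current one, so a FINISHED or
# ABORTED scan can never fall back to RUNNING. The helper below is
# hypothetical shorthand for that comparison.
from bbot.constants import SCAN_STATUSES


def transition_allowed(current: str, new: str) -> bool:
    return SCAN_STATUSES[new] > SCAN_STATUSES[current]


assert transition_allowed("RUNNING", "FINISHING")
assert not transition_allowed("FINISHED", "RUNNING")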
c5773a3a0c..4f22b5157e 100644 --- a/bbot/core/config/logger.py +++ b/bbot/core/config/logger.py @@ -2,6 +2,7 @@ import sys import atexit import logging +import threading from copy import copy import multiprocessing import logging.handlers @@ -93,7 +94,10 @@ def cleanup_logging(self): # Stop queue listener with suppress(Exception): - self.listener.stop() + stop_thread = threading.Thread(target=self.listener.stop) + stop_thread.daemon = True + stop_thread.start() + stop_thread.join() def setup_queue_handler(self, logging_queue=None, log_level=logging.DEBUG): if logging_queue is None: diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 0e68c6b254..0062ba9da9 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -708,7 +708,7 @@ async def _worker(self): self.trace(f"RuntimeError in module {self.name}: {e}") except BaseException as e: if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): - self.scan.stop() + await self.scan.async_stop() else: self.error(f"Critical failure in module {self.name}: {e}") self.error(traceback.format_exc()) @@ -1730,7 +1730,7 @@ async def _worker(self): self.trace(f"RuntimeError in intercept module {self.name}: {e}") except BaseException as e: if self.helpers.in_exception_chain(e, (KeyboardInterrupt,)): - self.scan.stop() + await self.scan.async_stop() else: self.critical(f"Critical failure in intercept module {self.name}: {e}") self.critical(traceback.format_exc()) diff --git a/bbot/scanner/dispatcher.py b/bbot/scanner/dispatcher.py index a9c56c2b72..efd3270903 100644 --- a/bbot/scanner/dispatcher.py +++ b/bbot/scanner/dispatcher.py @@ -1,7 +1,6 @@ import logging import traceback - -log = logging.getLogger("bbot.scanner.dispatcher") +import contextlib class Dispatcher: @@ -11,6 +10,7 @@ class Dispatcher: def set_scan(self, scan): self.scan = scan + self.log = logging.getLogger("bbot.scanner.dispatcher") async def on_start(self, scan): return @@ -24,9 +24,10 @@ async def on_status(self, status, scan_id): """ self.scan.debug(f"Setting scan status to {status}") - async def catch(self, callback, *args, **kwargs): + @contextlib.contextmanager + def catch(self): try: - return await callback(*args, **kwargs) + yield except Exception as e: - log.error(f"Error in {callback.__qualname__}(): {e}") - log.trace(traceback.format_exc()) + self.log.error(f"Error in dispatcher: {e}") + self.log.trace(traceback.format_exc()) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 3dc51ac901..6556f4e17d 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -335,9 +335,9 @@ async def async_start_without_generator(self): pass async def async_start(self): - """ """ self.start_time = datetime.now(ZoneInfo("UTC")) self.root_event.data["started_at"] = self.start_time.timestamp() + await self._set_status(SCAN_STATUS_STARTING) try: await self._prep() @@ -354,18 +354,16 @@ async def async_start(self): self._status_ticker(self.status_frequency), name=f"{self.name}._status_ticker()" ) - self.status = SCAN_STATUS_STARTING - if not self.modules: self.error("No modules loaded") - self.status = SCAN_STATUS_FAILED + await self._set_status(SCAN_STATUS_FAILED) return else: self.hugesuccess(f"Starting scan {self.name}") await self.dispatcher.on_start(self) - self.status = SCAN_STATUS_RUNNING + await self._set_status(SCAN_STATUS_RUNNING) self._start_modules() self.verbose(f"{len(self.modules):,} modules started") @@ -403,7 +401,7 @@ async def async_start(self): except BaseException as e: if self.helpers.in_exception_chain(e, (KeyboardInterrupt, 
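# Generic re-implementation (not the BBOT class itself) of the Dispatcher.catch()
# pattern introduced here: a contextmanager that turns hook exceptions into log
# lines instead of letting them propagate into scan teardown.
import logging
import contextlib

log = logging.getLogger("demo.dispatcher")


@contextlib.contextmanager
def catch():
    try:
        yield
    except Exception as e:
        log.error(f"Error in dispatcher: {e}")


with catch():
    raise RuntimeError("logged, not raised")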
asyncio.CancelledError)): - self.stop() + await self.async_stop() self._success = True else: try: @@ -480,7 +478,7 @@ async def _mark_finished(self): break await asyncio.sleep(0.05) - self.status = status + await self._set_status(status) return scan_finish_event def _start_modules(self): @@ -754,7 +752,7 @@ def modules_status(self, _log=False, detailed=False): return status - def stop(self): + async def async_stop(self): """Stops the in-progress scan and performs necessary cleanup. This method sets the scan's status to "ABORTING," cancels any pending tasks, and drains event queues. It also kills child processes spawned during the scan. @@ -764,7 +762,7 @@ def stop(self): """ if not self._stopping: self._stopping = True - self.status = "ABORTING" + await self._set_status(SCAN_STATUS_ABORTING) self.hugewarning("Aborting scan") self.trace() self._cancel_tasks() @@ -773,7 +771,10 @@ def stop(self): self._drain_queues() self.helpers.kill_children() self.debug("Finished aborting scan") - self.status = "ABORTED" + await self._set_status(SCAN_STATUS_ABORTED) + + def stop(self): + asyncio.create_task(self.async_stop()) async def finish(self): """Finalizes the scan by invoking the `finished()` method on all active modules if new activity is detected. @@ -972,8 +973,7 @@ def omitted_event_types(self): self._omitted_event_types = self.config.get("omit_event_types", []) return self._omitted_event_types - @status.setter - def status(self, status): + async def _set_status(self, status): """ Block setting after status has been aborted """ @@ -989,16 +989,8 @@ def status(self, status): self.debug(f'Attempt to set invalid status "{status}" on scan with status "{self.status}"') return self._status_code = status_code - # clean out old dispatcher tasks - for task in list(self.dispatcher_tasks): - if task.done(): - self.dispatcher_tasks.remove(task) - self.dispatcher_tasks.append( - asyncio.create_task( - self.dispatcher.catch(self.dispatcher.on_status, self.status, self.id), - name=f"{self.name}.dispatcher.on_status({status})", - ) - ) + with self.dispatcher.catch(): + await self.dispatcher.on_status(self.status, self.id) def make_event(self, *args, **kwargs): kwargs["scan"] = self From f67b97f9824d4ee39adee614ef3ae93b02a45448 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 6 May 2025 02:09:36 -0400 Subject: [PATCH 129/147] things --- bbot/scanner/scanner.py | 2 +- bbot/test/test_step_1/test_modules_basic.py | 6 +++--- bbot/test/test_step_1/test_web.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 6556f4e17d..7ec4529e1d 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -791,7 +791,7 @@ async def finish(self): # if new events were generated since last time we were here if self._new_activity: self._new_activity = False - self.status = "FINISHING" + await self._set_status(SCAN_STATUS_FINISHING) # Trigger .finished() on every module and start over log.info("Finishing scan") for module in self.modules.values(): diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 27f4983bce..9e2e21e12e 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -242,7 +242,7 @@ class mod_domain_only(BaseModule): scan.modules["mod_host_only"] = mod_host_only(scan) scan.modules["mod_hostport_only"] = mod_hostport_only(scan) scan.modules["mod_domain_only"] = mod_domain_only(scan) - scan.status = "RUNNING" + await 
scan._set_status("RUNNING") url_1 = scan.make_event("http://evilcorp.com/1", event_type="URL", parent=scan.root_event, tags=["status-200"]) url_2 = scan.make_event("http://evilcorp.com/2", event_type="URL", parent=scan.root_event, tags=["status-200"]) @@ -310,7 +310,7 @@ async def test_modules_basic_perdomainonly(bbot_scanner, monkeypatch): await per_domain_scan.load_modules() await per_domain_scan.setup_modules() - per_domain_scan.status = "RUNNING" + await per_domain_scan._set_status("RUNNING") # ensure that multiple events to the same "host" (schema + host) are blocked and check the per host tracker @@ -456,7 +456,7 @@ async def test_module_loading(bbot_scanner): force_start=True, ) await scan2.load_modules() - scan2.status = "RUNNING" + await scan2._set_status("RUNNING") # attributes, descriptions, etc. for module_name, module in sorted(scan2.modules.items()): diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py index 96079b5f04..235d498e2f 100644 --- a/bbot/test/test_step_1/test_web.py +++ b/bbot/test/test_step_1/test_web.py @@ -289,7 +289,7 @@ async def test_web_interactsh(bbot_scanner, bbot_httpserver): async_correct_url = False scan1 = bbot_scanner("8.8.8.8") - scan1.status = "RUNNING" + await scan1._set_status("RUNNING") interactsh_client = scan1.helpers.interactsh(poll_interval=3) interactsh_client2 = scan1.helpers.interactsh(poll_interval=3) From f34d62b012d5dfbe1a3d4aea4c2440e9c07f5d60 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 6 May 2025 03:56:11 -0400 Subject: [PATCH 130/147] tech port --- bbot/core/event/base.py | 5 ++++- bbot/test/test_step_1/test_events.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 8d28493a78..27e9ef5a86 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -403,6 +403,8 @@ def host_filterable(self): @property def port(self): self.host + if self._port: + return self._port if getattr(self, "parsed_url", None): if self.parsed_url.port is not None: return self.parsed_url.port @@ -410,7 +412,6 @@ def port(self): return 443 elif self.parsed_url.scheme == "http": return 80 - return self._port @property def netloc(self): @@ -1605,6 +1606,7 @@ class _data_validator(BaseModel): _validate_host = field_validator("host")(validators.validate_host) def _sanitize_data(self, data): + data = super()._sanitize_data(data) data["technology"] = data["technology"].lower() return data @@ -1737,6 +1739,7 @@ class MOBILE_APP(DictEvent): _always_emit = True def _sanitize_data(self, data): + data = super()._sanitize_data(data) if isinstance(data, str): data = {"url": data} if "url" not in data: diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index f602af415c..354167dd77 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -398,6 +398,7 @@ async def test_events(events, helpers): dummy=True, ) assert tech_event.data["technology"] == "http" + assert tech_event.port == 80 # test tagging ip_event_1 = scan.make_event("8.8.8.8", dummy=True) From 12bda753b81b7888336f79b59d49a4389ad6dcf4 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 6 May 2025 12:01:51 -0400 Subject: [PATCH 131/147] bbot things --- bbot/constants.py | 8 ++++---- bbot/scanner/scanner.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bbot/constants.py b/bbot/constants.py index e3de41db1f..364aead2ff 100644 --- a/bbot/constants.py +++ b/bbot/constants.py @@ -4,9 +4,9 @@ 
From 9cdb3c7534c00a939a448d8197676c23ea2da626 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Tue, 6 May 2025 16:09:44 -0400
Subject: [PATCH 132/147] stringify output dir

---
 bbot/scanner/preset/preset.py | 2 +-
 bbot/scanner/scanner.py       | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py
index 91f59d790c..341d808f8f 100644
--- a/bbot/scanner/preset/preset.py
+++ b/bbot/scanner/preset/preset.py
@@ -826,7 +826,7 @@ def to_dict(self, include_target=False, full_config=False, redact_secrets=False)
         if self.scan_name:
             preset_dict["scan_name"] = self.scan_name
         if self.scan_name and self.output_dir is not None:
-            preset_dict["output_dir"] = self.output_dir
+            preset_dict["output_dir"] = str(self.output_dir)
 
         # conditions
         if self.conditions:
diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py
index 87a42e56cb..cbbca21485 100644
--- a/bbot/scanner/scanner.py
+++ b/bbot/scanner/scanner.py
@@ -974,9 +974,6 @@ def omitted_event_types(self):
         return self._omitted_event_types
 
     async def _set_status(self, status):
-        """
-        Block setting after status has been aborted
-        """
         try:
             status_code = get_scan_status_code(status)
             status = get_scan_status_name(status_code)
@@ -984,7 +981,7 @@ async def _set_status(self, status):
             self.warning(f'Attempt to set invalid status "{status}" on scan')
 
         self.debug(f"Setting scan status from {self.status} to {status}")
-        # if the scan has already been marked as ABORTED/FAILED/FINISHED, don't allow setting status again
+        # if the status isn't progressing forward, skip setting it
         if status_code <= self._status_code:
             self.debug(f'Attempt to set invalid status "{status}" on scan with status "{self.status}"')
             return
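Why the `str()` cast in `to_dict()` matters: `output_dir` is a `pathlib.Path`, and the whole point of a preset dict is that it can be serialized back out. `Path` objects are not JSON-serializable:

    import json
    from pathlib import Path

    # json.dumps({"output_dir": Path("/tmp/scan")})
    # -> TypeError: Object of type PosixPath is not JSON serializable
    print(json.dumps({"output_dir": str(Path("/tmp/scan"))}))
    # -> {"output_dir": "/tmp/scan"}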
From 11c684fb4975f78308b7e96d9281ff99fc206741 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 May 2025 10:45:31 -0400
Subject: [PATCH 133/147] fix tests

---
 bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py | 4 ++--
 bbot/test/test_step_2/module_tests/test_module_gitlab.py    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py
index 8accc7c300..6d6fbeb1c6 100644
--- a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py
+++ b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py
@@ -92,7 +92,7 @@ def check(self, module_test, events):
         dnn_installwizard_privesc_detection = False
 
         for e in events:
-            if e.type == "TECHNOLOGY" and "DotNetNuke" in e.data["technology"]:
+            if e.type == "TECHNOLOGY" and "dotnetnuke" in e.data["technology"]:
                 dnn_technology_detection = True
 
             if (
@@ -165,7 +165,7 @@ def check(self, module_test, events):
         dnn_dnnimagehandler_blindssrf = False
 
         for e in events:
-            if e.type == "TECHNOLOGY" and "DotNetNuke" in e.data["technology"]:
+            if e.type == "TECHNOLOGY" and "dotnetnuke" in e.data["technology"]:
                 dnn_technology_detection = True
 
             if e.type == "VULNERABILITY" and "DotNetNuke Blind-SSRF (CVE 2017-0929)" in e.data["description"]:
diff --git a/bbot/test/test_step_2/module_tests/test_module_gitlab.py b/bbot/test/test_step_2/module_tests/test_module_gitlab.py
index 6d593adf65..4396ca36ae 100644
--- a/bbot/test/test_step_2/module_tests/test_module_gitlab.py
+++ b/bbot/test/test_step_2/module_tests/test_module_gitlab.py
@@ -167,7 +167,7 @@ def check(self, module_test, events):
                 e
                 for e in events
                 if e.type == "TECHNOLOGY"
-                and e.data["technology"] == "GitLab"
+                and e.data["technology"] == "gitlab"
                 and e.data["url"] == "http://127.0.0.1:8888/"
             ]
         )

From e48005bcdbbf09aad4bd9424f5f5dff902e297e7 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 May 2025 20:50:35 -0400
Subject: [PATCH 134/147] fix tests

---
 .../test_step_2/module_tests/test_module_excavate.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py
index 7589a9d0b9..139a779218 100644
--- a/bbot/test/test_step_2/module_tests/test_module_excavate.py
+++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py
@@ -1409,9 +1409,16 @@ async def setup_after_prep(self, module_test):
         module_test.set_expect_requests({"uri": "/"}, {"response_data": self.bad_url_data})
 
     def check(self, module_test, events):
+        import gzip
+
         debug_log_content = open(module_test.scan.home / "debug.log").read()
+        for archived_debug_log in module_test.scan.home.glob("debug.log.*.gz"):
+            gzipped_content = open(archived_debug_log, "rb").read()
+            ungzipped_content = gzip.decompress(gzipped_content).decode("utf-8")
+            debug_log_content += ungzipped_content
+
         # make sure our logging is working
-        assert "Setting scan status to STARTING" in debug_log_content
+        assert "Setting scan status to RUNNING" in debug_log_content
         # make sure we don't have any URL validation errors
         assert "Error Parsing reconstructed URL" not in debug_log_content
         assert "Error sanitizing event data" not in debug_log_content
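One detail worth calling out in the excavate test above: `gzip.decompress()` operates on `bytes`, so rotated log archives must be read in binary mode (`"rb"`), not text mode. A reusable sketch of the same log-gathering logic (the helper name is ours, not BBOT's):

    import gzip
    from pathlib import Path

    def read_all_logs(log_dir: Path, stem: str = "debug.log") -> str:
        # the current log is plain text; rotated archives are gzipped
        content = (log_dir / stem).read_text()
        for archive in sorted(log_dir.glob(f"{stem}.*.gz")):
            content += gzip.decompress(archive.read_bytes()).decode("utf-8")
        return content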
From 00389b5945a3732b2b93f4008c89ac58e357c324 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 10 May 2025 01:24:03 -0400
Subject: [PATCH 135/147] fix bug

---
 bbot/scanner/preset/preset.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py
index 341d808f8f..06866d9801 100644
--- a/bbot/scanner/preset/preset.py
+++ b/bbot/scanner/preset/preset.py
@@ -76,8 +76,8 @@ class Preset(metaclass=BasePreset):
     Based on the state of the preset, you can print a warning message, abort the scan, enable/disable modules, etc..
 
     Attributes:
-        target (Target): Target(s) of scan.
-        whitelist (Target): Scan whitelist (by default this is the same as `target`).
+        targets (Target): Target(s) of scan.
+        whitelist (Target): Scan whitelist (by default this is the same as `targets`).
         blacklist (Target): Scan blacklist (this takes ultimate precedence).
         helpers (ConfigAwareHelper): Helper containing various reusable functions, regexes, etc.
         output_dir (pathlib.Path): Output directory for scan.
@@ -290,7 +290,7 @@ def target(self):
 
     @property
     def seeds(self):
-        if self._seeds is None:
+        if self._target is None:
            raise ValueError("Cannot access target before preset is baked (use ._seeds instead)")
         return self.target.seeds
@@ -658,7 +658,7 @@ def from_dict(cls, preset_dict, name=None, _exclude=None, _log=False):
             >>> preset = Preset.from_dict({"target": ["evilcorp.com"], "modules": ["portscan"]})
         """
         new_preset = cls(
-            *preset_dict.get("target", []),
+            *preset_dict.get("targets", []),
             whitelist=preset_dict.get("whitelist"),
             blacklist=preset_dict.get("blacklist"),
             modules=preset_dict.get("modules"),
From 69c5fe563416fe1b5948dc65a5407800319b5fb9 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 10 May 2025 01:40:42 -0400
Subject: [PATCH 136/147] scope test

---
 bbot/scanner/preset/preset.py         | 14 ++++++------
 bbot/test/test_step_1/test_presets.py | 16 ++++++++++++++++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py
index 06866d9801..3d317d73dc 100644
--- a/bbot/scanner/preset/preset.py
+++ b/bbot/scanner/preset/preset.py
@@ -76,8 +76,8 @@ class Preset(metaclass=BasePreset):
     Based on the state of the preset, you can print a warning message, abort the scan, enable/disable modules, etc..
 
     Attributes:
-        targets (Target): Target(s) of scan.
-        whitelist (Target): Scan whitelist (by default this is the same as `targets`).
+        target (Target): Target(s) of scan.
+        whitelist (Target): Scan whitelist (by default this is the same as `target`).
         blacklist (Target): Scan blacklist (this takes ultimate precedence).
         helpers (ConfigAwareHelper): Helper containing various reusable functions, regexes, etc.
         output_dir (pathlib.Path): Output directory for scan.
@@ -115,7 +115,7 @@ class Preset(metaclass=BasePreset):
 
     def __init__(
         self,
-        *targets,
+        *target,
         whitelist=None,
        blacklist=None,
        modules=None,
@@ -142,7 +142,7 @@ def __init__(
         Initializes the Preset class.
 
         Args:
-            *targets (str): Target(s) to scan. Types supported: hostnames, IPs, CIDRs, emails, open ports.
+            *target (str): Target(s) to scan. Types supported: hostnames, IPs, CIDRs, emails, open ports.
             whitelist (list, optional): Whitelisted target(s) to scan. Defaults to the same as `targets`.
             blacklist (list, optional): Blacklisted target(s). Takes ultimate precedence. Defaults to empty.
             modules (list[str], optional): List of scan modules to enable for the scan. Defaults to empty list.
@@ -262,7 +262,7 @@ def __init__(
 
         # target / whitelist / blacklist
         # these are temporary receptacles until they all get .baked() together
-        self._seeds = set(targets if targets else [])
+        self._seeds = set(target if target else [])
         self._whitelist = set(whitelist) if whitelist else whitelist
         self._blacklist = set(blacklist if blacklist else [])
@@ -404,6 +404,8 @@ def bake(self, scan=None):
         """
         Return a "baked" copy of this preset, ready for use by a BBOT scan.
 
+        Presets can be merged and modified before baking, but once baked, they are immutable.
+
         Baking a preset finalizes it by populating `preset.modules` based on flags,
         performing final validations, and substituting environment variables in preloaded modules.
         It also evaluates custom `conditions` as specified in the preset.
@@ -658,7 +660,7 @@ def from_dict(cls, preset_dict, name=None, _exclude=None, _log=False):
             >>> preset = Preset.from_dict({"target": ["evilcorp.com"], "modules": ["portscan"]})
         """
         new_preset = cls(
-            *preset_dict.get("targets", []),
+            *preset_dict.get("target", []),
             whitelist=preset_dict.get("whitelist"),
             blacklist=preset_dict.get("blacklist"),
             modules=preset_dict.get("modules"),
diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py
index 5dc3b8d834..be74276465 100644
--- a/bbot/test/test_step_1/test_presets.py
+++ b/bbot/test/test_step_1/test_presets.py
@@ -601,6 +601,22 @@ async def handle_event(self, event):
     shutil.rmtree(custom_module_dir)
 
 
+def test_preset_scope_round_trip():
+    preset_dict = {
+        "target": ["127.0.0.1"],
+        "whitelist": ["127.0.0.2"],
+        "blacklist": ["127.0.0.3"],
+        "config": {"scope": {"strict": True}},
+    }
+    preset = Preset.from_dict(preset_dict)
+    baked = preset.bake()
+    assert list(baked.seeds) == ["127.0.0.1"]
+    assert list(baked.whitelist) == ["127.0.0.2"]
+    assert list(baked.blacklist) == ["127.0.0.3"]
+    assert baked.config.scope.strict is True
+    assert baked.to_dict(include_target=True) == preset_dict
+
+
 @pytest.mark.asyncio
 async def test_preset_module_loader():
     custom_module_dir = bbot_test_dir / "custom_module_dir"

From 1c62084827e348fae091b4b945b1c33658a34d31 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 10 May 2025 11:55:47 -0400
Subject: [PATCH 137/147] tolerate 'target' and 'targets'

---
 bbot/scanner/preset/preset.py         |  5 ++++-
 bbot/test/test_step_1/test_presets.py | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py
index 3d317d73dc..0836123e4d 100644
--- a/bbot/scanner/preset/preset.py
+++ b/bbot/scanner/preset/preset.py
@@ -659,8 +659,11 @@ def from_dict(cls, preset_dict, name=None, _exclude=None, _log=False):
         Examples:
             >>> preset = Preset.from_dict({"target": ["evilcorp.com"], "modules": ["portscan"]})
         """
+        # tolerate both "target" and "targets", since this is a common oopsie
+        targets = preset_dict.get("target", [])
+        targets += preset_dict.get("targets", [])
         new_preset = cls(
-            *preset_dict.get("target", []),
+            *targets,
             whitelist=preset_dict.get("whitelist"),
             blacklist=preset_dict.get("blacklist"),
             modules=preset_dict.get("modules"),
diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py
index be74276465..8fb5b75858 100644
--- a/bbot/test/test_step_1/test_presets.py
+++ b/bbot/test/test_step_1/test_presets.py
@@ -617,6 +617,26 @@ def test_preset_scope_round_trip():
     assert baked.to_dict(include_target=True) == preset_dict
 
 
+def test_preset_target_tolerance():
+    # tolerate both "target" and "targets", since this is a common oopsie
+    preset_dict = {
+        "target": ["127.0.0.1"],
+        "targets": ["127.0.0.2"],
+    }
+    preset = Preset.from_dict(preset_dict)
+    baked = preset.bake()
+    assert set(baked.seeds) == {"127.0.0.1", "127.0.0.2"}
+
+    preset = Preset.from_yaml_string("""
+target:
+  - 127.0.0.1
+targets:
+  - 127.0.0.2
+""")
+    baked = preset.bake()
+    assert set(baked.seeds) == {"127.0.0.1", "127.0.0.2"}
+
+
 @pytest.mark.asyncio
 async def test_preset_module_loader():
     custom_module_dir = bbot_test_dir / "custom_module_dir"
From ea1d1b811debe0e55682b06ebe4db65ff59fa63b Mon Sep 17 00:00:00 2001
From: github-actions
Date: Tue, 27 May 2025 08:40:01 -0400
Subject: [PATCH 138/147] http headers

---
 bbot/modules/output/http.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py
index 0af65a87d2..3de8939c71 100644
--- a/bbot/modules/output/http.py
+++ b/bbot/modules/output/http.py
@@ -15,6 +15,7 @@ class HTTP(BaseOutputModule):
         "bearer": "",
         "username": "",
         "password": "",
+        "headers": {},
         "timeout": 10,
     }
     options_desc = {
@@ -23,6 +24,7 @@ class HTTP(BaseOutputModule):
         "bearer": "Authorization Bearer token",
         "username": "Username (basic auth)",
         "password": "Password (basic auth)",
+        "headers": "Additional headers to send with the request",
         "timeout": "HTTP timeout",
     }
 
@@ -30,7 +32,12 @@ async def setup(self):
         self.url = self.config.get("url", "")
         self.method = self.config.get("method", "POST")
         self.timeout = self.config.get("timeout", 10)
+<<<<<<< Updated upstream
         self.headers = {}
+=======
+        self.siem_friendly = self.config.get("siem_friendly", False)
+        self.headers = self.config.get("headers", {})
+>>>>>>> Stashed changes
         bearer = self.config.get("bearer", "")
         if bearer:
             self.headers["Authorization"] = f"Bearer {bearer}"

From 17a0828be74b951289db04230908d786c33e2864 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Tue, 27 May 2025 08:52:19 -0400
Subject: [PATCH 139/147] http headers

---
 bbot/modules/output/http.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py
index 3de8939c71..3b61556cb6 100644
--- a/bbot/modules/output/http.py
+++ b/bbot/modules/output/http.py
@@ -32,12 +32,7 @@ async def setup(self):
         self.url = self.config.get("url", "")
         self.method = self.config.get("method", "POST")
         self.timeout = self.config.get("timeout", 10)
-<<<<<<< Updated upstream
-        self.headers = {}
-=======
-        self.siem_friendly = self.config.get("siem_friendly", False)
         self.headers = self.config.get("headers", {})
->>>>>>> Stashed changes
         bearer = self.config.get("bearer", "")
         if bearer:
             self.headers["Authorization"] = f"Bearer {bearer}"
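With the stray conflict markers cleaned up, the http output module now reads arbitrary extra headers from its config. A hypothetical preset wiring up the new option might look like this, using the same `from_yaml_string` form the preset tests above use (import path, URL, and header values are all assumptions, not from these patches):

    from bbot.scanner import Preset

    preset = Preset.from_yaml_string("""
    config:
      modules:
        http:
          url: https://ingest.example.com/bbot
          bearer: ingest-token-here
          headers:
            X-Scan-Source: bbot
    """)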
From e5c4eecb45c22a02646faf781ccd857eca7536ad Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 2 Jun 2025 14:09:15 -0400
Subject: [PATCH 140/147] small updates

---
 bbot/models/pydantic.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index 4ec7a3dc65..abe8a68477 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -11,11 +11,6 @@ class BBOTBaseModel(BaseModel):
 
     model_config = ConfigDict(extra="ignore")
 
-    def __init__(self, **data):
-        super().__init__(**data)
-        if getattr(self, "host", ""):
-            self.reverse_host = self.host[::-1]
-
     def to_json(self, **kwargs):
         return json.dumps(self.model_dump(), sort_keys=True, **kwargs)

From 09eb58ac74fb3d09209585eb13c54c707a84033f Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 2 Jun 2025 16:10:10 -0400
Subject: [PATCH 141/147] fix http tests

---
 bbot/models/pydantic.py     | 14 ++++++++++----
 bbot/modules/output/http.py |  4 +++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index abe8a68477..a644c254c8 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -1,7 +1,7 @@
 import json
 import logging
 from typing import Optional, List, Annotated
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, computed_field
 
 from bbot.models.helpers import utc_now_timestamp
 
@@ -65,9 +65,6 @@ class Event(BBOTBaseModel):
     host: Annotated[Optional[str], "indexed"] = None
     port: Optional[int] = None
     netloc: Optional[str] = None
-    # we store the host in reverse to allow for instant subdomain queries
-    # this works because indexes are left-anchored, but we need to search starting from the right side
-    reverse_host: Annotated[Optional[str], "indexed"] = ""
     resolved_hosts: Optional[List] = None
     dns_children: Optional[dict] = None
     web_spider_distance: int = 10
@@ -93,6 +90,15 @@ def get_data(self):
     def __hash__(self):
         return hash(self.id)
 
+    @computed_field
+    @property
+    def reverse_host(self) -> Annotated[str, "indexed"]:
+        """
+        We store the host in reverse to allow for instant subdomain queries
+        This works because indexes are left-anchored, but we need to search starting from the right side
+        """
+        return self.host[::-1]
+
 
 ### SCAN ###
 
diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py
index 3b61556cb6..28fa917fc7 100644
--- a/bbot/modules/output/http.py
+++ b/bbot/modules/output/http.py
@@ -1,3 +1,5 @@
+from omegaconf import OmegaConf
+
 from bbot.models.pydantic import Event
 from bbot.modules.output.base import BaseOutputModule
 
@@ -32,7 +34,7 @@ async def setup(self):
         self.url = self.config.get("url", "")
         self.method = self.config.get("method", "POST")
         self.timeout = self.config.get("timeout", 10)
-        self.headers = self.config.get("headers", {})
+        self.headers = OmegaConf.to_object(self.config.get("headers", OmegaConf.create()))
         bearer = self.config.get("bearer", "")
         if bearer:
             self.headers["Authorization"] = f"Bearer {bearer}"
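The shift from assigning `reverse_host` in `__init__` (removed in the "small updates" patch above) to a pydantic v2 `computed_field` means the value is derived at dump time instead of stored on the model. A minimal sketch of the pattern on a toy model (including the None-guard that the next patch adds):

    from typing import Annotated, Optional
    from pydantic import BaseModel, computed_field

    class Host(BaseModel):
        host: Optional[str] = None

        @computed_field
        @property
        def reverse_host(self) -> Annotated[Optional[str], "indexed"]:
            return self.host[::-1] if self.host else None

    print(Host(host="api.evilcorp.com").model_dump())
    # {'host': 'api.evilcorp.com', 'reverse_host': 'moc.proclive.ipa'}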
From 0af5e285e579c815345ffce74246feccbe26b023 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 2 Jun 2025 21:27:08 -0400
Subject: [PATCH 142/147] event pydantic updates

---
 bbot/models/pydantic.py                  | 28 +++++++++++++++----
 bbot/modules/output/mongo.py             | 14 ++++++----
 bbot/test/test_step_1/test_db_models.py  |  2 +-
 .../module_tests/test_module_elastic.py  |  6 ++--
 .../module_tests/test_module_mongo.py    |  8 +++---
 5 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index a644c254c8..68e71d493e 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -1,7 +1,7 @@
 import json
 import logging
-from typing import Optional, List, Annotated
 from pydantic import BaseModel, ConfigDict, Field, computed_field
+from typing import Optional, List, Annotated, get_origin, get_args
 
 from bbot.models.helpers import utc_now_timestamp
 
@@ -21,8 +21,24 @@ def __eq__(self, other):
         return hash(self) == hash(other)
 
     @classmethod
-    def _indexed_fields(cls):
-        return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata)
+    def indexed_fields(cls):
+        indexed_fields = {}
+
+        # Handle regular fields
+        for fieldname, field in cls.model_fields.items():
+            if any(isinstance(m, str) and m.startswith("indexed") for m in field.metadata):
+                indexed_fields[fieldname] = field.metadata
+
+        # Handle computed fields
+        for fieldname, field in cls.model_computed_fields.items():
+            return_type = field.return_type
+            if get_origin(return_type) is Annotated:
+                type_args = get_args(return_type)
+                metadata = list(type_args[1:])  # Skip the first arg (the actual type)
+                if any(isinstance(m, str) and m.startswith("indexed") for m in metadata):
+                    indexed_fields[fieldname] = metadata
+
+        return indexed_fields
 
     # we keep these because they were a lot of work to make and maybe someday they'll be useful again
@@ -92,12 +108,14 @@ def __hash__(self):
 
     @computed_field
     @property
-    def reverse_host(self) -> Annotated[str, "indexed"]:
+    def reverse_host(self) -> Annotated[Optional[str], "indexed"]:
         """
         We store the host in reverse to allow for instant subdomain queries
         This works because indexes are left-anchored, but we need to search starting from the right side
         """
-        return self.host[::-1]
+        if self.host:
+            return self.host[::-1]
+        return None
 
 
 ### SCAN ###
 
diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py
index 118ca82378..842f1d72fc 100644
--- a/bbot/modules/output/mongo.py
+++ b/bbot/modules/output/mongo.py
@@ -5,6 +5,9 @@
 
 
 class Mongo(BaseOutputModule):
+    """
+    docker run --rm -p 27017:27017 mongo
+    """
     watched_events = ["*"]
     meta = {
         "description": "Output scan data to a MongoDB database",
@@ -48,11 +51,11 @@ async def setup(self):
         self.targets_collection = self.db[f"{self.collection_prefix}targets"]
 
         # Build an index for each field in reverse_host and host
-        for field_name, field in Event.model_fields.items():
-            if "indexed" in field.metadata:
-                unique = "unique" in field.metadata
-                await self.events_collection.create_index([(field_name, 1)], unique=unique)
-                self.verbose(f"Index created for field: {field_name} (unique={unique})")
+        for fieldname, metadata in Event.indexed_fields().items():
+            if "indexed" in metadata:
+                unique = "unique" in metadata
+                await self.events_collection.create_index([(fieldname, 1)], unique=unique)
+                self.verbose(f"Index created for field: {fieldname} (unique={unique})")
 
         return True
 
@@ -65,6 +68,7 @@ async def handle_event(self, event):
                 break
             except Exception as e:
                 self.warning(f"Error inserting event into MongoDB: {e}, retrying...")
+                self.trace()
                 await self.helpers.sleep(1)
 
         if event.type == "SCAN":
diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py
index 0e175f3418..d453fa81e1 100644
--- a/bbot/test/test_step_1/test_db_models.py
+++ b/bbot/test/test_step_1/test_db_models.py
@@ -18,7 +18,7 @@ def test_pydantic_models(events, bbot_scanner):
     assert utc_now2.timestamp() == utc_now.timestamp()
 
     test_event = Event(**events.ipv4.json())
-    assert sorted(test_event._indexed_fields()) == [
+    assert sorted(test_event.indexed_fields()) == [
         "data",
         "host",
         "id",
diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py
index b3af6e694a..9846f02246 100644
--- a/bbot/test/test_step_2/module_tests/test_module_elastic.py
+++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py
@@ -103,9 +103,9 @@ async def check(self, module_test, events):
             # Events don't match exactly because the elastic ones have reverse_host and inserted_at
             assert events_json != db_events_pydantic
             for db_event in db_events_pydantic:
-                db_event.pop("reverse_host")
-                db_event.pop("inserted_at")
-                db_event.pop("archived")
+                db_event.pop("reverse_host", None)
+                db_event.pop("inserted_at", None)
+                db_event.pop("archived", None)
             # They should match after removing reverse_host
             assert events_json == db_events_pydantic, "Events do not match"
diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py
index 978d79c4b3..25c317e57c 100644
--- a/bbot/test/test_step_2/module_tests/test_module_mongo.py
+++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py
@@ -76,7 +76,7 @@ async def check(self, module_test, events):
             # make sure the collection has all the right indexes
             cursor = events_collection.list_indexes()
             indexes = await cursor.to_list(length=None)
-            for field in Event._indexed_fields():
+            for field in Event.indexed_fields():
                 assert any(field in index["key"] for index in indexes), f"Index for {field} not found"
 
             ### EVENTS ###
@@ -116,9 +116,9 @@ async def check(self, module_test, events):
             # Events don't match exactly because the mongo ones have reverse_host and inserted_at
             assert events_json != db_events_pydantic
             for db_event in db_events_pydantic:
-                db_event.pop("reverse_host")
-                db_event.pop("inserted_at")
-                db_event.pop("archived")
+                db_event.pop("reverse_host", None)
+                db_event.pop("inserted_at", None)
+                db_event.pop("archived", None)
             # They should match after removing reverse_host
             assert events_json == db_events_pydantic, "Events do not match"
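The computed-field branch of `indexed_fields()` leans on `typing` introspection: for a return annotation like `Annotated[Optional[str], "indexed"]`, `get_origin()` detects the `Annotated` wrapper and `get_args()` returns the underlying type followed by the metadata strings. In isolation:

    from typing import Annotated, Optional, get_args, get_origin

    rt = Annotated[Optional[str], "indexed"]
    assert get_origin(rt) is Annotated
    type_arg, *metadata = get_args(rt)
    assert type_arg == Optional[str]
    assert metadata == ["indexed"]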
From 6a36986bfdc61054ca594ba837ee466889f4e037 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 2 Jun 2025 21:27:17 -0400
Subject: [PATCH 143/147] ruffed

---
 bbot/modules/output/mongo.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py
index 842f1d72fc..f90c4aad53 100644
--- a/bbot/modules/output/mongo.py
+++ b/bbot/modules/output/mongo.py
@@ -8,6 +8,7 @@ class Mongo(BaseOutputModule):
     """
     docker run --rm -p 27017:27017 mongo
     """
+
     watched_events = ["*"]
     meta = {
         "description": "Output scan data to a MongoDB database",

From 75b15ee6284192e9b0993b84d3cc6222fe04dd84 Mon Sep 17 00:00:00 2001
From: TheTechromancer
Date: Fri, 19 Sep 2025 12:19:22 -0400
Subject: [PATCH 144/147] fix graphql, aspnet bin

---
 bbot/modules/aspnet_bin_exposure.py   | 1 +
 bbot/modules/graphql_introspection.py | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/bbot/modules/aspnet_bin_exposure.py b/bbot/modules/aspnet_bin_exposure.py
index 9367058412..2f110b0e25 100644
--- a/bbot/modules/aspnet_bin_exposure.py
+++ b/bbot/modules/aspnet_bin_exposure.py
@@ -63,6 +63,7 @@ async def handle_event(self, event):
                     description = f"IIS Bin Directory DLL Exposure. Detection Url: [{test_url}]"
                     await self.emit_event(
                         {
+                            "name": "IIS Bin Directory DLL Exposure",
                             "severity": "HIGH",
                             "host": str(event.host),
                             "url": normalized_url,
diff --git a/bbot/modules/graphql_introspection.py b/bbot/modules/graphql_introspection.py
index a820bfb6f3..7a6864cfd7 100644
--- a/bbot/modules/graphql_introspection.py
+++ b/bbot/modules/graphql_introspection.py
@@ -135,8 +135,14 @@ async def handle_event(self, event):
             filename = self.output_dir / filename
             with open(filename, "w") as f:
                 json.dump(response_json, f)
+            relative_path = str(filename.relative_to(self.scan.home))
             await self.emit_event(
-                {"url": url, "description": "GraphQL schema", "path": str(filename.relative_to(self.scan.home))},
+                {
+                    "name": "GraphQL Schema",
+                    "url": url,
+                    "description": f"GraphQL Schema at {url}",
+                    "path": relative_path,
+                },
                 "FINDING",
                 event,
                 context=f"{{module}} found GraphQL schema at {url}",

From 3a67e0c606e736e66e4dadd24794e183649d7384 Mon Sep 17 00:00:00 2001
From: TheTechromancer
Date: Fri, 19 Sep 2025 12:20:58 -0400
Subject: [PATCH 145/147] fix iis shortnames

---
 bbot/modules/iis_shortnames.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bbot/modules/iis_shortnames.py b/bbot/modules/iis_shortnames.py
index e66feff9cd..fcf2b28d1e 100644
--- a/bbot/modules/iis_shortnames.py
+++ b/bbot/modules/iis_shortnames.py
@@ -341,6 +341,7 @@ class safety_counter_obj:
                 if url_hint.lower().endswith(".zip"):
                     await self.emit_event(
                         {
+                            "name": "Possible backup file (zip) in web root",
                             "host": str(event.host),
                             "url": event.data,
                             "description": f"Possible backup file (zip) in web root: {normalized_url}{url_hint}",
From 83a87dc2faa7b14d9aabade19b47298a7403a710 Mon Sep 17 00:00:00 2001
From: TheTechromancer
Date: Fri, 19 Sep 2025 12:23:36 -0400
Subject: [PATCH 146/147] fix medusa

---
 bbot/modules/medusa.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/bbot/modules/medusa.py b/bbot/modules/medusa.py
index 46fbc1ae2e..f8e5d60573 100644
--- a/bbot/modules/medusa.py
+++ b/bbot/modules/medusa.py
@@ -143,7 +143,8 @@ async def handle_event(self, event):
                 self.info(f"Medusa stderr: {result.stderr}")
 
             async for message in self.parse_output(result.stdout, snmp_version):
-                vuln_event = self.create_vuln_event("CRITICAL", message, event)
+                vuln_name = f"Valid SNMPV{snmp_version} Credentials Found!"
+                vuln_event = self.create_vuln_event("CRITICAL", vuln_name, message, event)
                 await self.emit_event(vuln_event)
 
         # else: Medusa supports various protocols which could in theory be implemented later on.
@@ -219,17 +220,12 @@ async def construct_command(self, host, port, protocol, protocol_version):
 
         return cmd
 
-    def create_vuln_event(self, severity, description, source_event):
+    def create_vuln_event(self, severity, name, description, source_event):
         host = str(source_event.host)
         port = str(source_event.port)
 
         return self.make_event(
-            {
-                "severity": severity,
-                "host": host,
-                "port": port,
-                "description": description,
-            },
+            {"severity": severity, "host": host, "port": port, "description": description, "name": name},
             "VULNERABILITY",
             source_event,
         )

From 43936a181e547ef67b66718fae44ac54234b4a0a Mon Sep 17 00:00:00 2001
From: TheTechromancer
Date: Sat, 20 Sep 2025 10:57:00 -0400
Subject: [PATCH 147/147] fix tests

---
 bbot/modules/retirejs.py                               | 1 +
 .../module_tests/test_module_graphql_introspection.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/bbot/modules/retirejs.py b/bbot/modules/retirejs.py
index 27e8fec407..4b58c1b23d 100644
--- a/bbot/modules/retirejs.py
+++ b/bbot/modules/retirejs.py
@@ -183,6 +183,7 @@ async def handle_event(self, event):
                     description_parts.append(f"Affected versions: [>= {at_or_above}]")
                 description = " ".join(description_parts)
                 data = {
+                    "name": "Vulnerable JavaScript Library",
                     "description": description,
                     "severity": severity,
                     "component": component,
diff --git a/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py b/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py
index f6a47671c7..dd0380f653 100644
--- a/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py
+++ b/bbot/test/test_step_2/module_tests/test_module_graphql_introspection.py
@@ -31,4 +31,4 @@ def check(self, module_test, events):
         finding = [e for e in events if e.type == "FINDING"]
         assert finding, "should have raised 1 FINDING event"
         assert finding[0].data["url"] == "http://127.0.0.1:8888/"
-        assert finding[0].data["description"] == "GraphQL schema"
+        assert finding[0].data["description"] == "GraphQL Schema at http://127.0.0.1:8888/"
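A closing note on why the models go to the trouble of storing `reverse_host` at all: database indexes are left-anchored, so with the host reversed, "this domain and all of its subdomains" becomes an indexable prefix match. A hedged sketch against the Mongo events collection created by the output module (the collection handle and query shape are assumptions):

    import re

    domain = "evilcorp.com"
    prefix = domain[::-1]  # "moc.proclive"
    # anchored, case-sensitive prefix regexes can use the reverse_host index
    query = {"reverse_host": {"$regex": f"^{re.escape(prefix)}"}}
    # matches evilcorp.com, api.evilcorp.com, deep.api.evilcorp.com, ...
    # events = await events_collection.find(query).to_list(length=None)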