GSA-TTS · asteel-gsa · Apr 23, 2025
diff --git a/README.md b/README.md
@@ -358,6 +358,52 @@ resource "cloudfoundry_network_policy" "logshipper-network-policy" {
 }
 ```
 
+## REST File Scanner
+Creates a small application in cloudfoundry that is used to scan s3 bucket contents at rest. The purpose of this was originally to satisfy an ATO requirement where it may be necessary to scan files periodically. By attaching a `logdrain` to it, you can also use New Relic to facilitate alerting.
+
+
+**Requirements:**
+* An s3 Bucket to scan from (add as a service binding).
+* Use a `resource "cloudfoundry_service_instance" "clamav_ups" {}` (add as a service binding) or `environment_variables` to give the scan url
+* Some scanning tool (clamav for example, add as a service binding)
+
+**Optional but suggested requirements:**
+* A quarantine bucket, or some quarantine solution.
+* Logdrain from the Logshipper module `module.logshipper.logdrain_name` (add as a service binding) if you wish to send scan notifications to New Relic and use New Relic for your alerting.
+
+It is recommended to use `depends_on = []` to ensure that any necessary S3 buckets, the logshipper, the virus scanner and the `cloudfoundry_service_instance` are created before this module is created.
+
+If you would like information on a potential scanning application, the FAC created a small flask app to handle their needs. You may fork and iterate on it, or develop one for your preferred tech stack: https://github.com/GSA-TTS/fac-periodic-scanner
+
+
+```tf
+module "file-scanner" {
+  source               = "github.com/gsa-tts/terraform-cloudgov//scanner?ref=v2.4.0"
+  name                 = local.scanner_name
+  cf_org_name          = var.cf_org_name
+  github_repo_name     = "my-repo-name"
+  src_code_folder_name = "" #See variables.tf for information
+  cf_space = {
+    id   = data.cloudfoundry_space.space.id
+    name = var.cf_space_name
+  }
+  buildpacks        = ["https://my-buildpack1-link.com", "my-buildpack2"]
+  https_proxy_url   = module.egress_proxy.https_url
+  scanner_instances = 1
+  scanner_memory    = "512M"
+  disk_quota        = "512M"
+  service_bindings = {
+    "${module.quarantine.bucket_name}"                 = "",
+    "${module.some-s3.bucket_name}"                    = "",
+    "${cloudfoundry_service_instance.clamav_ups.name}" = "",
+    "${module.logshipper.logdrain_name}"               = ""
+  }
+  environment_variables = {
+    AV_SCAN_URL = "https://clamav-${var.cf_space_name}.apps.internal:61443/scan"
+  }
+}
+```
+
 ## Testing
 
 > [!WARNING]

diff --git a/scanner/main.tf b/scanner/main.tf
@@ -0,0 +1,52 @@
+locals {
+  domain = "apps.internal" # passed as `domain` to module "route" in this file
+}
+
+data "external" "scannerzip" { # runs prepare-scanner.sh; its JSON output is read below as result.path
+  program     = ["/bin/sh", "prepare-scanner.sh"]
+  working_dir = path.module # the script is looked up in, and run from, this module's directory
+  query = { # the script reads these four keys (gitref, org, repo, src_folder) with jq
+    gitref     = var.gitref
+    org        = var.github_org_name
+    repo       = var.github_repo_name
+    src_folder = var.src_code_folder_name
+  }
+}
+
+resource "cloudfoundry_app" "scanner_app" { # the scanner app, pushed from the zip produced by data.external.scannerzip
+  name       = var.name
+  space_name = var.cf_space.name
+  org_name   = var.cf_org_name
+
+  buildpacks       = var.buildpacks
+  path             = "${path.module}/${data.external.scannerzip.result.path}"
+  source_code_hash = filesha256("${path.module}/${data.external.scannerzip.result.path}") # sha256 of the zip file itself
+
+  timeout           = 180
+  disk_quota        = var.disk_quota
+  memory            = var.scanner_memory
+  instances         = var.scanner_instances
+  strategy          = "rolling"
+  health_check_type = "port"
+
+  service_bindings = [ # one binding per entry of var.service_bindings (service instance name => JSON params)
+    for service_name, params in var.service_bindings : {
+      service_instance = service_name
+      params           = (params == "" ? "{}" : params) # Empty string -> Minimal JSON
+    }
+  ]
+
+  environment = merge({
+    PROXYROUTE = var.https_proxy_url # plain reference; a "${...}"-only string is deprecated interpolation syntax
+  }, var.environment_variables) # later argument wins, so a caller-supplied PROXYROUTE overrides the one above
+}
+
+module "route" { # delegates to the sibling app_route module, handing it this app's id
+  source = "../app_route"
+
+  cf_org_name   = var.cf_org_name
+  cf_space_name = var.cf_space.name
+  domain        = local.domain
+  hostname      = coalesce(var.hostname, var.name) # uses the app name when var.hostname is null or empty
+  app_ids       = [cloudfoundry_app.scanner_app.id]
+}
diff --git a/scanner/outputs.tf b/scanner/outputs.tf
@@ -0,0 +1,3 @@
+output "app_id" { # exposes the id of cloudfoundry_app.scanner_app to callers of this module
+  value = cloudfoundry_app.scanner_app.id
+}
diff --git a/scanner/prepare-scanner.sh b/scanner/prepare-scanner.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+# Exit if any step fails
+set -e
+
+popdir=$(pwd)
+eval "$(jq -r '@sh "GITREF=\(.gitref) ORG=\(.org) REPO=\(.repo) SRC_FOLDER=\(.src_folder)"')"
+
+# Portable construct so this will work everywhere
+# https://unix.stackexchange.com/a/84980
+tmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir')
+cd "${tmpdir}"
+
+# Grab a copy of the zip file for the specified ref
+# https://github.com/GSA-TTS/fac-periodic-scanner/archive/refs/heads/main.zip
+curl -s -L "https://github.com/${ORG}/${REPO}/archive/${GITREF}.zip" --output "${tmpdir}/local.zip"
+
+# Get the folder that curl will download, usually looks like {repo_name}-{branch_name}/
+zip_folder=$(unzip -l local.zip | awk '/\/$/ {print $4}' | awk -F'/' '{print $1}' | sort -u)
+
+# Zip up just the app for pushing under different circumstances.
+# if $SRC_FOLDER = "", then we want to look for the app in the root of the repo {repo_name}-{branch_name}/.
+if [ -z "$SRC_FOLDER" ]; then
+  unzip -q -u local.zip "$zip_folder/*"
+  cd "$zip_folder/" && zip -q -r -o -X "${popdir}/scanner.zip" ./
+else
+# if $SRC_FOLDER = "some/folder" then we want to look for the app in that path {repo_name}-{branch_name}/{src_code_folder}/.
+  unzip -q -u local.zip "$zip_folder/$SRC_FOLDER/*"
+  cd "$zip_folder/$SRC_FOLDER/" && zip -q -r -o -X "${popdir}/scanner.zip" ./
+fi
+
+# Tell Terraform where to find it
+cat << EOF
+{ "path": "scanner.zip" }
+EOF
diff --git a/scanner/providers.tf b/scanner/providers.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_version = "~> 1.0" # any 1.x release of Terraform
+  required_providers {
+    cloudfoundry = {
+      source  = "cloudfoundry/cloudfoundry"
+      version = ">=1.4.0" # lower bound only; no upper bound is pinned here
+    }
+  }
+}
diff --git a/scanner/tests/creation.tftest.hcl b/scanner/tests/creation.tftest.hcl
@@ -0,0 +1,39 @@
+provider "cloudfoundry" {} # NOTE(review): empty config; presumably credentials come from the environment, confirm
+
+variables { # file-level inputs for the run blocks below
+  cf_org_name = "gsa-tts-devtools-prototyping"
+  cf_space = {
+    id   = "15836eb6-a57e-4579-bca7-99764c5a01a4"
+    name = "terraform-cloudgov-ci-tests"
+  }
+  name                  = "file-scanner"
+  https_proxy_url       = "https://egress-proxy-user:egress-proxy-password@some-internal-route.test.foo:00000"
+  buildpacks            = ["https://github.com/cloudfoundry/python-buildpack"]
+  github_repo_name      = "fac-periodic-scanner"
+  src_code_folder_name  = "" # empty string: the app is taken from the root of the repo
+  # service_bindings = {
+  #   my-service_instance = ""
+  # }
+}
+
+run "application_tests" { # NOTE(review): no `command` is set, so this presumably runs Terraform's default (apply), confirm that is intended
+  assert {
+    condition     = cloudfoundry_app.scanner_app.id == output.app_id
+    error_message = "Output id must match the app id"
+  }
+  assert {
+    condition     = cloudfoundry_app.scanner_app.buildpacks != null # checks for non-null only, not for a non-empty list
+    error_message = "The application buildpacks should not be empty"
+  }
+}
+
+run "src_tests" { # checks that the app is pushed from the zip reported by the external data source
+  assert {
+    condition     = cloudfoundry_app.scanner_app.path == "${path.module}/${data.external.scannerzip.result.path}" # data source is named "scannerzip" in main.tf
+    error_message = "The path for the zip should be in the module path"
+  }
+  assert {
+    condition     = cloudfoundry_app.scanner_app.source_code_hash == filesha256("${path.module}/${data.external.scannerzip.result.path}")
+    error_message = "The hash for the zip should be a valid sha256"
+  }
+}
diff --git a/scanner/variables.tf b/scanner/variables.tf
@@ -0,0 +1,105 @@
+variable "name" { # also used as the route hostname when var.hostname is not set
+  type        = string
+  description = "name of the scanner application"
+}
+
+variable "cf_org_name" {
+  type        = string
+  description = "cloud.gov organization name"
+}
+
+variable "cf_space" {
+  type        = object({ id = string, name = string }) # this module's main.tf reads only .name
+  description = "cloud.gov space"
+}
+
+variable "buildpacks" {
+  description = "A list of buildpacks to add to the app resource."
+  type        = list(string)
+}
+
+variable "gitref" {
+  type        = string
+  description = "gitref for the specific version of scanner that you want to use"
+  default     = "refs/heads/main" # prepare-scanner.sh downloads https://github.com/<org>/<repo>/archive/<gitref>.zip
+  # You can also specify a specific commit, eg "7487f882903b9e834a5133a883a88b16fb8b16c9"
+}
+
+variable "scanner_memory" {
+  type        = string
+  description = "Memory in MB to allocate to scanner app instance"
+  default     = "512M" # a string that carries its own unit suffix
+}
+
+variable "scanner_instances" {
+  type        = number
+  description = "the number of instances of the scanner app to run (default: 1)"
+  default     = 1
+}
+
+variable "github_org_name" { # the <org> part of the archive URL fetched by prepare-scanner.sh
+  description = "The name of the github organization. (ex. gsa-tts)"
+  type        = string
+  default     = "gsa-tts"
+}
+
+variable "github_repo_name" { # the <repo> part of the archive URL fetched by prepare-scanner.sh
+  description = "The name of the github repo (ex. fac, terraform-cloudgov, etc)"
+  type        = string
+}
+
+variable "src_code_folder_name" { # no default: callers must pass "" explicitly to deploy from the repo root
+  description = "The name of the folder that contains your src code without a trailing '/'. Generally the folder that would contain your Procfile. This will be used as the apps /app/ dir."
+  type        = string
+  # Examples:
+  # "" -> Project to deploy is in the root of the repo
+  # "backend" -> Project to deploy is in the backend/ directory
+  # "backend/app" -> Project to deploy is in the backend/app directory
+}
+
+variable "disk_quota" { # wired to cloudfoundry_app.scanner_app.disk_quota in main.tf
+  type        = string
+  description = "disk in MB to allocate to the scanner app instance"
+  default     = "512M"
+}
+
+variable "https_proxy_url" { # exported to the app as the PROXYROUTE environment variable in main.tf
+  type        = string
+  description = "the full string of the https proxy for use with the scanner app"
+  sensitive   = true
+}
+
+variable "hostname" { # main.tf uses coalesce(var.hostname, var.name) as the route hostname
+  description = "The hostname to route to. Combined with the module's fixed internal domain (apps.internal) for the full route. Defaults to var.name if omitted"
+  type        = string
+  default     = null
+}
+
+# Example:
+# service_bindings = {
+#   my-service = "",
+#   (module.my-other-service.name) = "",
+#   yet-another-service = <<-EOT
+#      {
+#        "astring"     : "foo",
+#        "anarray"     : ["bar", "baz"],
+#        "anarrayobjs" : [
+#          {
+#            "name": "bat",
+#            "value": "boz"
+#          }
+#        ]
+#      }
+#      EOT
+# }
+variable "service_bindings" { # main.tf turns each entry into one binding on the app
+  description = "A map of service instance name to JSON parameter string."
+  type        = map(string)
+  default     = {} # an empty-string value for a key is sent as "{}" by main.tf
+}
+
+variable "environment_variables" { # merged after PROXYROUTE in main.tf, so a PROXYROUTE key here replaces it
+  description = "A map of environment values."
+  type        = map(string)
+  default     = {}
+}