diff --git a/_emulator/extensions/bigquery-dlp-functions.env.local b/_emulator/extensions/bigquery-dlp-functions.env.local new file mode 100644 index 00000000..e69de29b diff --git a/_emulator/firebase.json b/_emulator/firebase.json index 0f5728b6..89804c34 100644 --- a/_emulator/firebase.json +++ b/_emulator/firebase.json @@ -1,7 +1,6 @@ { "extensions": { - "firestore-record-user-acknowledgements": "../firestore-record-user-acknowledgements", - "firestore-bundle-server": "../firestore-bundle-server" + "bigquery-dlp-functions": "../bigquery-dlp-functions" }, "storage": { "rules": "storage.rules" diff --git a/bigquery-dlp-functions/CHANGELOG.md b/bigquery-dlp-functions/CHANGELOG.md new file mode 100644 index 00000000..af3bf27c --- /dev/null +++ b/bigquery-dlp-functions/CHANGELOG.md @@ -0,0 +1,11 @@ +## Version 0.0.3 + +feat: added record transformations + +## Version 0.0.2 + +fix: remove default value in DATASET_ID param + +## Version 0.0.1 + +Alpha release allowing deidentify/reidentify with documentation. diff --git a/bigquery-dlp-functions/POSTINSTALL.md b/bigquery-dlp-functions/POSTINSTALL.md new file mode 100644 index 00000000..aabf6822 --- /dev/null +++ b/bigquery-dlp-functions/POSTINSTALL.md @@ -0,0 +1,28 @@ +### See it in action + +1. Go to your project's [BigQuery](https://console.cloud.google.com/bigquery?cloudshell=false&project=${param:PROJECT_ID}) in the Google Cloud console. +2. If it doesn't exist already, create a dataset called `${param:DATASET_ID}`. +3. Create a table that contains the data you want to de-identify. +4. Run the following query to de-identify the data in the table: + +```sql +SELECT + val, + `dev-extensions-testing.bq_testing`.deindetify(TO_JSON(val)) +FROM + `dev-extensions-testing.bq_testing.users` AS val +``` + +5. 
Run the following query to re-identify the data in the table: + +```sql +SELECT + val, + `dev-extensions-testing.bq_testing`.reindetify(TO_JSON(val)) +FROM + `dev-extensions-testing.bq_testing.users` AS val +``` + +### Monitoring + +As a best practice, you can [monitor the activity](https://firebase.google.com/docs/extensions/manage-installed-extensions#monitor) of your installed extension, including checks on its health, usage, and logs. diff --git a/bigquery-dlp-functions/PREINSTALL.md b/bigquery-dlp-functions/PREINSTALL.md new file mode 100644 index 00000000..331979f6 --- /dev/null +++ b/bigquery-dlp-functions/PREINSTALL.md @@ -0,0 +1,25 @@ +Use this extension to de-identify sensitive data in BigQuery using the [Data Loss Prevention API](https://cloud.google.com/dlp/docs/). + +This extension deploys 2 BigQuery remote functions, this extension: + +- Perform de-identifaction on sensitive data passed as JSON from BigQuery. +- Re-identify sensitive data that were de-identified with reversable techniques. + +You specify the desired DLP technique. All techniques are powered by the Google [Data Loss Prevention API](https://cloud.google.com/dlp/docs/transformations-reference). The options offered are: + +- Replace with Masking. +- Redact a value (remove it from the data). + +#### Additional setup + +Before installing this extension, make sure that you've set up a BigQuery [dataset](https://cloud.google.com/bigquery/docs/datasets) and [table](https://cloud.google.com/bigquery/docs/tables). + +#### Billing + +This extension uses other Firebase or Google Cloud Platform services which may have associated charges: + +- Cloud Data Loss Prevention API +- BigQuery +- Cloud Functions + +When you use Firebase Extensions, you're only charged for the underlying resources that you use. 
A paid-tier billing plan is only required if the extension uses a service that requires a paid-tier plan, for example calling to a Google Cloud Platform API or making outbound network requests to non-Google services. All Firebase services offer a free tier of usage. [Learn more about Firebase billing.](https://firebase.google.com/pricing) diff --git a/bigquery-dlp-functions/README.md b/bigquery-dlp-functions/README.md new file mode 100644 index 00000000..408bc9b7 --- /dev/null +++ b/bigquery-dlp-functions/README.md @@ -0,0 +1,92 @@ +# BigQuery DLP Remote Function + +**Author**: Firebase (**[https://firebase.google.com](https://firebase.google.com)**) + +**Description**: This extension creates BigQuery functions to facilitate de-identification and re-identification in queries, providing configurable techniques, seamless integration, and ensuring better data privacy and compliance. + +--- + +## 🧩 Install this experimental extension + +> ⚠️ **Experimental**: This extension is available for testing as an _experimental_ release. It has not been as thoroughly tested as the officially released extensions, and future updates might introduce breaking changes. If you use this extension, please [report bugs and make feature requests](https://github.com/firebase/experimental-extensions/issues/new/choose) in our GitHub repository. 
+ +### Console + +[![Install this extension in your Firebase project](../install-extension.png?raw=true "Install this extension in your Firebase project")](https://console.firebase.google.com/project/_/extensions/install?ref=firebase/bigquery-dlp-functions) + +### Firebase CLI + +```bash +firebase ext:install firebase/bigquery-dlp-functions --project= +``` + +> Learn more about installing extensions in the Firebase Extensions documentation: [console](https://firebase.google.com/docs/extensions/install-extensions?platform=console), [CLI](https://firebase.google.com/docs/extensions/install-extensions?platform=cli) + +--- + +**Details**: Use this extension to de-identify sensitive data in BigQuery using the [Data Loss Prevention API](https://cloud.google.com/dlp/docs/). + +This extension deploys 2 BigQuery remote functions, this extension: + +- Perform de-identifaction on sensitive data passed as JSON from BigQuery. +- Re-identify sensitive data that were de-identified with reversable techniques. + +You specify the desired DLP technique. All techniques are powered by the Google [Data Loss Prevention API](https://cloud.google.com/dlp/docs/transformations-reference). The options offered are: + +- Replace with Masking. +- Redact a value (remove it from the data). + +#### Additional setup + +Before installing this extension, make sure that you've set up a BigQuery [dataset](https://cloud.google.com/bigquery/docs/datasets) and [table](https://cloud.google.com/bigquery/docs/tables). + +#### Billing + +This extension uses other Firebase or Google Cloud Platform services which may have associated charges: + +- Cloud Data Loss Prevention API +- BigQuery +- Cloud Functions + +When you use Firebase Extensions, you're only charged for the underlying resources that you use. 
A paid-tier billing plan is only required if the extension uses a service that requires a paid-tier plan, for example calling to a Google Cloud Platform API or making outbound network requests to non-Google services. All Firebase services offer a free tier of usage. [Learn more about Firebase billing.](https://firebase.google.com/pricing) + +**Configuration Parameters:** + +- DLP Transformation Method: The method used by Data Loss Prevention API to deidentify and/or encrypt sensitive information in the data. + +- DLP Transformation Technique: The technique used by Data Loss Prevention API to deidentify and/or encrypt sensitive information in the data. + +- List of fields to transform using record transformation (comma separated): The list of fields to transform using record transformation. This is only used when the transformation method is set to `RECORD`. + +- BigQuery Dataset ID: The ID of the dataset where the extension will create a connection. + +- Cloud Functions location: Where do you want to deploy the functions created for this extension? You usually want a location close to your database. For help selecting a location, refer to the [location selection guide](https://firebase.google.com/docs/functions/locations). + +**Cloud Functions:** + +- **createBigQueryConnection:** Creates a BigQuery connection. + +- **deidentifyData:** TODO + +- **reidentifyData:** TODO + +**APIs Used**: + +- bigquery.googleapis.com (Reason: Powers all BigQuery tasks performed by the extension.) + +- bigqueryconnection.googleapis.com (Reason: Allows the extension to create a BigQuery connection.) + +- dlp.googleapis.com (Reason: Allows the extension to use DLP services.) + +**Access Required**: + +This extension will operate with the following project IAM roles: + +- bigquery.jobUser (Reason: Allows the extension to create BigQuery jobs.) + +- bigquery.dataOwner (Reason: Allows the extension to create BigQuery routines.) 
+ +- bigquery.connectionAdmin (Reason: Allows the extension to create a BigQuery connection.) + +- dlp.user (Reason: Allows the extension to use DLP services.) + diff --git a/bigquery-dlp-functions/extension.yaml b/bigquery-dlp-functions/extension.yaml new file mode 100644 index 00000000..c4ed696e --- /dev/null +++ b/bigquery-dlp-functions/extension.yaml @@ -0,0 +1,160 @@ +# Learn detailed information about the fields of an extension.yaml file in the docs: +# https://firebase.google.com/docs/extensions/alpha/ref-extension-yaml + +name: bigquery-dlp-functions # Identifier for your extension +version: 0.0.3 # Follow semver versioning +specVersion: v1beta # Version of the Firebase Extensions specification + +author: + authorName: Firebase + url: https://firebase.google.com + +displayName: BigQuery DLP Remote Function + +description: This extension creates BigQuery functions to facilitate de-identification and re-identification in queries, providing configurable techniques, seamless integration, and ensuring better data privacy and compliance. + +license: Apache-2.0 # https://spdx.org/licenses/ + +sourceUrl: TODO + +billingRequired: true + +apis: + - apiName: bigquery.googleapis.com + reason: Powers all BigQuery tasks performed by the extension. + - apiName: bigqueryconnection.googleapis.com + reason: Allows the extension to create a BigQuery connection. + - apiName: dlp.googleapis.com + reason: Allows the extension to use DLP services. + +roles: + - role: bigquery.jobUser + reason: Allows the extension to create BigQuery jobs. + - role: bigquery.dataOwner + reason: Allows the extension to create BigQuery routines. + - role: bigquery.connectionAdmin + reason: Allows the extension to create a BigQuery connection. + - role: dlp.user + reason: Allows the extension to use DLP services. + +resources: + - name: createBigQueryConnection + type: firebaseextensions.v1beta.function + description: Creates a BigQuery connection. 
+ properties: + location: ${param:LOCATION} + runtime: nodejs14 + taskQueueTrigger: {} + - name: deidentifyData + type: firebaseextensions.v1beta.function + description: TODO + properties: + location: ${param:LOCATION} + runtime: nodejs14 + httpsTrigger: {} + - name: reidentifyData + type: firebaseextensions.v1beta.function + description: TODO + properties: + location: ${param:LOCATION} + runtime: nodejs14 + httpsTrigger: {} + +params: + - param: TRANSFORMATION_METHOD + label: DLP Transformation Method + description: >- + The method used by Data Loss Prevention API to deidentify and/or encrypt sensitive information in the data. + type: select + options: + - label: Info Type Transformations + value: INFO_TYPE + - label: Record Type Transformations + value: RECORD + default: INFO_TYPE + + - param: TRANSFORMATION_TECHNIQUE + label: DLP Transformation Technique + description: >- + The technique used by Data Loss Prevention API to deidentify and/or encrypt sensitive information in the data. + type: select + options: + - label: Replace with Masking Character + value: masking + - label: Redact a value (remove it from the data) + value: redact + - label: Replace with a fixed value + value: fixed + - label: Replace with InfoType value + value: replaceWithInfoType + default: masking + + - param: FIELDS_TO_TRANSFORM + label: List of fields to transform using record transformation (comma separated) + description: >- + The list of fields to transform using record transformation. This is only used when the transformation method is set to `RECORD`. + type: string + + - param: DATASET_ID + label: BigQuery Dataset ID + description: >- + The ID of the dataset where the extension will create a connection. + type: string + required: true + immutable: true + + - param: LOCATION + label: Cloud Functions location + description: >- + Where do you want to deploy the functions created for this extension? You + usually want a location close to your database. 
For help selecting a + location, refer to the [location selection + guide](https://firebase.google.com/docs/functions/locations). + type: select + options: + - label: Iowa (us-central1) + value: us-central1 + - label: South Carolina (us-east1) + value: us-east1 + - label: Northern Virginia (us-east4) + value: us-east4 + - label: Los Angeles (us-west2) + value: us-west2 + - label: Salt Lake City (us-west3) + value: us-west3 + - label: Las Vegas (us-west4) + value: us-west4 + - label: Belgium (europe-west1) + value: europe-west1 + - label: London (europe-west2) + value: europe-west2 + - label: Frankfurt (europe-west3) + value: europe-west3 + - label: Zurich (europe-west6) + value: europe-west6 + - label: Hong Kong (asia-east2) + value: asia-east2 + - label: Tokyo (asia-northeast1) + value: asia-northeast1 + - label: Osaka (asia-northeast2) + value: asia-northeast2 + - label: Seoul (asia-northeast3) + value: asia-northeast3 + - label: Mumbai (asia-south1) + value: asia-south1 + - label: Jakarta (asia-southeast2) + value: asia-southeast2 + - label: Montreal (northamerica-northeast1) + value: northamerica-northeast1 + - label: Sao Paulo (southamerica-east1) + value: southamerica-east1 + - label: Sydney (australia-southeast1) + value: australia-southeast1 + default: us-central1 + required: true + immutable: true + +lifecycleEvents: + onInstall: + function: createBigQueryConnection + processingMessage: "Creating BigQuery connections" diff --git a/bigquery-dlp-functions/functions/.gitignore b/bigquery-dlp-functions/functions/.gitignore new file mode 100644 index 00000000..7fbb8b40 --- /dev/null +++ b/bigquery-dlp-functions/functions/.gitignore @@ -0,0 +1,8 @@ +## Compiled JavaScript files +**/*.js +**/*.js.map + +# Typescript v1 declaration files +typings/ + +node_modules/ \ No newline at end of file diff --git a/bigquery-dlp-functions/functions/__tests__/__mocks__/index.ts b/bigquery-dlp-functions/functions/__tests__/__mocks__/index.ts new file mode 100644 index 
00000000..46f94d77 --- /dev/null +++ b/bigquery-dlp-functions/functions/__tests__/__mocks__/index.ts @@ -0,0 +1,49 @@ +export function createMockDeidentifyContentResponse(itemType: any, value: any) { + return [ + { + item: { + [itemType]: value, + }, + }, + ]; +} + +export const mockProtos = { + google: { + privacy: { + dlp: { + v2: { + FieldId: { + create: jest.fn().mockImplementation(() => { + return { test: "data" }; + }), + }, + Table: { + Row: { + create: jest.fn().mockImplementation(() => { + return { test: "data" }; + }), + }, + }, + Value: { + create: jest.fn().mockImplementation(() => { + return { test: "data" }; + }), + }, + }, + }, + }, + }, +}; + +export const getFunctions = () => { + return { + taskQueue: (functionName: any, queueName: any) => { + return { + enqueue: async (payload: any) => { + console.log("Enqueue payload:", payload); + }, + }; + }, + }; +}; diff --git a/bigquery-dlp-functions/functions/__tests__/functions.test.ts b/bigquery-dlp-functions/functions/__tests__/functions.test.ts new file mode 100644 index 00000000..d8598e0c --- /dev/null +++ b/bigquery-dlp-functions/functions/__tests__/functions.test.ts @@ -0,0 +1,442 @@ +/** Set dynamic mocking modules */ +const mockDeidentityContent = jest.fn(); +const mockCreateConnection = jest.fn(); +const mockCreateQueryJob = jest.fn(); +const mockSetProcessingState = jest.fn(); + +import { + createMockDeidentifyContentResponse, + getFunctions, + mockProtos, +} from "./__mocks__"; + +const fft = require("firebase-functions-test"); +import { DlpServiceClient } from "@google-cloud/dlp"; +import mockedConfig from "../src/config"; +import { logger } from "firebase-functions"; +import * as functions from "../src/index"; +import setupEnvironment from "./setupEnvironment"; + +const testEnv = fft({ projectId: "demo-test" }); + +/** Mock imported modules */ +jest.mock("../src/config"); + +jest.mock("firebase-admin/functions", () => { + return { getFunctions }; +}); + +jest.mock("firebase-admin/extensions", 
() => { + return { + getExtensions: jest.fn().mockImplementation(() => ({ + runtime: jest.fn().mockImplementation(() => ({ + setProcessingState: mockSetProcessingState, + })), + })), + }; +}); + +jest.mock("@google-cloud/bigquery", () => { + return { + BigQuery: jest.fn().mockImplementation(() => ({ + createQueryJob: mockCreateQueryJob, + })), + }; +}); + +jest.mock("@google-cloud/bigquery-connection", () => { + return { + ConnectionServiceClient: jest.fn().mockImplementation(() => ({ + createConnection: mockCreateConnection, + })), + }; +}); + +jest.mock("@google-cloud/dlp", () => { + return { + DlpServiceClient: jest.fn().mockImplementation(() => ({ + deidentifyContent: mockDeidentityContent, + })), + protos: { ...mockProtos }, + }; +}); + +/** Mock logging functions */ +const debugSpy = jest.spyOn(logger, "debug"); +const warnSpy = jest.spyOn(logger, "warn"); +const infoSpy = jest.spyOn(logger, "info"); + +setupEnvironment(); + +beforeEach(() => { + jest.resetModules(); +}); + +afterEach(() => { + jest.clearAllMocks(); +}); + +describe("BigQuery DLP Extension", () => { + const dlpClient = new DlpServiceClient(); + + describe("deidentifyData", () => { + it("should deidentify data with valid input and a redact technique and INFO_TYPE method", (done) => { + /* Set config data */ + mockedConfig.technique = "redact"; + mockedConfig.method = "INFO_TYPE"; + + /** Configure mock responses */ + mockDeidentityContent.mockImplementation(async () => { + return [ + { + item: { + value: "mock-deidentified-content", + }, + }, + ]; + }); + + /** Setup the http request and expectations */ + const req = { body: { calls: [[{ row1: [{ foo: "bar" }] }]] } }; + const res = { + send: (data: Record) => { + expect(data.replies).toEqual([{ row1: "mock-deidentified-content" }]); + done(); + }, + }; + + /** Run run function */ + //@ts-ignore + functions.deidentifyData(req, res); + + /** Check results */ + expect(dlpClient.deidentifyContent).toHaveBeenCalled(); + 
expect(debugSpy).toBeCalledWith( + "Incoming request from BigQuery", + req.body.calls + ); + }); + + it("should deidentify data with valid input and a redact technique and RECORD method", (done) => { + /* Set config data */ + mockedConfig.technique = "redact"; + mockedConfig.method = "RECORD"; + + /** Configure mock responses */ + mockDeidentityContent.mockImplementation(async () => + createMockDeidentifyContentResponse("table", { + headers: [{ name: "test_header" }], + rows: [{ values: [{ stringValue: "testing" }] }], + }) + ); + + /** Setup the http request and expectations */ + const req = { body: { calls: [[{ row1: [{ foo: "bar" }] }]] } }; + const res = { + send: (data: Record) => { + expect(data.replies).toEqual([{ test_header: "testing" }]); + done(); + }, + }; + + /** Run run function */ + //@ts-ignore + functions.deidentifyData(req, res); + + /** Check results */ + expect(dlpClient.deidentifyContent).toHaveBeenCalled(); + expect(debugSpy).toBeCalledWith( + "Incoming request from BigQuery", + req.body.calls + ); + }); + + it("should deidentify data with valid input and a fixed technique and INFO_TYPE method", (done) => { + /* Set config data */ + mockedConfig.technique = "fixed"; + mockedConfig.method = "INFO_TYPE"; + + /** Configure mock responses */ + mockDeidentityContent.mockImplementation(async () => + createMockDeidentifyContentResponse( + "value", + "mock-deidentified-content" + ) + ); + + /** Setup the http request and expectations */ + const req = { body: { calls: [[{ row1: [{ foo: "bar" }] }]] } }; + const res = { + send: (data: Record) => { + expect(data.replies).toEqual([{ row1: "mock-deidentified-content" }]); + done(); + }, + }; + + /** Run run function */ + //@ts-ignore + functions.deidentifyData(req, res); + + /** Check results */ + expect(dlpClient.deidentifyContent).toHaveBeenCalled(); + expect(debugSpy).toBeCalledWith( + "Incoming request from BigQuery", + req.body.calls + ); + }); + + it("should deidentify data with valid input and a 
fixed technique and RECORD method", (done) => { + /* Set config data */ + mockedConfig.technique = "fixed"; + mockedConfig.method = "RECORD"; + + /** Configure mock responses */ + mockDeidentityContent.mockImplementation(async () => + createMockDeidentifyContentResponse("table", { + headers: [{ name: "test_header" }], + rows: [{ values: [{ stringValue: "testing" }] }], + }) + ); + + /** Setup the http request and expectations */ + const req = { body: { calls: [[{ row1: [{ foo: "bar" }] }]] } }; + const res = { + send: (data: Record) => { + console.log(data); + expect(data.replies).toEqual([{ test_header: "testing" }]); + done(); + }, + }; + + /** Run run function */ + //@ts-ignore + functions.deidentifyData(req, res); + + /** Check results */ + expect(dlpClient.deidentifyContent).toHaveBeenCalled(); + expect(debugSpy).toBeCalledWith( + "Incoming request from BigQuery", + req.body.calls + ); + }); + + it("should deidentify data with valid input and a replaceWithInfoType technique and INFO_TYPE method", (done) => { + /* Set config data */ + mockedConfig.technique = "replaceWithInfoType"; + mockedConfig.method = "INFO_TYPE"; + + /** Configure mock responses */ + mockDeidentityContent.mockImplementation(async () => + createMockDeidentifyContentResponse( + "value", + "mock-deidentified-content" + ) + ); + + /** Setup the http request and expectations */ + const req = { body: { calls: [[{ row1: [{ foo: "bar" }] }]] } }; + const res = { + send: (data: Record) => { + expect(data.replies).toEqual([{ row1: "mock-deidentified-content" }]); + done(); + }, + }; + + /** Run run function */ + //@ts-ignore + functions.deidentifyData(req, res); + + /** Check results */ + expect(dlpClient.deidentifyContent).toHaveBeenCalled(); + expect(debugSpy).toBeCalledWith( + "Incoming request from BigQuery", + req.body.calls + ); + }); + + it("should deidentify data with valid input and a replaceWithInfoType technique and RECORD method", (done) => { + /* Set config data */ + 
mockedConfig.technique = "replaceWithInfoType"; + mockedConfig.method = "RECORD"; + + /** Configure mock responses */ + mockDeidentityContent.mockImplementation(async () => + createMockDeidentifyContentResponse("table", { + headers: [{ name: "test_header" }], + rows: [{ values: [{ stringValue: "testing" }] }], + }) + ); + + /** Setup the http request and expectations */ + const req = { body: { calls: [[{ row1: [{ foo: "bar" }] }]] } }; + const res = { + send: (data: Record) => { + console.log(data); + expect(data.replies).toEqual([{ test_header: "testing" }]); + done(); + }, + }; + + /** Run run function */ + //@ts-ignore + functions.deidentifyData(req, res); + + /** Check results */ + expect(dlpClient.deidentifyContent).toHaveBeenCalled(); + expect(debugSpy).toBeCalledWith( + "Incoming request from BigQuery", + req.body.calls + ); + }); + + it("should return invalid method with an unrecognised config method", (done) => { + /* Set config data */ + mockedConfig.technique = undefined; + mockedConfig.method = undefined; + + /** Setup the http request and expectations */ + const req = { body: { calls: [] } }; + const res = { + status: () => { + return { + send: (data: Record) => { + expect(data.errorMessage).toBe("Invalid method"); + done(); + }, + }; + }, + }; + + /** Run run function */ + //@ts-ignore + functions.deidentifyData(req, res); + + /** Check results */ + expect(debugSpy).toBeCalledWith("Incoming request from BigQuery", []); + }); + }); + + describe("reidentifyData", () => { + it("should deidentify data with valid input and a redact technique and INFO_TYPE method", (done) => { + //TODO: add tests if this fn is ready? Currently has now working case in the source code. 
+ expect(true).toBeTruthy(); + done(); + }); + }); + + describe("createBigQueryConnection", () => { + let createBigQueryConnection: any; + + beforeAll(() => { + createBigQueryConnection = testEnv.wrap( + functions.createBigQueryConnection + ); + }); + + it("should successfully create a connection", async () => { + /** Configure mock responses */ + mockCreateConnection.mockImplementation(async () => { + return [ + { + connection: "example", + }, + ]; + }); + + mockCreateQueryJob.mockImplementation(async () => { + return [{ getQueryResults: jest.fn() }]; + }); + + mockSetProcessingState.mockImplementation(); + + /** Run function */ + await createBigQueryConnection({}); + + /** Check results */ + expect(mockCreateConnection).toBeCalledTimes(1); + expect(infoSpy).toBeCalledWith("Connection successfully created 🎉", [ + { + connection: "example", + }, + ]); + + expect(mockSetProcessingState).toBeCalledWith( + "PROCESSING_COMPLETE", + "Connections created successfully." + ); + }); + + it("should wanrn if a conneciton already exists", async () => { + /** Configure mock responses */ + mockCreateConnection.mockImplementation(async () => { + const error = new Error("Connection already exists"); + //@ts-ignore + error.code = 6; + throw error; + }); + + /** Run function */ + await createBigQueryConnection({}); + + /** Check results */ + expect(mockCreateConnection).toBeCalledTimes(1); + expect(warnSpy).toBeCalledWith( + "Connection ext-firestore-geo-functions already exists, will continue creating functions" + ); + }); + + it("should successfully throw an error if a function exists", async () => { + /** Configure mock responses */ + mockCreateConnection.mockImplementation(async () => { + return [ + { + connection: "example", + }, + ]; + }); + + mockCreateQueryJob.mockImplementation(async () => { + const error = new Error("Function already exists"); + //@ts-ignore + error.code = 6; + throw error; + }); + + /** Run function */ + await createBigQueryConnection({}); + + /** Check 
results */ + expect(mockCreateConnection).toBeCalledTimes(1); + expect(warnSpy).toBeCalledWith("Functions already exists."); + }); + + it("should successfully throw an error if an error code does not equal 6", async () => { + /** Configure mock responses */ + mockCreateConnection.mockImplementation(async () => { + return [ + { + connection: "example", + }, + ]; + }); + + mockCreateQueryJob.mockImplementation(async () => { + const error = new Error("Function already exists"); + //@ts-ignore + error.code = 4; + throw error; + }); + + /** Run function */ + await createBigQueryConnection({}); + + /** Check results */ + expect(mockCreateConnection).toBeCalledTimes(1); + + expect(mockSetProcessingState).toBeCalledWith( + "PROCESSING_FAILED", + "Connections were not created, check logs for more details." + ); + }); + }); +}); diff --git a/bigquery-dlp-functions/functions/__tests__/jest.setup.ts b/bigquery-dlp-functions/functions/__tests__/jest.setup.ts new file mode 100644 index 00000000..d83474ee --- /dev/null +++ b/bigquery-dlp-functions/functions/__tests__/jest.setup.ts @@ -0,0 +1,15 @@ +const path = require("path"); + +(async function () { + require("dotenv").config({ + path: path.resolve( + __dirname, + "../../../_emulator/extensions/firestore-places-autocomplete.env.local" + ), + }); + + process.env.EXT_INSTANCE_ID = "firestore-geo-functions"; + process.env.GCLOUD_PROJECT = "demo-test"; + process.env.PROJECT_ID = "demo-test"; + process.env.EVENTARC_CHANNEL = "my-eventarc-channel"; +})(); diff --git a/bigquery-dlp-functions/functions/__tests__/setupEnvironment.ts b/bigquery-dlp-functions/functions/__tests__/setupEnvironment.ts new file mode 100644 index 00000000..ae8eb1c7 --- /dev/null +++ b/bigquery-dlp-functions/functions/__tests__/setupEnvironment.ts @@ -0,0 +1,8 @@ +export default () => { + process.env.FIRESTORE_EMULATOR_HOST = "localhost:8080"; + process.env.FIREBASE_FIRESTORE_EMULATOR_ADDRESS = "localhost:8080"; + process.env.FIREBASE_AUTH_EMULATOR_HOST = 
"localhost:9099"; + process.env.PUBSUB_EMULATOR_HOST = "localhost:8085"; + process.env.GOOGLE_CLOUD_PROJECT = "demo-test"; + process.env.FIREBASE_STORAGE_EMULATOR_HOST = "localhost:9199"; +}; diff --git a/bigquery-dlp-functions/functions/__tests__/tsconfig.json b/bigquery-dlp-functions/functions/__tests__/tsconfig.json new file mode 100644 index 00000000..a32dd4f2 --- /dev/null +++ b/bigquery-dlp-functions/functions/__tests__/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../tsconfig.json", + "include": ["**/*"] +} diff --git a/bigquery-dlp-functions/functions/package.json b/bigquery-dlp-functions/functions/package.json new file mode 100644 index 00000000..23d1b745 --- /dev/null +++ b/bigquery-dlp-functions/functions/package.json @@ -0,0 +1,35 @@ +{ + "name": "functions", + "scripts": { + "lint": "eslint --ext .js,.ts .", + "build": "tsc", + "build:watch": "tsc --watch", + "serve": "npm run build && firebase emulators:start --only functions", + "shell": "npm run build && firebase functions:shell", + "start": "npm run shell", + "deploy": "firebase deploy --only functions", + "logs": "firebase functions:log", + "generate-readme": "node ../../generate-experimental-readme.js bigquery-dlp-functions > ../README.md" + }, + "main": "lib/index.js", + "dependencies": { + "@google-cloud/bigquery": "^6.0.3", + "@google-cloud/bigquery-connection": "^2.0.2", + "@google-cloud/dlp": "^4.2.0", + "firebase-admin": "^11.3.0", + "firebase-functions": "^4.2.0" + }, + "devDependencies": { + "@types/jest": "^29.5.0", + "@typescript-eslint/eslint-plugin": "^5.12.0", + "@typescript-eslint/parser": "^5.12.0", + "eslint": "^8.9.0", + "eslint-config-google": "^0.14.0", + "eslint-plugin-import": "^2.25.4", + "typescript": "^4.5.4", + "jest": "^29.5.0", + "ts-jest": "^29.0.5", + "firebase-functions-test": "^3.0.0" + }, + "private": true +} diff --git a/bigquery-dlp-functions/functions/src/config.ts b/bigquery-dlp-functions/functions/src/config.ts new file mode 100644 index 00000000..480a2e5e 
--- /dev/null +++ b/bigquery-dlp-functions/functions/src/config.ts @@ -0,0 +1,25 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +export default { + projectId: process.env.PROJECT_ID, + extInstanceId: process.env.EXT_INSTANCE_ID, + datasetId: process.env.DATASET_ID, + location: process.env.LOCATION, + method: process.env.TRANSFORMATION_METHOD, + technique: process.env.TRANSFORMATION_TECHNIQUE, + fields: process.env.FIELDS_TO_TRANSFORM?.split(","), +}; diff --git a/bigquery-dlp-functions/functions/src/deidentify.ts b/bigquery-dlp-functions/functions/src/deidentify.ts new file mode 100644 index 00000000..01233c87 --- /dev/null +++ b/bigquery-dlp-functions/functions/src/deidentify.ts @@ -0,0 +1,99 @@ +import * as functions from "firebase-functions"; +import { DlpServiceClient } from "@google-cloud/dlp"; + +import { + MaskTransformation, + RedactTransformation, + ReplaceTransformation, + ReplaceWithInfoTypeTransformation, + rowsToTable, + tableToReplies, +} from "./transofmrations"; + +/** + * Deidentify sensitive data in a string with [the Data Loss Prevention API](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp) + * using `infoTypeTransformations` method. + * + * Read more about this method: https://cloud.google.com/dlp/docs/deidentify-sensitive-data#infotype_transformations + * + * @param {Array} rows The text to deidentify. 
+ * + * @returns {Promise>} The deidentified record. + */ +export async function deidentifyWithInfoTypeTransformations( + rows: [], + client: DlpServiceClient, + transformation: + | MaskTransformation + | RedactTransformation + | ReplaceTransformation + | ReplaceWithInfoTypeTransformation +): Promise> { + const deidentifiedItems = []; + + for (const row of rows) { + const data = row[0] as Record; + + for (const key in data) { + if (data.hasOwnProperty(key)) { + const element = data[key]; + const request = { + ...transformation.deidentifyConfig, + item: { value: element }, + }; + + const [response] = await client.deidentifyContent(request); + data[key] = response.item?.value; + } + } + + deidentifiedItems.push(data); + } + + return deidentifiedItems; +} + +/** + * Deidentify sensitive data in a string with the [Data Loss Prevention API](https://cloud.google.com/architecture/de-identification-re-identification-pii-using-cloud-dlp) + * using `recordTransformations` method. + * + * Read more about this method: https://cloud.google.com/dlp/docs/deidentify-sensitive-data#record_transformations + * + * @param {Array} rows The rows with data to deidentify. + * + * @returns {Promise} The deidentified text. 
+ */ +export async function deidentifyWithRecordTransformations( + rows: [], + client: DlpServiceClient, + transformation: + | MaskTransformation + | RedactTransformation + | ReplaceTransformation + | ReplaceWithInfoTypeTransformation +) { + let table; + + try { + // Convert raw rows to Table type + table = rowsToTable(rows); + } catch (error) { + functions.logger.debug(`Error converting rows to Table type.`); + throw error; + } + + // Construct de-identification request + const request = { + ...transformation.deidentifyConfig, + item: { + table: table, + }, + }; + + // Run deidentification request + const [response] = await client.deidentifyContent(request); + + functions.logger.debug(tableToReplies(response.item?.table)); + + return tableToReplies(response.item?.table); +} diff --git a/bigquery-dlp-functions/functions/src/index.ts b/bigquery-dlp-functions/functions/src/index.ts new file mode 100644 index 00000000..7c1452c3 --- /dev/null +++ b/bigquery-dlp-functions/functions/src/index.ts @@ -0,0 +1,207 @@ +import * as functions from "firebase-functions"; +import * as admin from "firebase-admin"; +import { DlpServiceClient } from "@google-cloud/dlp"; +import { ConnectionServiceClient } from "@google-cloud/bigquery-connection"; +import { BigQuery } from "@google-cloud/bigquery"; +import { getExtensions } from "firebase-admin/extensions"; + +import config from "./config"; +import { + deidentifyWithInfoTypeTransformations, + deidentifyWithRecordTransformations, +} from "./deidentify"; +import { + MaskTransformation, + RedactTransformation, + ReplaceTransformation, + ReplaceWithInfoTypeTransformation, +} from "./transofmrations"; +import { reidentifyWithInfoTypeTransformations } from "./reidentify"; + +admin.initializeApp(); + +const bigqueryClient = new BigQuery(); +const bigqueryConnectionClient = new ConnectionServiceClient(); + +const dlp = new DlpServiceClient(); + +exports.deidentifyData = functions.https.onRequest( + async (request, response) => { + const { 
calls } = request.body; + + functions.logger.debug("Incoming request from BigQuery", calls); + var transformation; + + switch (config.technique) { + case "redact": + transformation = new RedactTransformation(); + break; + case "fixed": + transformation = new ReplaceTransformation(); + break; + case "replaceWithInfoType": + transformation = new ReplaceWithInfoTypeTransformation(); + break; + default: + transformation = new MaskTransformation(); + } + + try { + switch (config.method) { + case "INFO_TYPE": + response.send({ + replies: await deidentifyWithInfoTypeTransformations( + calls, + dlp, + transformation + ), + }); + break; + case "RECORD": + response.send({ + replies: await deidentifyWithRecordTransformations( + calls, + dlp, + transformation + ), + }); + break; + default: + response.status(400).send({ errorMessage: "Invalid method" }); + break; + } + } catch (error) { + functions.logger.error(error); + + response.status(400).send({ errorMessage: error }); + } + } +); + +exports.reidentifyData = functions.https.onRequest( + async (request, response) => { + const { calls } = request.body; + + functions.logger.debug("Incoming request from BigQuery", calls); + + var transformation; + + switch (config.technique) { + default: + response.status(400).send("Invalid or irreversable technique"); + return; + } + + try { + if (config.method === "INFO_TYPE") { + response.send({ + replies: await reidentifyWithInfoTypeTransformations( + calls, + dlp, + transformation + ), + }); + } else if (config.method === "RECORD") { + response.send({ + replies: await deidentifyWithRecordTransformations( + calls, + dlp, + transformation + ), + }); + } else { + response.status(400).send("Invalid method"); + } + } catch (error) { + functions.logger.error(error); + response.status(400).send({ errorMessage: error }); + } + } +); + +export const createBigQueryConnection = functions.tasks + .taskQueue() + .onDispatch(async () => { + const runtime = getExtensions().runtime(); + + const parent = 
`projects/${config.projectId}/locations/${config.location}`; + const instanceId = "ext-" + config.extInstanceId; + var connection; + + try { + connection = await bigqueryConnectionClient.createConnection({ + parent: parent, + connectionId: instanceId, + connection: { + cloudResource: { + serviceAccountId: `${instanceId}@${config.projectId}.iam.gserviceaccount.com`, + }, + name: instanceId, + friendlyName: "DLP Extension", + }, + }); + + functions.logger.info("Connection successfully created 🎉", connection); + } catch (error: any) { + if (error["code"] === 6) { + functions.logger.warn( + `Connection ${instanceId} already exists, will continue creating functions` + ); + } else { + functions.logger.error(error); + await runtime.setProcessingState( + "PROCESSING_FAILED", + "Error creating connection. Check logs for more details." + ); + + return; + } + } + + try { + const query = ` + BEGIN + CREATE FUNCTION \`${config.projectId}.${config.datasetId}\`.deidentify(data JSON) RETURNS JSON + REMOTE WITH CONNECTION \`${config.projectId}.${config.location}.${instanceId}\` + OPTIONS ( + endpoint = 'https://${config.location}-${config.projectId}.cloudfunctions.net/${instanceId}-deidentifyData' + ); + CREATE FUNCTION \`${config.projectId}.${config.datasetId}\`.reidentify(data JSON) RETURNS JSON + REMOTE WITH CONNECTION \`${config.projectId}.${config.location}.${instanceId}\` + OPTIONS ( + endpoint = 'https://${config.location}-${config.projectId}.cloudfunctions.net/${instanceId}-reidentifyData' + ); + END; + `; + + const options = { + query: query, + location: config.location, + }; + + // Run the query as a job + const [job] = await bigqueryClient.createQueryJob(options); + functions.logger.debug(`Job ${job.id} started.`); + + // Wait for the query to finish + await job.getQueryResults(); + + await runtime.setProcessingState( + "PROCESSING_COMPLETE", + "Connections created successfully." 
+ ); + } catch (error: any) { + if (error["code"] === 6) { + functions.logger.warn(`Functions already exist.`); + } else { + functions.logger.error(error); + + await runtime.setProcessingState( + "PROCESSING_FAILED", + "Connections were not created, check logs for more details." + ); + + return; + } + } + }); diff --git a/bigquery-dlp-functions/functions/src/reidentify.ts b/bigquery-dlp-functions/functions/src/reidentify.ts new file mode 100644 index 00000000..d362ac15 --- /dev/null +++ b/bigquery-dlp-functions/functions/src/reidentify.ts @@ -0,0 +1,44 @@ +import * as functions from "firebase-functions"; +import { DlpServiceClient } from "@google-cloud/dlp"; + +import { MaskTransformation, RedactTransformation } from "./transofmrations"; +import config from "./config"; + +export async function reidentifyWithInfoTypeTransformations( + rows: [], + client: DlpServiceClient, + transformation: MaskTransformation | RedactTransformation +) { + const reidentifiedItems = []; + + const parent = `projects/${config.projectId}/locations/${config.location}`; + + if (transformation instanceof MaskTransformation) { + functions.logger.debug("Mask Transformation is irreversible"); + throw new Error("Mask Transformation is irreversible"); + } + + for (const row of rows) { + const data = row[0] as Record<string, any>; + functions.logger.debug(data); + + for (const key in data) { + if (data.hasOwnProperty(key)) { + const element = data[key]; + const request = { + ...transformation.reidentifyConfig, + item: { value: element }, + parent: parent, + }; + + const [response] = await client.deidentifyContent(request); + data[key] = response.item?.value; + } + } + + functions.logger.debug(data); + reidentifiedItems.push(data); + } + + return reidentifiedItems; +} diff --git a/bigquery-dlp-functions/functions/src/transofmrations.ts b/bigquery-dlp-functions/functions/src/transofmrations.ts new file mode 100644 index 00000000..223f588c --- /dev/null +++ 
b/bigquery-dlp-functions/functions/src/transofmrations.ts @@ -0,0 +1,261 @@ +import config from "./config"; +import { protos } from "@google-cloud/dlp"; + +type ReidentifyRequest = protos.google.privacy.dlp.v2.IReidentifyContentRequest; +type DeidentifyRequest = protos.google.privacy.dlp.v2.IDeidentifyContentRequest; +type FieldId = protos.google.privacy.dlp.v2.FieldId; +type Row = protos.google.privacy.dlp.v2.Table.Row; +type Table = protos.google.privacy.dlp.v2.ITable | undefined | null; + +class Transformation { + parent: string; + deidentifyConfig: DeidentifyRequest = {}; + reidentifyConfig: ReidentifyRequest = {}; + + constructor() { + this.parent = `projects/${config.projectId}/locations/${config.location}`; + } +} + +export class MaskTransformation extends Transformation { + /** + * Replace a value by a mask character. + * + * @param mask The character to mask the sensitive data with. If not supplied, defaults to `x`. + * @param numberToMask The number of characters to mask. If not supplied, defaults to `5`. + */ + constructor(mask?: string, numberToMask?: number) { + super(); + const maskingConfig = { + ...(config.method == "INFO_TYPE" && { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + characterMaskConfig: { + maskingCharacter: mask ?? "x", + numberToMask: numberToMask ?? 5, + }, + }, + }, + ], + }, + }), + ...(config.method == "RECORD" && { + recordTransformations: { + fieldTransformations: [ + { + fields: getFieldIds(), + primitiveTransformation: { + characterMaskConfig: { + maskingCharacter: mask ?? "x", + numberToMask: numberToMask ?? 5, + }, + }, + }, + ], + }, + }), + }; + + this.deidentifyConfig = { + parent: this.parent, + deidentifyConfig: maskingConfig, + }; + } +} + +export class RedactTransformation extends Transformation { + /** + * Redacts a value by removing it. 
+ */ + constructor() { + super(); + const redactConfig = { + ...(config.method == "INFO_TYPE" && { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + redactConfig: {}, + }, + }, + ], + }, + }), + ...(config.method == "RECORD" && { + recordTransformations: { + fieldTransformations: [ + { + fields: getFieldIds(), + primitiveTransformation: { + redactConfig: {}, + }, + }, + ], + }, + }), + }; + + this.deidentifyConfig = { + parent: this.parent, + deidentifyConfig: redactConfig, + }; + } +} + +export class ReplaceTransformation extends Transformation { + /** + * Replace with a specified value. + */ + constructor() { + super(); + const _replaceConfig = { + newValue: { + // TODO make configurable? + stringValue: "REPLACED", + }, + }; + + const replaceConfig = { + ...(config.method == "INFO_TYPE" && { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + replaceConfig: _replaceConfig, + }, + }, + ], + }, + }), + ...(config.method == "RECORD" && { + recordTransformations: { + fieldTransformations: [ + { + fields: getFieldIds(), + primitiveTransformation: { + replaceConfig: _replaceConfig, + }, + }, + ], + }, + }), + }; + + this.deidentifyConfig = { + parent: this.parent, + deidentifyConfig: replaceConfig, + }; + } +} + +export class ReplaceWithInfoTypeTransformation extends Transformation { + /** + * Replace with a specified value. + */ + constructor() { + super(); + const _replaceConfig = { + // TODO make configurable? 
+ partToExtract: "MONTH", + }; + + const replaceConfig = { + ...(config.method == "INFO_TYPE" && { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + replaceWithInfoTypeConfig: _replaceConfig, + }, + }, + ], + }, + }), + ...(config.method == "RECORD" && { + recordTransformations: { + fieldTransformations: [ + { + fields: getFieldIds(), + primitiveTransformation: { + replaceWithInfoTypeConfig: _replaceConfig, + }, + }, + ], + }, + }), + }; + + this.deidentifyConfig = { + parent: this.parent, + deidentifyConfig: replaceConfig, + }; + } +} + +export function rowsToTable(rows: []) { + let table = { + headers: [] as FieldId[], + rows: [] as Row[], + }; + + for (const row of rows) { + const data = row[0] as Record; + + const keys = Object.keys(data); + const values = Object.values(data); + + if (table.headers.length === 0) { + // Add headers only once + table.headers = keys.map((key) => { + const field = protos.google.privacy.dlp.v2.FieldId.create({ + name: key, + }); + return field; + }); + } + + const tableRow = protos.google.privacy.dlp.v2.Table.Row.create({ + values: values.map((v) => { + const field = protos.google.privacy.dlp.v2.Value.create({ + stringValue: v, + }); + return field; + }), + }); + + table.rows.push(tableRow); + } + + return table; +} + +function getFieldIds() { + const fieldIds = config.fields?.map((field) => { + return { name: field }; + }); + return fieldIds; +} + +export function tableToReplies(table: Table) { + const replies = []; + const rows = table?.rows?.map((row) => + row.values?.map((value) => value.stringValue) + ); + + if (!rows || !table || !table.headers) return []; + + for (const row of rows) { + const reply = {} as Record; + + for (let i = 0; i < table.headers.length; i++) { + const header = table.headers[i].name as string; + reply[header] = row![i]; + } + + replies.push(reply); + } + + return replies; +} diff --git a/bigquery-dlp-functions/functions/tsconfig.json 
b/bigquery-dlp-functions/functions/tsconfig.json new file mode 100644 index 00000000..a9ed863a --- /dev/null +++ b/bigquery-dlp-functions/functions/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "module": "commonjs", + "noImplicitReturns": true, + "noUnusedLocals": true, + "outDir": "lib", + "sourceMap": true, + "strict": true, + "target": "es2017" + }, + "compileOnSave": true, + "include": ["src"] +} diff --git a/firestore-auth-claims/functions/tsconfig.json b/firestore-auth-claims/functions/tsconfig.json index a9ed863a..9d2d8f96 100644 --- a/firestore-auth-claims/functions/tsconfig.json +++ b/firestore-auth-claims/functions/tsconfig.json @@ -1,13 +1,13 @@ { "compilerOptions": { + "lib": ["esnext.asynciterable", "es2020", "es6"], + "outDir": "lib", "module": "commonjs", "noImplicitReturns": true, - "noUnusedLocals": true, - "outDir": "lib", - "sourceMap": true, - "strict": true, - "target": "es2017" + "sourceMap": false, + "target": "es6" }, "compileOnSave": true, - "include": ["src"] + "include": ["src"], + "exclude": ["node_modules"] }