Skip to content
This repository was archived by the owner on Oct 25, 2023. It is now read-only.

Commit 99d90a3

Browse files
author
George Price
committed
Merge branch 'master' of github.com:awslabs/amazon-textract-document-understanding-solution
2 parents 37dfc36 + 43ff253 commit 99d90a3

File tree

8 files changed

+1067
-287
lines changed

8 files changed

+1067
-287
lines changed

deployment/custom-deployment/bin/stack-variables-to-client.sh

Lines changed: 0 additions & 9 deletions
This file was deleted.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
3+
source .env
4+
5+
#STACK
6+
7+
echo "==update %%CLIENT_APP_BUCKET%% in stack with $2=="
8+
replace="s/%%CLIENT_APP_BUCKET%%/$ClientAppBucketName/g"
9+
sed -i -e $replace ./lib/cdk-textract-client-stack.js
10+
11+
echo "==update Elastic Search Cluster ($ElasticSearchCluster) with log streams to Log Groups: $ElasticSearchSearchLogGroup and $ElasticSearchIndexLogGroup"
12+
INDEX_LOG_ARN=$(aws logs describe-log-groups --log-group-name $ElasticSearchIndexLogGroup | jq -r '.logGroups[0].arn')
13+
SEARCH_LOG_ARN=$(aws logs describe-log-groups --log-group-name $ElasticSearchSearchLogGroup | jq -r '.logGroups[0].arn')
14+
15+
echo "==adding permissions to ES service role first for creating log stream"
16+
aws logs put-resource-policy --policy-name es-to-log-stream --policy-document '{ "Version": "2012-10-17", "Statement": [ { "Sid": "ElasticSearchLogsToCloudWatchLogs", "Effect": "Allow", "Principal": { "Service": [ "es.amazonaws.com" ] }, "Action":["logs:PutLogEvents", "logs:CreateLogStream", "logs:DeleteLogStream"], "Resource": "*" } ] }'
17+
18+
echo "==Log Groups are $INDEX_LOG_ARN and $SEARCH_LOG_ARN"
19+
aws es update-elasticsearch-domain-config --domain-name $ElasticSearchCluster --log-publishing-options '{"INDEX_SLOW_LOGS": { "CloudWatchLogsLogGroupArn": "'"$INDEX_LOG_ARN"'", "Enabled": true }, "SEARCH_SLOW_LOGS": { "CloudWatchLogsLogGroupArn": "'"$SEARCH_LOG_ARN"'", "Enabled": true } }'
20+
21+

deployment/document-understanding-solution.template

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,10 @@ Resources:
170170
commands:
171171
- echo "This buildspec is based on image - aws/codebuild/amazonlinux2-x86_64-standard:2.0"
172172
- node --version
173+
- echo "Installing jq package"
174+
- wget -O jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64
175+
- chmod +x ./jq
176+
- cp jq /usr/bin
173177
- npm install -g cdk@1.18.0
174178
- cdk --version
175179
- npm install -g typescript
@@ -541,9 +545,11 @@ Resources:
541545
"Action": [
542546
"logs:CreateLogGroup",
543547
"logs:CreateLogStream",
548+
"logs:DeleteLogStream",
544549
"logs:PutLogEvents",
545550
"logs:Describe*",
546-
"logs:PutRetentionPolicy"
551+
"logs:PutRetentionPolicy",
552+
"logs:PutResourcePolicy"
547553
]
548554
},
549555
{
@@ -672,7 +678,8 @@ Resources:
672678
"es:CreateElasticsearchServiceRole",
673679
"es:DeleteElasticsearchDomain",
674680
"es:DeleteElasticsearchServiceRole",
675-
"es:List*"
681+
"es:List*",
682+
"es:UpdateElasticsearchDomainConfig"
676683
]
677684
},
678685
{
@@ -686,7 +693,10 @@ Resources:
686693
"iam:Get*",
687694
"iam:AttachRolePolicy",
688695
"iam:PutRolePolicy",
689-
"iam:CreateServiceLinkedRole"
696+
"iam:DeleteRolePolicy",
697+
"iam:DetachRolePolicy",
698+
"iam:CreateServiceLinkedRole",
699+
"iam:DeleteServiceLinkedRole"
690700
]
691701
},
692702
{

source/bin/pre-build.js

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
/**********************************************************************************************************************
32
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. *
43
* *
@@ -118,6 +117,15 @@ const GetResources = new Promise((resolve, reject) => {
118117
resources.PdfGenLambda = stackDescriptionObj.find((x) =>
119118
/pdfgenerator/i.test(x.LogicalResourceId)
120119
).PhysicalResourceId;
120+
resources.ElasticSearchSearchLogGroup = stackDescriptionObj.find((x) =>
121+
/ElasticSearchSearchLogGroup/i.test(x.LogicalResourceId)
122+
).PhysicalResourceId;
123+
resources.ElasticSearchIndexLogGroup = stackDescriptionObj.find((x) =>
124+
/ElasticSearchIndexLogGroup/i.test(x.LogicalResourceId)
125+
).PhysicalResourceId;
126+
resources.ElasticSearchCluster = stackDescriptionObj.find((x) =>
127+
/ElasticSearchCluster/i.test(x.LogicalResourceId)
128+
).PhysicalResourceId;
121129

122130
resolve(resources);
123131
});
@@ -130,6 +138,6 @@ const setEnv = async () => {
130138
outputArray.push(`${key}=${data[key]}`);
131139
});
132140
fs.writeFileSync(".env", outputArray.join("\n"));
133-
fs.appendFileSync(".env","\nisROMode="+isROMode);
141+
fs.appendFileSync(".env", "\nisROMode=" + isROMode);
134142
};
135143
setEnv();

source/lib/cdk-textract-stack.ts

Lines changed: 61 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ import { QueueEncryption } from "@aws-cdk/aws-sqs";
4646
import { LogGroup } from "@aws-cdk/aws-logs";
4747
import { LogGroupLogDestination } from "@aws-cdk/aws-apigateway";
4848

49-
const API_CONCURRENT_REQUESTS = 20; //approximate number of 1-2 page documents to be processed parallelly
49+
const API_CONCURRENT_REQUESTS = 20; //approximate number of 1-2 page documents to be processed in parallel
5050

5151
export interface TextractStackProps {
5252
email: string;
@@ -171,12 +171,14 @@ export class CdkTextractStack extends cdk.Stack {
171171
behaviors: [{ isDefaultBehavior: true }],
172172
},
173173
],
174-
errorConfigurations: [{
175-
errorCode: 404,
176-
responseCode: 200,
177-
errorCachingMinTtl: 5,
178-
responsePagePath: '/index.html'
179-
}],
174+
errorConfigurations: [
175+
{
176+
errorCode: 404,
177+
responseCode: 200,
178+
errorCachingMinTtl: 5,
179+
responsePagePath: "/index.html",
180+
},
181+
],
180182
priceClass: PriceClass.PRICE_CLASS_100,
181183
httpVersion: HttpVersion.HTTP2,
182184
enableIpV6: true,
@@ -231,11 +233,19 @@ export class CdkTextractStack extends cdk.Stack {
231233
cloudfrontDocumentsBucketPolicyStatement
232234
);
233235

234-
const esLogGroup = new LogGroup(
236+
const esSearchLogGroup = new LogGroup(
237+
this,
238+
this.resourceName("ElasticSearchSearchLogGroup"),
239+
{
240+
logGroupName: this.resourceName("ElasticSearchSearchLogGroup"),
241+
}
242+
);
243+
244+
const esIndexLogGroup = new LogGroup(
235245
this,
236-
this.resourceName("ElasticSearchLogGroup"),
246+
this.resourceName("ElasticSearchIndexLogGroup"),
237247
{
238-
logGroupName: this.resourceName("ElasticSearchLogGroup"),
248+
logGroupName: this.resourceName("ElasticSearchIndexLogGroup"),
239249
}
240250
);
241251

@@ -270,18 +280,6 @@ export class CdkTextractStack extends cdk.Stack {
270280
}
271281
);
272282
} else {
273-
const serviceLinkedRole = new cdk.CfnResource(
274-
this,
275-
this.resourceName("es-service-linked-role"),
276-
{
277-
type: "AWS::IAM::ServiceLinkedRole",
278-
properties: {
279-
AWSServiceName: "es.amazonaws.com",
280-
Description: "Role for ES to access resources in my VPC",
281-
},
282-
}
283-
);
284-
285283
elasticSearch = new es.CfnDomain(
286284
this,
287285
this.resourceName("ElasticSearchCluster"),
@@ -308,28 +306,43 @@ export class CdkTextractStack extends cdk.Stack {
308306
nodeToNodeEncryptionOptions: {
309307
enabled: true,
310308
},
311-
logPublishingOptions: {
312-
INDEX_SLOW_LOGS: {
313-
cloudWatchLogsLogGroupArn: esLogGroup.logGroupArn,
314-
enabled: true,
315-
},
316-
SEARCH_SLOW_LOGS: {
317-
cloudWatchLogsLogGroupArn: esLogGroup.logGroupArn,
318-
enabled: true,
319-
},
320-
},
321309
}
322310
);
323-
324-
elasticSearch.node.addDependency(serviceLinkedRole);
325311
}
326312

313+
const jobResultsKey = new kms.Key(
314+
this,
315+
this.resourceName("JobResultsKey"),
316+
{
317+
enableKeyRotation: true,
318+
enabled: true,
319+
trustAccountIdentities: true,
320+
policy: new iam.PolicyDocument({
321+
assignSids: true,
322+
statements: [
323+
new iam.PolicyStatement({
324+
actions: ["kms:GenerateDataKey*", "kms:Decrypt"],
325+
resources: ["*"], // Resource level permissions are not necessary in this policy statement, as it is automatically restricted to this key
326+
effect: iam.Effect.ALLOW,
327+
principals: [
328+
new iam.ServicePrincipal("sns.amazonaws.com"),
329+
new iam.ServicePrincipal("lambda.amazonaws.com"),
330+
new iam.ServicePrincipal("textract.amazonaws.com"),
331+
new iam.ServicePrincipal("sqs.amazonaws.com"),
332+
],
333+
}),
334+
],
335+
}),
336+
}
337+
);
338+
327339
// SNS Topic
328340
const jobCompletionTopic = new sns.Topic(
329341
this,
330-
this.resourceName("JobCompletion"),
342+
this.resourceName("JobCompletionTopic"),
331343
{
332344
displayName: "Job completion topic",
345+
masterKey: jobResultsKey,
333346
}
334347
);
335348

@@ -349,6 +362,13 @@ export class CdkTextractStack extends cdk.Stack {
349362
resources: [jobCompletionTopic.topicArn],
350363
})
351364
);
365+
textractServiceRole.addToPolicy(
366+
new iam.PolicyStatement({
367+
effect: iam.Effect.ALLOW,
368+
actions: ["kms:Decrypt", "kms:GenerateDataKey*"],
369+
resources: [jobResultsKey.keyArn],
370+
})
371+
);
352372

353373
// DynamoDB tables
354374
const outputTable = new ddb.Table(this, this.resourceName("OutputTable"), {
@@ -440,6 +460,7 @@ export class CdkTextractStack extends cdk.Stack {
440460
{
441461
visibilityTimeout: cdk.Duration.seconds(900),
442462
retentionPeriod: cdk.Duration.seconds(1209600),
463+
encryption: QueueEncryption.KMS_MANAGED,
443464
}
444465
);
445466

@@ -449,12 +470,16 @@ export class CdkTextractStack extends cdk.Stack {
449470
{
450471
visibilityTimeout: cdk.Duration.seconds(900),
451472
retentionPeriod: cdk.Duration.seconds(1209600),
473+
encryption: QueueEncryption.KMS,
474+
encryptionMasterKey: jobResultsKey,
475+
dataKeyReuse: cdk.Duration.seconds(86400),
452476
deadLetterQueue: {
453477
maxReceiveCount: 3,
454478
queue: jobResultsDLQueue,
455479
},
456480
}
457481
);
482+
458483
// trigger
459484
jobCompletionTopic.addSubscription(
460485
new snsSubscriptions.SqsSubscription(jobResultsQueue)
@@ -874,6 +899,7 @@ export class CdkTextractStack extends cdk.Stack {
874899
jobResultProcessor.addLayers(textractorLayer);
875900
jobResultProcessor.addLayers(boto3Layer);
876901
jobResultProcessor.addLayers(elasticSearchLayer);
902+
jobResultsKey.grantEncryptDecrypt(jobResultProcessor);
877903

878904
// Triggers
879905
jobResultProcessor.addEventSource(

0 commit comments

Comments
 (0)