1+ from aws_cdk import (
2+ aws_ec2 as ec2 ,
3+ aws_ssm as ssm ,
4+ aws_codeartifact as codeartifact ,
5+ aws_stepfunctions as sfn ,
6+ aws_glue_alpha as glue ,
7+ aws_iam as iam ,
8+ aws_s3 as s3 ,
9+ aws_logs as logs ,
10+ aws_s3_deployment as s3_deployment ,
11+ Aspects ,Stack ,RemovalPolicy ,Aws ,Duration ,CfnOutput
12+
13+ )
14+ from constructs import Construct
15+
16+ import json ,os
17+
18+ from cdk_nag import ( AwsSolutionsChecks , NagSuppressions )
19+
class ApplicationStack(Stack):
    """CDK stack for an isolated VPC with CodeArtifact, Glue and Step Functions.

    Resources are created in this order: cdk-nag checks, the VPC (with an S3
    gateway endpoint, subnet-id SSM parameters and flow logs), the S3 data
    bucket and its seed deployment, interface endpoints for CodeArtifact and
    Glue, the CodeArtifact domain and repositories, the Glue role / database /
    connection / job, the Step Functions state machine, and the stack outputs.
    """

    def __init__(self, scope: Construct, construct_id: str, cidr_block: str, **kwargs) -> None:
        """Create every resource of the stack.

        Args:
            scope: Parent construct.
            construct_id: Construct id of this stack.
            cidr_block: CIDR range assigned to the VPC.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.

        Raises:
            KeyError: If the ``CDK_DEFAULT_ACCOUNT`` environment variable is
                not set.
            OSError: If ``./scripts/statemachine/sfn.json`` cannot be read.
        """
        super().__init__(scope, construct_id, **kwargs)

        self._enable_cdk_nag()

        # The last five digits of the account id are embedded in the bucket name.
        cdk_account_id: str = os.environ["CDK_DEFAULT_ACCOUNT"]

        self.vpc = self._create_vpc(cidr_block)
        self._publish_subnet_ids()
        self._enable_vpc_flow_logs()

        bucket = self._create_data_bucket(cdk_account_id)

        self._add_interface_endpoints()

        code_artifact_url = self._create_code_artifact_resources()

        self.sg_glue_conn = self._create_glue_security_group()
        self.glue_job_role = self._create_glue_job_role(bucket)
        glue_job = self._create_glue_job(bucket)

        self._create_state_machine(bucket, glue_job, code_artifact_url)

        CfnOutput(
            self,
            "Repository_Name",
            value=self.repo_name,
            description="Code Artifact Repository Name",
        )
        CfnOutput(
            self,
            "Domain_Name",
            value=self.domain_name,
            description="Code Artifact Domain name for Repository",
        )

    # ------------------------------------------------------------------
    # CodeArtifact repositories (public helpers kept from the original API)
    # ------------------------------------------------------------------

    def create_pypi_repo(self) -> codeartifact.CfnRepository:
        """Create the CodeArtifact repository that mirrors public PyPI.

        Reads ``self.pypi_repo_name`` and ``self.domain_name``, so both must be
        set before this is called.
        """
        return codeartifact.CfnRepository(
            self,
            id=self.pypi_repo_name,
            domain_name=self.domain_name,
            repository_name=self.pypi_repo_name,
            external_connections=["public:pypi"],
            description="Provides PyPI artifacts from PyPA.",
        )

    def create_code_repo(self) -> codeartifact.CfnRepository:
        """Create the internal repository with the PyPI mirror as its upstream.

        Reads ``self.repo_name``, ``self.domain_name`` and
        ``self.pypi_repo_name``, so all three must be set before this is called.
        """
        return codeartifact.CfnRepository(
            self,
            id=self.repo_name,
            domain_name=self.domain_name,
            repository_name=self.repo_name,
            upstreams=[self.pypi_repo_name],
            description="Internal python package repository.",
        )

    # ------------------------------------------------------------------
    # Private helpers, called by __init__ in resource-creation order
    # ------------------------------------------------------------------

    def _enable_cdk_nag(self) -> None:
        """Attach the cdk-nag AwsSolutions pack and record the known suppressions.

        cdk-nag: https://github.com/cdklabs/cdk-nag
        Rules:   https://github.com/cdklabs/cdk-nag/blob/main/RULES.md#awssolutions

        Only the AwsSolutions security rule pack is registered here; the HIPAA
        and NIST 800-53 packs offered by cdk-nag are not enabled by this stack.
        """
        Aspects.of(self).add(AwsSolutionsChecks())

        # Suppressed findings - each one is an open TODO rather than an accepted risk.
        NagSuppressions.add_stack_suppressions(self, [
            {"id": "AwsSolutions-S1", "reason": "TODO: Set *server_access_logs_bucket* and *server_access_logs_prefix* to enable server access logging."},
            {"id": "AwsSolutions-IAM4", "reason": "TODO: Stop using AWS managed policies."},
            {"id": "AwsSolutions-IAM5", "reason": "TODO: Remove Wildcards in IAM roles."},
            {"id": "AwsSolutions-SF2", "reason": "TODO: Set the X-Ray Tracing on the Step Function."},
            {"id": "AwsSolutions-SF1", "reason": "TODO: Set the Step Function CloudWatch Logs log events to 'ALL' "},
        ])

    def _create_vpc(self, cidr_block: str) -> ec2.Vpc:
        """Create the single-AZ VPC with isolated subnets and an S3 gateway endpoint."""
        # NOTE(review): recent aws-cdk-lib releases deprecate `cidr` in favour of
        # `ip_addresses` - confirm the pinned version before switching.
        return ec2.Vpc(
            self,
            'enterprise-repo-vpc',
            gateway_endpoints={
                "S3": ec2.GatewayVpcEndpointOptions(
                    service=ec2.GatewayVpcEndpointAwsService.S3,
                ),
            },
            vpc_name='enterprise-repo-vpc',
            cidr=cidr_block,
            max_azs=1,
            enable_dns_hostnames=True,
            enable_dns_support=True,
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    name='Enterprise-Repo-Private-',
                    subnet_type=ec2.SubnetType.PRIVATE_ISOLATED,
                    cidr_mask=26,
                ),
            ],
        )

    def _publish_subnet_ids(self) -> None:
        """Store each private subnet id in SSM under ``/enterprise-repo/private-subnet-N``."""
        # PRIVATE_ISOLATED subnets are exposed through `isolated_subnets`, not
        # `private_subnets`; reading only the latter yields an empty list for
        # this VPC. Both are included so the parameters are written whichever
        # private subnet type the VPC is configured with.
        subnets = [*self.vpc.private_subnets, *self.vpc.isolated_subnets]
        for index, subnet in enumerate(subnets, start=1):
            ssm.StringParameter(
                self,
                f'enterprise-repo-private-subnet-{index}',
                string_value=subnet.subnet_id,
                parameter_name=f'/enterprise-repo/private-subnet-{index}',
            )

    def _enable_vpc_flow_logs(self) -> None:
        """Send VPC flow logs to a dedicated CloudWatch log group."""
        log_group = logs.LogGroup(self, "enterprise-repo-log-group")
        flow_log_role = iam.Role(
            self,
            "enterprise-repo-vpc-flow-log-role",
            assumed_by=iam.ServicePrincipal("vpc-flow-logs.amazonaws.com"),
        )
        ec2.FlowLog(
            self,
            "enterprise-repo-vpc-flow-log",
            resource_type=ec2.FlowLogResourceType.from_vpc(self.vpc),
            destination=ec2.FlowLogDestination.to_cloud_watch_logs(log_group, flow_log_role),
        )

    def _create_data_bucket(self, account_id: str) -> s3.Bucket:
        """Create the data bucket and upload ``./scripts/s3`` under the ``data`` prefix.

        Args:
            account_id: AWS account id; its last five characters become part of
                the bucket name.
        """
        bucket = s3.Bucket(
            self,
            "enterprise-repo-bucket",
            bucket_name=f"codeartifactblog-{account_id[-5:]}-{Aws.REGION}",
            auto_delete_objects=True,
            removal_policy=RemovalPolicy.DESTROY,
            block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
            encryption=s3.BucketEncryption.S3_MANAGED,
        )
        s3_deployment.BucketDeployment(
            self,
            "enterprise-repo-bucket-deployment",
            sources=[s3_deployment.Source.asset("./scripts/s3")],
            destination_bucket=bucket,
            destination_key_prefix="data",
        )
        return bucket

    def _add_interface_endpoints(self) -> None:
        """Add the CodeArtifact and Glue interface endpoints to the isolated subnets."""
        # (construct id, service-name suffix, private_dns_enabled). None leaves
        # the private-DNS setting at the construct's default, as the original
        # call for the CodeArtifact API endpoint did not set it.
        endpoints = (
            ("CodeArtifactEndPoint", "codeartifact.api", None),
            ("CodeArtifactRepositoriesEndPoint", "codeartifact.repositories", True),
            ("GlueRepositoriesEndPoint", "glue", True),
        )
        for endpoint_id, service_suffix, private_dns in endpoints:
            self.vpc.add_interface_endpoint(
                endpoint_id,
                service=ec2.InterfaceVpcEndpointService(
                    f'com.amazonaws.{Aws.REGION}.{service_suffix}'
                ),
                # `subnets` takes a SubnetSelection, not a bare SubnetType.
                subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_ISOLATED),
                private_dns_enabled=private_dns,
            )

    def _create_code_artifact_resources(self) -> str:
        """Create the CodeArtifact domain and both repositories.

        Returns:
            The pip index URL of the internal repository, with a literal ``{}``
            placeholder where the authorization token is substituted later by
            ``States.Format`` in the state machine.
        """
        self.domain_name = 'enterprise-repo-domain'
        # Name of the repository that mirrors public PyPI.
        self.pypi_repo_name = "pypi-store"
        self.repo_name = "enterprise-repo"
        self.domain = codeartifact.CfnDomain(self, "cfndomain", domain_name=self.domain_name)

        self.pypi_repo = self.create_pypi_repo()
        self.code_repo = self.create_code_repo()

        # Explicit ordering: domain -> PyPI mirror -> internal repository.
        # NOTE(review): newer aws-cdk-lib releases deprecate add_depends_on in
        # favour of add_dependency - confirm the pinned version before switching.
        self.pypi_repo.add_depends_on(self.domain)
        self.code_repo.add_depends_on(self.pypi_repo)

        return (
            f"https://aws:{{}}@{self.domain_name}-{Aws.ACCOUNT_ID}"
            f".d.codeartifact.{Aws.REGION}.amazonaws.com/pypi/{self.repo_name}/simple/"
        )

    def _create_glue_security_group(self) -> ec2.SecurityGroup:
        """Create the Glue connection security group with a self-referencing ingress rule."""
        security_group = ec2.SecurityGroup(
            self,
            id='sg_demo_glue_conn',
            vpc=self.vpc,
            allow_all_outbound=True,
            description='Security Group for Glue Connection',
        )
        security_group.add_ingress_rule(
            peer=security_group,
            connection=ec2.Port.all_traffic(),
        )
        return security_group

    def _create_glue_job_role(self, bucket: s3.Bucket) -> iam.Role:
        """Create the Glue job role with its customer-managed and AWS-managed policies."""
        policy = iam.ManagedPolicy(
            self,
            "GlueJobIamPolicy",
            managed_policy_name='enterprise-repo-glue-job-policy',
            description="Glue Job IAM Policy",
        )
        policy.add_statements(iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["s3:*"],
            resources=[f"{bucket.bucket_arn}/*", bucket.bucket_arn],
        ))
        policy.add_statements(iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["iam:PassRole"],
            resources=['*'],
            conditions={
                'StringLike': {
                    "iam:PassedToService": ["glue.amazonaws.com"],
                },
            },
        ))

        role = iam.Role(
            self,
            id="glue_job_role",
            role_name="enterprise_repo_glue_job_role",
            assumed_by=iam.ServicePrincipal("glue.amazonaws.com"),
            path="/service-role/",
        )
        role.add_managed_policy(policy)
        role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name("service-role/AWSGlueServiceRole")
        )
        return role

    def _create_glue_job(self, bucket: s3.Bucket) -> glue.Job:
        """Create the Glue database, the network connection and the Spark ETL job.

        Sets ``self.glue_conn`` and returns the job so the state machine can
        reference its name.
        """
        glue_database = glue.Database(
            self,
            id='enterprise-repo-glue-db',
            database_name='codeartifactblog_glue_db',
        )

        isolated = self.vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE_ISOLATED)
        self.glue_conn = glue.Connection(
            self,
            id='enterprise_repo_glue_conn',
            type=glue.ConnectionType.NETWORK,
            connection_name='enterprise-repo-glue-connection',
            security_groups=[self.sg_glue_conn],
            subnet=isolated.subnets[0],
        )

        return glue.Job(
            self,
            "enterprise_repo_spark_etl_job",
            executable=glue.JobExecutable.python_etl(
                glue_version=glue.GlueVersion.V3_0,
                python_version=glue.PythonVersion.THREE,
                script=glue.Code.from_asset("./scripts/glue/job.py"),
            ),
            connections=[self.glue_conn],
            role=self.glue_job_role,
            worker_count=3,
            job_name='enterprise-repo-glue-job',
            worker_type=glue.WorkerType.G_1_X,
            continuous_logging=glue.ContinuousLoggingProps(enabled=True),
            max_retries=0,
            enable_profiling_metrics=True,
            timeout=Duration.minutes(20),
            default_arguments={
                '--additional-python-modules': 'awswrangler',
                '--class': 'GlueApp',
                '--S3_BUCKET': bucket.bucket_name,
                '--GLUE_DATABASE': glue_database.database_name,
                '--python-modules-installer-option': '',
            },
            description="an example Python ETL job",
        )

    def _create_state_machine(self, bucket: s3.Bucket, glue_job: glue.Job, code_artifact_url: str) -> None:
        """Create the Step Functions execution role and the state machine.

        The definition is loaded from ``./scripts/statemachine/sfn.json``; the
        ``GenerateCodeArtifactURL`` state is patched so it formats the pip
        ``--index-url`` option from ``code_artifact_url`` and the authorization
        token found at ``$.taskresult.AuthorizationToken``.
        """
        policy = iam.ManagedPolicy(
            self,
            "enterprise_repo_sfn_iam_policy",
            managed_policy_name='enterprise-repo-sfn-policy',
            description="SFN IAM Policy",
        )
        policy.add_statements(iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["s3:PutObject", "s3:GetObject"],
            resources=[f"{bucket.bucket_arn}/*"],
        ))

        with open('./scripts/statemachine/sfn.json', encoding="utf-8") as definition_file:
            json_definition = json.load(definition_file)

        url_state_parameters = json_definition["States"]["GenerateCodeArtifactURL"]["Parameters"]
        url_state_parameters["codeartifacturl.$"] = (
            f"States.Format('--index-url={code_artifact_url}', $.taskresult.AuthorizationToken)"
        )
        definition = json.dumps(json_definition, indent=4)

        self.sfn_role = iam.Role(
            self,
            id="sfn_role",
            role_name="enterprise_repo_sfn_role",
            assumed_by=iam.ServicePrincipal("states.amazonaws.com"),
            path="/service-role/",
        )
        self.sfn_role.add_managed_policy(policy)
        for aws_managed_policy in (
            "AmazonS3ReadOnlyAccess",
            "AWSCodeArtifactReadOnlyAccess",
            "service-role/AWSGlueServiceRole",
        ):
            self.sfn_role.add_managed_policy(
                iam.ManagedPolicy.from_aws_managed_policy_name(aws_managed_policy)
            )

        sfn.CfnStateMachine(
            self,
            "enterprise_repo_state_machine",
            role_arn=self.sfn_role.role_arn,
            state_machine_name='enterprise-repo-step-function',
            definition_string=definition,
            definition_substitutions={
                "domain": self.domain_name,
                "aws_account_id": Aws.ACCOUNT_ID,
                "jobname": glue_job.job_name,
            },
        )
0 commit comments