
Commit bcf1d4c

feat: add GPU lowering to selene-hugr-qis-compiler (#1169)
This PR provides lowering for qsystem's GPU extension for selene-specific workflows. This is my first compilation PR, so if it's janky I apologise in advance. There were some battles getting to grips with inkwell, but I'm very happy with the result.

# Background

The qsystem GPU extension provides a general mechanism for declaring and using the API of a GPU library. For generality, this mechanism involves:

- the user defining functions with:
  - any (valid) function name,
  - any arrangement of integer, floating-point, or boolean arguments,
  - a return type of void, float, or integer;
- function names having some mapping to an integer tag;
- the ability to acquire and discard a GPU context;
- the ability to invoke the aforementioned functions given a GPU context, an integer tag, and the appropriate parameters;
- the ability to retrieve results after invoking said functions.

The intent is that a library appropriately fulfilling the user's specifications is linked with the user program in order to provide the required functionality. In Selene, this looks like adding the library as a `utility` at the build stage.

As this is the first such utility that represents a generic use case, breakages could get ugly. As such, I took some care to handle future breaking changes.

# API design

This lowering assumes the following function signatures in the library to be linked with the user program:

```c
char const* gpu_get_error();
```

This provides a string representing an error, to be made available when any of the following functions fails. It should return `nullptr` if there is no error to fetch.

Each of the following functions returns a bool representing success (`true`) or failure (`false`). Each result is validated and, upon `false`, an extended panic message is provided to the user containing the result of `gpu_get_error()` (or "No error message available" if `nullptr` is returned).
I use Selene's `panic_str` QIS extension to emit panics that are not constrained to 255 bytes, as a custom library may wish to provide far more detail than would fit in 255 bytes. Given that this lowering is specific to Selene, I don't believe deviating from standard QIS will be a controversial choice.

```c
bool gpu_validate_api(uint64_t major, uint64_t minor, uint64_t patch);
```

We do not currently know how the remainder of the API will change over time:

- It's feasible that array arguments, strings, etc. could be added at a later time. This will particularly impact the signature API (see the `gpu_call` description).
- Returning different kinds of data may be useful in future (and so fetching results may require additional approaches).

As such, this should be the first call to the GPU library, passing an API version (currently 0.1.0, the version I'm assigning to the API described here). It is invoked before any function that depends on breakable aspects of the API, with caching to avoid multiple calls. It should return `true` on success, `false` otherwise. Upon failure, `gpu_get_error()` is called, and therefore **these two functions must be kept compatible in future editions** if we opt to make breaking changes in future. The remaining functions may be broken in future editions, and as long as `gpu_validate_api` is managed appropriately on the library side, we should at least be able to fail early (at linking or early at runtime) to prevent undefined behaviour (e.g. by invoking a function with a modified signature).

```c
bool gpu_init(uint64_t _reserved, uint64_t* gpu_ref_out);
```

The `_reserved` parameter may be used in future if we wish to use multiple instances of GPU libraries, e.g. for maintaining distinct state between them.

```c
bool gpu_discard(uint64_t gpu_ref);
```

After this call, the library should free resources associated with the `gpu_ref` handle, and further invocations using this handle should error.
```c
bool gpu_get_function_id(char const* name, uint64_t* id_out);
```

Some implementations may wish to map functions to indices in an array, hash the incoming names, or something else. This is also an opportune moment to return `false` and thus fail early if the requested function name is not supported by the linked library.

```c
bool gpu_call(
    uint64_t handle,
    uint64_t function_id,
    uint64_t blob_size,
    char const* blob,
    char const* signature
);
```

The `handle` and `function_id` parameters should be apparent at this point. When a function is invoked, it requires parameters; this is where the blob and signature come in:

- Parameters are packed (as bytes) into an array, which is passed to `gpu_call`.
  - I chose this over varargs because varargs aren't particularly friendly to work with in a diagnostic setting. They can't be passed on to other functions, for example.
- The blob size is also passed to the library, which allows for quick validation and safer parsing.
- For completeness, a signature string (generated at compile time) is also passed as an argument.
  - Types are encoded as `i64 => i, u64 => i, f64 => f, bool => b`, with the inputs and return type separated by a colon.
  - The resulting signature string is in the form e.g.
    - `iifb:v` for `(int64, uint64, float, bool) -> void`
    - `b:f` for `(bool) -> float`
  - This is also an opportunity for validation. _Perhaps it should be moved to `gpu_get_function_id` so that validation takes place earlier._

```c
bool gpu_get_result(uint64_t gpu_ref, uint64_t out_len, char* out_result);
```

This extracts a result from a previous call, in FIFO order. Currently the only `out_len` supported by the underlying hugr is 64 bits, as functions are assumed to return double or uint64, but this can be changed at a later date without breaking the API. The lowering provided in this PR handles the casting and alignment.
# Controversial choices

- It might be more appropriate to return an integer error code rather than a boolean success flag. However, error codes are primarily useful where we have a defined system for handling different forms of error. We don't really have a way of catching those errors in user code, so we always end up either continuing or terminating; a bool felt satisfactory here. This could be broken in future - all we need to do is keep the bool return for `gpu_validate_api`, which is a clear success-or-fail case anyway.
- The use of `panic_str` may seem odd, but I found it very useful while implementing this. A library I am testing provides stack traces upon failure, and this helped identify issues with my calls - directly in panic messages on the other side, where a normal `panic()` would have truncated them. If it helped me, it will help users.
- I chose to force the validation of returned function statuses _not_ to be inlined. It's a two-line removal if we want it inline. The primary reason is that the error handling needlessly clutters otherwise-clean LLVM IR. If that isn't a good enough reason, I can remove it.
1 parent b82c8ad commit bcf1d4c

File tree

32 files changed: +3192 -0 lines changed

.gitignore

Lines changed: 3 additions & 0 deletions

```diff
@@ -38,3 +38,6 @@ jupyter_execute/
 
 # binaries
 *.so
+
+# a reserved working dir for local experimentation
+/scratch
```

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default.

qis-compiler/Cargo.toml

Lines changed: 2 additions & 0 deletions

```diff
@@ -19,6 +19,7 @@ pyo3 = { workspace = true, features = ["abi3-py310", "anyhow"] }
 serde_json.workspace = true
 tracing.workspace = true
 itertools.workspace = true
+strum.workspace = true
 tket = { path = "../tket", version = "0.16.0" }
 tket-qsystem = { path = "../tket-qsystem", version = "0.22.0", features = [
     "llvm",
@@ -36,6 +37,7 @@ pyo3-build-config.workspace = true
 rstest.workspace = true
 serde.workspace = true
 typetag.workspace = true
+insta = "1.43"
 
 [package.metadata.cargo-machete]
 ignored = ["cbindgen", "pyo3-build-config"]
```
6.64 KB binary file not shown.
Lines changed: 204 additions & 0 deletions
```llvm
; ModuleID = 'hugr'
source_filename = "hugr"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-apple-darwin"

@gpu_cache_is_set_function_id_fn_returning_float = thread_local local_unnamed_addr global i8 0
@gpu_cache_function_id_fn_returning_float = thread_local local_unnamed_addr global i64 0
@gpu_validated = thread_local local_unnamed_addr global i8 0
@no_gpu_error = private unnamed_addr constant [27 x i8] c"No error message available\00", align 1
@function_name = private unnamed_addr constant [19 x i8] c"fn_returning_float\00", align 1
@gpu_cache_is_set_function_id_fn_returning_int = thread_local local_unnamed_addr global i8 0
@gpu_cache_function_id_fn_returning_int = thread_local local_unnamed_addr global i64 0
@function_name.1 = private unnamed_addr constant [17 x i8] c"fn_returning_int\00", align 1
@arg_types = private unnamed_addr constant [5 x i8] c"if:i\00", align 1
@res_a.19FB4E83.0 = private constant [11 x i8] c"\0AUSER:INT:a"
@arg_types.2 = private unnamed_addr constant [4 x i8] c"i:f\00", align 1
@res_b.0E048F9C.0 = private constant [13 x i8] c"\0CUSER:FLOAT:b"

; Function Attrs: noinline
define i64 @gpu_function_id_fn_returning_float() local_unnamed_addr #0 {
entry:
  %function_id = load i8, i8* @gpu_cache_is_set_function_id_fn_returning_float, align 1
  %needs_lookup = icmp eq i8 %function_id, 0
  br i1 %needs_lookup, label %lookup, label %read_cache

common.ret:                                       ; preds = %read_cache, %lookup
  %common.ret.op = phi i64 [ %function_id2, %lookup ], [ %function_id1, %read_cache ]
  ret i64 %common.ret.op

lookup:                                           ; preds = %entry
  tail call void @run_gpu_validation()
  %function_id_ptr = alloca i64, align 8
  %function_id_call = call i8 @gpu_get_function_id(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @function_name, i64 0, i64 0), i64* nonnull %function_id_ptr)
  call void @validate_gpu_response(i8 %function_id_call)
  %function_id2 = load i64, i64* %function_id_ptr, align 8
  store i64 %function_id2, i64* @gpu_cache_function_id_fn_returning_float, align 8
  store i8 1, i8* @gpu_cache_is_set_function_id_fn_returning_float, align 1
  br label %common.ret

read_cache:                                       ; preds = %entry
  %function_id1 = load i64, i64* @gpu_cache_function_id_fn_returning_float, align 8
  br label %common.ret
}

; Function Attrs: noinline
define void @run_gpu_validation() local_unnamed_addr #0 {
entry:
  %validated = load i8, i8* @gpu_validated, align 1
  %already_validated.not = icmp eq i8 %validated, 0
  br i1 %already_validated.not, label %validate, label %common.ret

common.ret:                                       ; preds = %entry, %validate
  ret void

validate:                                         ; preds = %entry
  %validate_call = tail call i8 @gpu_validate_api(i64 0, i64 1, i64 0)
  tail call void @validate_gpu_response(i8 %validate_call)
  store i8 1, i8* @gpu_validated, align 1
  br label %common.ret
}

declare i8 @gpu_validate_api(i64, i64, i64) local_unnamed_addr

; Function Attrs: noinline
define void @validate_gpu_response(i8 %0) local_unnamed_addr #0 {
entry:
  %success.not = icmp eq i8 %0, 0
  br i1 %success.not, label %err, label %ok

ok:                                               ; preds = %entry
  ret void

err:                                              ; preds = %entry
  tail call void @gpu_error_handler()
  unreachable
}

; Function Attrs: noinline noreturn
define void @gpu_error_handler() local_unnamed_addr #1 {
entry:
  %error_message = tail call i8* @gpu_get_error()
  %is_null = icmp eq i8* %error_message, null
  %error_message_nonnull = select i1 %is_null, i8* getelementptr inbounds ([27 x i8], [27 x i8]* @no_gpu_error, i64 0, i64 0), i8* %error_message
  tail call void @panic_str(i32 70002, i8* %error_message_nonnull)
  unreachable
}

declare i8* @gpu_get_error() local_unnamed_addr

; Function Attrs: noreturn
declare void @panic_str(i32, i8*) local_unnamed_addr #2

declare i8 @gpu_get_function_id(i8*, i64*) local_unnamed_addr

; Function Attrs: noinline
define i64 @gpu_function_id_fn_returning_int() local_unnamed_addr #0 {
entry:
  %function_id = load i8, i8* @gpu_cache_is_set_function_id_fn_returning_int, align 1
  %needs_lookup = icmp eq i8 %function_id, 0
  br i1 %needs_lookup, label %lookup, label %read_cache

common.ret:                                       ; preds = %read_cache, %lookup
  %common.ret.op = phi i64 [ %function_id2, %lookup ], [ %function_id1, %read_cache ]
  ret i64 %common.ret.op

lookup:                                           ; preds = %entry
  tail call void @run_gpu_validation()
  %function_id_ptr = alloca i64, align 8
  %function_id_call = call i8 @gpu_get_function_id(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @function_name.1, i64 0, i64 0), i64* nonnull %function_id_ptr)
  call void @validate_gpu_response(i8 %function_id_call)
  %function_id2 = load i64, i64* %function_id_ptr, align 8
  store i64 %function_id2, i64* @gpu_cache_function_id_fn_returning_int, align 8
  store i8 1, i8* @gpu_cache_is_set_function_id_fn_returning_int, align 1
  br label %common.ret

read_cache:                                       ; preds = %entry
  %function_id1 = load i64, i64* @gpu_cache_function_id_fn_returning_int, align 8
  br label %common.ret
}

declare i8 @gpu_init(i64, i64*) local_unnamed_addr

declare i8 @gpu_call(i64, i64, i64, i8*, i8*) local_unnamed_addr

declare i8 @gpu_get_result(i64, i64, i8*) local_unnamed_addr

declare void @print_int(i8*, i64, i64) local_unnamed_addr

declare i8 @gpu_discard(i64) local_unnamed_addr

declare void @print_float(i8*, i64, double) local_unnamed_addr

define i64 @qmain(i64 %0) local_unnamed_addr {
entry:
  %gpu_ref_ptr.i = alloca i64, align 8
  %gpu_input_blob.i = alloca [16 x i8], align 8
  %int_result.i = alloca i64, align 8
  %gpu_input_blob26.i = alloca i64, align 8
  %int_result32.i = alloca i64, align 8
  tail call void @setup(i64 %0)
  %1 = bitcast i64* %gpu_ref_ptr.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %1)
  %2 = getelementptr inbounds [16 x i8], [16 x i8]* %gpu_input_blob.i, i64 0, i64 0
  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %2)
  %3 = bitcast i64* %int_result.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %3)
  %4 = bitcast i64* %gpu_input_blob26.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %4)
  %5 = bitcast i64* %int_result32.i to i8*
  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %5)
  %function_id_call.i = tail call i64 @gpu_function_id_fn_returning_float()
  %function_id_call3.i = tail call i64 @gpu_function_id_fn_returning_int()
  tail call void @run_gpu_validation()
  %gpu_ref_call.i = call i8 @gpu_init(i64 0, i64* nonnull %gpu_ref_ptr.i)
  call void @validate_gpu_response(i8 %gpu_ref_call.i)
  %gpu_ref.i = load i64, i64* %gpu_ref_ptr.i, align 8
  %6 = bitcast [16 x i8]* %gpu_input_blob.i to i64*
  store i64 42, i64* %6, align 8
  %dest_ptr17.i = getelementptr inbounds [16 x i8], [16 x i8]* %gpu_input_blob.i, i64 0, i64 8
  %7 = bitcast i8* %dest_ptr17.i to i64*
  store i64 4613303441197561744, i64* %7, align 8
  %8 = call i8 @gpu_call(i64 %gpu_ref.i, i64 %function_id_call3.i, i64 16, i8* nonnull %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @arg_types, i64 0, i64 0))
  call void @validate_gpu_response(i8 %8)
  %read_status.i = call i8 @gpu_get_result(i64 %gpu_ref.i, i64 8, i8* nonnull %3)
  call void @validate_gpu_response(i8 %read_status.i)
  %int_result20.i = load i64, i64* %int_result.i, align 8
  call void @print_int(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @res_a.19FB4E83.0, i64 0, i64 0), i64 10, i64 %int_result20.i)
  store i64 %int_result20.i, i64* %gpu_input_blob26.i, align 8
  %9 = call i8 @gpu_call(i64 %gpu_ref.i, i64 %function_id_call.i, i64 8, i8* nonnull %4, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @arg_types.2, i64 0, i64 0))
  call void @validate_gpu_response(i8 %9)
  %read_status34.i = call i8 @gpu_get_result(i64 %gpu_ref.i, i64 8, i8* nonnull %5)
  call void @validate_gpu_response(i8 %read_status34.i)
  %float_result_ptr.i = bitcast i64* %int_result32.i to double*
  %float_result.i = load double, double* %float_result_ptr.i, align 8
  %10 = call i8 @gpu_discard(i64 %gpu_ref.i)
  call void @validate_gpu_response(i8 %10)
  call void @print_float(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @res_b.0E048F9C.0, i64 0, i64 0), i64 12, double %float_result.i)
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %1)
  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %2)
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %3)
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %4)
  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %5)
  %11 = call i64 @teardown()
  ret i64 %11
}

declare void @setup(i64) local_unnamed_addr

declare i64 @teardown() local_unnamed_addr

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3

; Function Attrs: argmemonly nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3

attributes #0 = { noinline }
attributes #1 = { noinline noreturn }
attributes #2 = { noreturn }
attributes #3 = { argmemonly nofree nosync nounwind willreturn }

!name = !{!0}

!0 = !{!"mainlib"}
```
