diff --git a/Makefile b/Makefile deleted file mode 100644 index 670984e..0000000 --- a/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -.PHONY: help format clean build generate-client run deps tidy - -.DEFAULT_GOAL := help - -help: ## Show this help message - @echo "Available commands:" - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' - -format: ## Format code using gofmt - go fmt ./... - -clean: ## Clean build artifacts - rm -rf ./bin - -build: ## Build the example application - go build -o ./bin/judgeval ./examples - -deps: ## Download dependencies - go mod download - -tidy: ## Tidy up dependencies - go mod tidy - -generate-client: ## Generate API client from OpenAPI spec - python3 scripts/generate-client.py - make format - -run: ## Run the example application with environment variables (usage: make run ) - @if [ -z "$(filter-out $@,$(MAKECMDGOALS))" ]; then \ - echo "Error: Please specify an example to run"; \ - echo "Usage: make run "; \ - echo "Available examples:"; \ - echo " simple-chat"; \ - echo " manual-otel"; \ - exit 1; \ - fi - @if [ -f .env ]; then \ - set -a && . ./.env && set +a && cd examples/$(filter-out $@,$(MAKECMDGOALS)) && go run .; \ - else \ - cd examples/$(filter-out $@,$(MAKECMDGOALS)) && go run .; \ - fi - -test: ## Run tests - go test ./... - -install: ## Install the library (for local development) - go install ./... 
- -# Handle arguments passed to make run -%: - @: diff --git a/README.md b/README.md index 298b0fa..8517d5c 100644 --- a/README.md +++ b/README.md @@ -1 +1,3 @@ -# Judgeval Go SDK \ No newline at end of file +# Judgeval Go SDK + +[![Go Reference](https://pkg.go.dev/badge/github.com/JudgmentLabs/judgeval-go.svg)](https://pkg.go.dev/github.com/JudgmentLabs/judgeval-go) diff --git a/bun.lock b/bun.lock new file mode 100644 index 0000000..1786a1d --- /dev/null +++ b/bun.lock @@ -0,0 +1,32 @@ +{ + "lockfileVersion": 1, + "workspaces": { + "": { + "name": "judgeval-go", + "devDependencies": { + "dotenv-cli": "^7.4.2", + }, + }, + }, + "packages": { + "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="], + + "dotenv": ["dotenv@16.6.1", "", {}, "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow=="], + + "dotenv-cli": ["dotenv-cli@7.4.4", "", { "dependencies": { "cross-spawn": "^7.0.6", "dotenv": "^16.3.0", "dotenv-expand": "^10.0.0", "minimist": "^1.2.6" }, "bin": { "dotenv": "cli.js" } }, "sha512-XkBYCG0tPIes+YZr4SpfFv76SQrV/LeCE8CI7JSEMi3VR9MvTihCGTOtbIexD6i2mXF+6px7trb1imVCXSNMDw=="], + + "dotenv-expand": ["dotenv-expand@10.0.0", "", {}, "sha512-GopVGCpVS1UKH75VKHGuQFqS1Gusej0z4FyQkPdwjil2gNIv+LNsqBlboOzpJFZKVT95GkCyWJbBSdFEFUWI2A=="], + + "isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="], + + "minimist": ["minimist@1.2.8", "", {}, "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="], + + "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], + + "shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } 
}, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="], + + "shebang-regex": ["shebang-regex@3.0.0", "", {}, "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A=="], + + "which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="], + } +} diff --git a/examples/manual-otel/go.mod b/examples/manual-otel/go.mod deleted file mode 100644 index d5bd397..0000000 --- a/examples/manual-otel/go.mod +++ /dev/null @@ -1,32 +0,0 @@ -module manual-otel-example - -go 1.25.1 - -require ( - github.com/JudgmentLabs/judgeval-go v0.0.0 - go.opentelemetry.io/otel v1.38.0 - go.opentelemetry.io/otel/sdk v1.38.0 - go.opentelemetry.io/otel/trace v1.38.0 -) - -require ( - github.com/cenkalti/backoff/v5 v5.0.3 // indirect - github.com/go-logr/logr v1.4.3 // indirect - github.com/go-logr/stdr v1.2.2 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect - go.opentelemetry.io/otel/metric v1.38.0 // indirect - go.opentelemetry.io/proto/otlp v1.7.1 // indirect - golang.org/x/net v0.43.0 // indirect - golang.org/x/sys v0.35.0 // indirect - golang.org/x/text v0.28.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect - google.golang.org/grpc v1.75.0 // indirect - google.golang.org/protobuf v1.36.8 // indirect -) - -replace github.com/JudgmentLabs/judgeval-go => ../../ diff --git a/examples/manual-otel/go.sum b/examples/manual-otel/go.sum deleted file 
mode 100644 index 787a1bd..0000000 --- a/examples/manual-otel/go.sum +++ /dev/null @@ -1,59 +0,0 @@ -github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= -github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= -github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= -github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -go.opentelemetry.io/auto/sdk v1.1.0 
h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= -go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F04bJHUlztTsNGJ2l+6he8c+y/b//eR0jjjemT4= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4= -go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= -go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= -go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= -go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= -go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= -go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= -go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= -go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= -go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= -go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= -go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= -go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod 
h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= -gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= -gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY= -google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc= -google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= -google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= -google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= -google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/examples/manual-otel/main.go b/examples/manual-otel/main.go deleted file mode 100644 index 1c10357..0000000 --- a/examples/manual-otel/main.go +++ /dev/null @@ -1,116 +0,0 @@ -package main - -import ( - "context" - "fmt" - "log" - "time" - - "github.com/JudgmentLabs/judgeval-go/pkg/tracer" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/resource" - sdktrace "go.opentelemetry.io/otel/sdk/trace" -) - 
-func processData(ctx context.Context, data string) string { - _, span := otel.Tracer("data-processor").Start(ctx, "process_data") - defer span.End() - - span.SetAttributes( - attribute.String("data.input", data), - attribute.Int("data.length", len(data)), - ) - - time.Sleep(100 * time.Millisecond) - - result := fmt.Sprintf("Processed: %s", data) - span.SetAttributes(attribute.String("data.output", result)) - - return result -} - -func initOtel(judgmentTracer *tracer.Tracer) (func(), error) { - res, err := resource.New(context.Background(), - resource.WithAttributes( - attribute.String("service.name", "manual-otel-example"), - attribute.String("service.version", "1.0.0"), - attribute.String("telemetry.sdk.name", "opentelemetry"), - attribute.String("telemetry.sdk.version", "1.0.0"), - ), - ) - if err != nil { - return nil, fmt.Errorf("failed to create resource: %w", err) - } - - var spanExporter sdktrace.SpanExporter - if judgmentTracer != nil { - spanExporter = judgmentTracer.GetSpanExporter() - } - - tp := sdktrace.NewTracerProvider( - sdktrace.WithResource(res), - sdktrace.WithSpanProcessor(sdktrace.NewBatchSpanProcessor(spanExporter)), - ) - - otel.SetTracerProvider(tp) - - return func() { - time.Sleep(10 * time.Second) - if err := tp.Shutdown(context.Background()); err != nil { - log.Printf("Error shutting down tracer provider: %v", err) - } - }, nil -} - -func main() { - fmt.Println("Manual OpenTelemetry Instrumentation Example") - fmt.Println("===========================================") - - judgmentTracer, _ := tracer.NewTracer( - tracer.WithConfiguration(tracer.NewTracerConfiguration( - tracer.WithProjectName("manual-otel-example"), - )), - tracer.WithInitialize(false), - ) - - cleanup, _ := initOtel(judgmentTracer) - defer cleanup() - defer judgmentTracer.Shutdown(context.Background()) - - ctx := context.Background() - - ctx, rootSpan := otel.Tracer("main").Start(ctx, "manual_instrumentation_example") - 
rootSpan.SetAttributes(attribute.String("example.type", "manual_otel")) - - result1 := processData(ctx, "Hello, World!") - fmt.Printf("Result 1: %s\n", result1) - - result2 := processData(ctx, "Manual instrumentation") - fmt.Printf("Result 2: %s\n", result2) - - judgmentSpan, _ := judgmentTracer.Span(ctx, "judgment_metrics") - judgmentTracer.SetGeneralSpan(judgmentSpan) - judgmentTracer.SetAttribute(judgmentSpan, "service.type", "manual_otel") - - requestData := map[string]interface{}{ - "endpoint": "/api/process", - "method": "POST", - "timestamp": time.Now().Unix(), - } - judgmentTracer.SetInput(judgmentSpan, requestData) - - time.Sleep(50 * time.Millisecond) - - responseData := map[string]interface{}{ - "status_code": 200, - "response_time_ms": 50, - "processed_items": 2, - } - judgmentTracer.SetOutput(judgmentSpan, responseData) - judgmentSpan.End() - - rootSpan.End() - - fmt.Println("\nExample completed!") -} diff --git a/examples/simple-chat/main.go b/examples/simple-chat/main.go deleted file mode 100644 index 8779b45..0000000 --- a/examples/simple-chat/main.go +++ /dev/null @@ -1,282 +0,0 @@ -package main - -import ( - "bufio" - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "net/http" - "os" - "strings" - "time" - - "github.com/JudgmentLabs/judgeval-go/pkg/data" - "github.com/JudgmentLabs/judgeval-go/pkg/scorers" - "github.com/JudgmentLabs/judgeval-go/pkg/scorers/api_scorers" - "github.com/JudgmentLabs/judgeval-go/pkg/tracer" - "go.opentelemetry.io/otel/trace" -) - -type ChatMessage struct { - Role string `json:"role"` - Content string `json:"content"` -} - -type ChatRequest struct { - Model string `json:"model"` - Messages []ChatMessage `json:"messages"` - MaxTokens int `json:"max_tokens,omitempty"` - Temperature float64 `json:"temperature,omitempty"` -} - -type ChatResponse struct { - Choices []struct { - Message ChatMessage `json:"message"` - FinishReason string `json:"finish_reason"` - } `json:"choices"` - Usage struct { - PromptTokens int 
`json:"prompt_tokens"` - CompletionTokens int `json:"completion_tokens"` - TotalTokens int `json:"total_tokens"` - } `json:"usage"` - Model string `json:"model"` - Error struct { - Message string `json:"message"` - Type string `json:"type"` - } `json:"error"` -} - -type ChatClient struct { - apiKey string - baseURL string - client *http.Client - tracer *tracer.Tracer -} - -func NewChatClient(apiKey string) *ChatClient { - return &ChatClient{ - apiKey: apiKey, - baseURL: "https://api.openai.com/v1/chat/completions", - client: &http.Client{ - Timeout: 30 * time.Second, - }, - } -} - -func (c *ChatClient) SetTracer(t *tracer.Tracer) { - c.tracer = t -} - -func (c *ChatClient) SendMessage(ctx context.Context, messages []ChatMessage) (*ChatResponse, error) { - reqBody := ChatRequest{ - Model: "gpt-3.5-turbo", - Messages: messages, - MaxTokens: 1000, - Temperature: 0.7, - } - - if c.tracer != nil { - if span := trace.SpanFromContext(ctx); span != nil { - c.tracer.SetInput(span, reqBody) - - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIRequestModel, reqBody.Model) - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIRequestTemperature, reqBody.Temperature) - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIRequestMaxTokens, reqBody.MaxTokens) - - if len(messages) > 0 { - lastMessage := messages[len(messages)-1] - if lastMessage.Role == "user" { - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIPrompt, lastMessage.Content) - } - } - } - } - - jsonData, err := json.Marshal(reqBody) - if err != nil { - return nil, fmt.Errorf("failed to marshal request: %w", err) - } - - req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL, bytes.NewBuffer(jsonData)) - if err != nil { - return nil, fmt.Errorf("failed to create request: %w", err) - } - - req.Header.Set("Content-Type", "application/json") - req.Header.Set("Authorization", "Bearer "+c.apiKey) - - resp, err := c.client.Do(req) - if err != nil { - return nil, fmt.Errorf("failed to send request: %w", 
err) - } - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, fmt.Errorf("failed to read response: %w", err) - } - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("API error (status %d): %s", resp.StatusCode, string(body)) - } - - var chatResp ChatResponse - if err := json.Unmarshal(body, &chatResp); err != nil { - return nil, fmt.Errorf("failed to unmarshal response: %w", err) - } - - if c.tracer != nil { - if span := trace.SpanFromContext(ctx); span != nil { - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIResponseModel, chatResp.Model) - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIUsageInputTokens, chatResp.Usage.PromptTokens) - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIUsageOutputTokens, chatResp.Usage.CompletionTokens) - - if len(chatResp.Choices) > 0 { - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAICompletion, chatResp.Choices[0].Message.Content) - c.tracer.SetAttribute(span, tracer.AttributeKeys.GenAIResponseFinishReasons, chatResp.Choices[0].FinishReason) - } - } - } - - return &chatResp, nil -} - -func main() { - apiKey := os.Getenv("OPENAI_API_KEY") - if apiKey == "" { - fmt.Println("Error: OPENAI_API_KEY environment variable is required") - fmt.Println("Please set it with: export OPENAI_API_KEY=your_api_key_here") - os.Exit(1) - } - - chatClient := NewChatClient(apiKey) - - if os.Getenv("JUDGMENT_API_URL") != "" && os.Getenv("JUDGMENT_API_KEY") != "" { - t, err := tracer.NewTracer( - tracer.WithConfiguration(tracer.NewTracerConfiguration( - tracer.WithProjectName("default_project"), - )), - ) - if err != nil { - fmt.Printf("Warning: Failed to initialize tracer: %v\n", err) - } else { - chatClient.SetTracer(t) - defer t.Shutdown(context.Background()) - } - } - - fmt.Println("🤖 Simple Chat with OpenAI") - fmt.Println("Type 'quit' or 'exit' to end the conversation") - fmt.Println("Type 'clear' to clear conversation history") - 
fmt.Println("----------------------------------------") - - var messages []ChatMessage - scanner := bufio.NewScanner(os.Stdin) - messageCount := 0 - - ctx := context.Background() - var parentSpan trace.Span - if chatClient.tracer != nil { - parentSpan, ctx = chatClient.tracer.Span(ctx, "chat-session") - chatClient.tracer.SetGeneralSpan(parentSpan) - chatClient.tracer.SetAttribute(parentSpan, "chat.session.start_time", time.Now().Unix()) - defer parentSpan.End() - } - - for { - fmt.Print("You: ") - if !scanner.Scan() { - break - } - - userInput := strings.TrimSpace(scanner.Text()) - if userInput == "" { - continue - } - - if userInput == "quit" || userInput == "exit" { - fmt.Println("Goodbye! 👋") - if chatClient.tracer != nil && parentSpan != nil { - chatClient.tracer.SetAttribute(parentSpan, "chat.session.end_time", time.Now().Unix()) - chatClient.tracer.SetAttribute(parentSpan, "chat.session.message_count", messageCount) - } - break - } - - if userInput == "clear" { - messages = nil - fmt.Println("Conversation history cleared.") - continue - } - - messages = append(messages, ChatMessage{ - Role: "user", - Content: userInput, - }) - - messageCount++ - messageCtx := ctx - var span trace.Span - if chatClient.tracer != nil { - span, messageCtx = chatClient.tracer.Span(ctx, "OPENAI_API_CALL") - chatClient.tracer.SetLLMSpan(span) - chatClient.tracer.SetAttribute(span, "chat.message.number", messageCount) - defer span.End() - } - - fmt.Print("Bot: ") - resp, err := chatClient.SendMessage(messageCtx, messages) - if err != nil { - fmt.Printf("Error: %v\n", err) - continue - } - - if len(resp.Choices) == 0 { - fmt.Println("No response received from OpenAI") - continue - } - - botMessage := resp.Choices[0].Message.Content - fmt.Println(botMessage) - - messages = append(messages, ChatMessage{ - Role: "assistant", - Content: botMessage, - }) - - if chatClient.tracer != nil { - if span := trace.SpanFromContext(messageCtx); span != nil { - chatClient.tracer.SetOutput(span, 
botMessage) - } - } - - // Async evaluation for answer relevancy - if chatClient.tracer != nil { - go func() { - // Create answer relevancy scorer - scorer := api_scorers.NewAnswerRelevancyScorer( - scorers.WithThreshold(0.7), - scorers.WithModel("gpt-3.5-turbo"), - ) - - // Create example for evaluation - example := data.NewExample( - data.WithName(fmt.Sprintf("chat-message-%d", messageCount)), - data.WithProperty("input", userInput), - data.WithProperty("actual_output", botMessage), - ) - - // Trigger async evaluation - chatClient.tracer.AsyncEvaluate(messageCtx, scorer, example, "gpt-3.5-turbo") - }() - } - - fmt.Println() - } - - if err := scanner.Err(); err != nil { - fmt.Printf("Error reading input: %v\n", err) - } -} diff --git a/examples/simple-chat/go.mod b/examples/simple_chat/go.mod similarity index 79% rename from examples/simple-chat/go.mod rename to examples/simple_chat/go.mod index 75d32f9..11e0a5a 100644 --- a/examples/simple-chat/go.mod +++ b/examples/simple_chat/go.mod @@ -4,7 +4,8 @@ go 1.25.1 require ( github.com/JudgmentLabs/judgeval-go v0.0.0 - go.opentelemetry.io/otel/trace v1.38.0 + github.com/langwatch/langwatch/sdk-go v0.0.1 + github.com/openai/openai-go v1.12.0 ) require ( @@ -13,12 +14,17 @@ require ( github.com/go-logr/stdr v1.2.2 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect + github.com/tidwall/gjson v1.14.4 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/otel v1.38.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect go.opentelemetry.io/otel/metric v1.38.0 // indirect go.opentelemetry.io/otel/sdk v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect 
go.opentelemetry.io/proto/otlp v1.7.1 // indirect golang.org/x/net v0.43.0 // indirect golang.org/x/sys v0.35.0 // indirect diff --git a/examples/simple-chat/go.sum b/examples/simple_chat/go.sum similarity index 81% rename from examples/simple-chat/go.sum rename to examples/simple_chat/go.sum index 787a1bd..c64b267 100644 --- a/examples/simple-chat/go.sum +++ b/examples/simple_chat/go.sum @@ -15,10 +15,24 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= +github.com/langwatch/langwatch/sdk-go v0.0.1 h1:27JMAG2fGYeot1jiA6KefPUVavlXPPV9FfwPJyFuy8k= +github.com/langwatch/langwatch/sdk-go v0.0.1/go.mod h1:oDtLSdCMR7rh+dMo+gZrotfixWdyipQXWpGehAavcdk= +github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0= +github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= +github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod 
h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= diff --git a/examples/simple_chat/main.go b/examples/simple_chat/main.go new file mode 100644 index 0000000..7ab6424 --- /dev/null +++ b/examples/simple_chat/main.go @@ -0,0 +1,198 @@ +package main + +import ( + "bufio" + "context" + "fmt" + "os" + "strings" + "time" + + v1 "github.com/JudgmentLabs/judgeval-go/v1" + otelopenai "github.com/langwatch/langwatch/sdk-go/instrumentation/openai" + "github.com/openai/openai-go" + oaioption "github.com/openai/openai-go/option" + "go.opentelemetry.io/otel/trace" +) + +type ChatClient struct { + client openai.Client + judgmentClient *v1.Judgeval + tracer *v1.Tracer +} + +func NewChatClient(apiKey string) (*ChatClient, error) { + client := openai.NewClient( + oaioption.WithAPIKey(apiKey), + oaioption.WithMiddleware(otelopenai.Middleware("default_project", + otelopenai.WithCaptureInput(), + otelopenai.WithCaptureOutput(), + )), + ) + + return &ChatClient{ + client: client, + }, nil +} + +func (c *ChatClient) SetJudgeval(judgmentClient *v1.Judgeval, tracer *v1.Tracer) { + c.judgmentClient = judgmentClient + c.tracer = tracer +} + +func (c *ChatClient) SendMessage(ctx context.Context, messages []openai.ChatCompletionMessageParamUnion) (string, error) { + response, err := c.client.Chat.Completions.New(ctx, openai.ChatCompletionNewParams{ + Model: openai.ChatModelGPT4, + Messages: messages, + 
MaxTokens: openai.Int(1000), + Temperature: openai.Float(0.7), + }) + if err != nil { + return "", fmt.Errorf("chat completion failed: %w", err) + } + + if len(response.Choices) == 0 { + return "", fmt.Errorf("no response received from OpenAI") + } + + return response.Choices[0].Message.Content, nil +} + +func handleMessage(ctx context.Context, chatClient *ChatClient, userInput string, messages []openai.ChatCompletionMessageParamUnion, messageCount int) ([]openai.ChatCompletionMessageParamUnion, string, error) { + var spanCtx context.Context + var span trace.Span + if chatClient.tracer != nil { + spanCtx, span = chatClient.tracer.Span(ctx, "chat-message") + defer span.End() + } else { + spanCtx = ctx + } + + messages = append(messages, openai.SystemMessage("You are a helpful assistant. Echo whatever the user says.")) + + messages = append(messages, openai.UserMessage(userInput)) + + fmt.Print("Bot: ") + botMessage, err := chatClient.SendMessage(spanCtx, messages) + if err != nil { + return messages[:len(messages)-1], "", err + } + + fmt.Println(botMessage) + messages = append(messages, openai.AssistantMessage(botMessage)) + + if chatClient.tracer != nil && chatClient.judgmentClient != nil { + chatClient.tracer.AsyncEvaluate(spanCtx, chatClient.judgmentClient.Scorers.BuiltIn.AnswerCorrectness(v1.AnswerCorrectnessScorerParams{ + Threshold: v1.Float(0.7), + }), v1.NewExample(v1.ExampleParams{ + Name: v1.String(fmt.Sprintf("chat-message-%d", messageCount)), + Properties: map[string]any{ + "input": "You are a helpful assistant. Echo whatever the user says. 
Do not do anything else.", + "actual_output": botMessage, + "expected_output": userInput, + }, + })) + } + + return messages, botMessage, nil +} + +func main() { + apiKey := os.Getenv("OPENAI_API_KEY") + if apiKey == "" { + fmt.Println("Error: OPENAI_API_KEY environment variable is required") + fmt.Println("Please set it with: export OPENAI_API_KEY=your_api_key_here") + os.Exit(1) + } + + chatClient, err := NewChatClient(apiKey) + if err != nil { + fmt.Printf("Error: Failed to create chat client: %v\n", err) + os.Exit(1) + } + + var tracer *v1.Tracer + var judgmentClient *v1.Judgeval + if os.Getenv("JUDGMENT_API_URL") != "" && os.Getenv("JUDGMENT_API_KEY") != "" { + client, err := v1.NewJudgeval( + v1.WithAPIKey(os.Getenv("JUDGMENT_API_KEY")), + v1.WithOrganizationID(os.Getenv("JUDGMENT_ORG_ID")), + ) + if err != nil { + fmt.Printf("Warning: Failed to create Judgment client: %v\n", err) + } else { + judgmentClient = client + ctx := context.Background() + tracer, err = client.Tracer.Create(ctx, v1.TracerCreateParams{ + ProjectName: "default_project", + }) + if err != nil { + fmt.Printf("Warning: Failed to initialize tracer: %v\n", err) + } else { + chatClient.SetJudgeval(judgmentClient, tracer) + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + tracer.Shutdown(shutdownCtx) + }() + } + } + } + + ctx, span := tracer.StartSpan(context.Background(), "main") + defer tracer.EndSpan(span) + + tracer.AsyncEvaluate(ctx, judgmentClient.Scorers.CustomScorer.Get("Helpfulness Scorer", "HelpfulnessScorer"), v1.NewExample(v1.ExampleParams{ + Properties: map[string]any{ + "question": "test", + "answer": "test", + }, + })) + + fmt.Println("Simple Chat with OpenAI") + fmt.Println("Type 'quit' or 'exit' to end the conversation") + fmt.Println("Type 'clear' to clear conversation history") + fmt.Println("----------------------------------------") + + var messages []openai.ChatCompletionMessageParamUnion + scanner := 
bufio.NewScanner(os.Stdin) + messageCount := 0 + + for { + fmt.Print("You: ") + if !scanner.Scan() { + break + } + + userInput := strings.TrimSpace(scanner.Text()) + if userInput == "" { + continue + } + + if userInput == "quit" || userInput == "exit" { + fmt.Println("Goodbye!") + break + } + + if userInput == "clear" { + messages = nil + fmt.Println("Conversation history cleared.") + continue + } + + messageCount++ + var err error + messages, _, err = handleMessage(ctx, chatClient, userInput, messages, messageCount) + if err != nil { + fmt.Printf("Error: %v\n", err) + messageCount-- + continue + } + + fmt.Println() + } + + if err := scanner.Err(); err != nil { + fmt.Printf("Error reading input: %v\n", err) + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..d6ece1a --- /dev/null +++ b/package.json @@ -0,0 +1,21 @@ +{ + "name": "judgeval-go", + "version": "0.0.0", + "private": true, + "scripts": { + "format": "go fmt ./...", + "format:check": "bash -c 'test -z \"$(gofmt -l .)\"'", + "clean": "rm -rf ./bin", + "deps": "go mod download", + "tidy": "go mod tidy", + "generate:client": "npm run generate:client:raw && npm run format", + "generate:client:raw": "bash scripts/generate-client.sh", + "test": "go test ./...", + "install": "go install ./...", + "example": "dotenv -e .env -- ./scripts/run-example.sh", + "ci": "npm run format:check && npm run test && npm run build" + }, + "devDependencies": { + "dotenv-cli": "^7.4.2" + } +} diff --git a/pkg/data/example.go b/pkg/data/example.go index d564389..7cfdd3f 100644 --- a/pkg/data/example.go +++ b/pkg/data/example.go @@ -1,3 +1,7 @@ +// Package data provides legacy data structures. +// +// Deprecated: Use github.com/JudgmentLabs/judgeval-go/v1 instead. +// This package will be removed in a future version. 
package data import ( diff --git a/pkg/internal/api/models/fetchpromptscorersrequest.go b/pkg/internal/api/models/fetchpromptscorersrequest.go index 53da6bd..5611396 100644 --- a/pkg/internal/api/models/fetchpromptscorersrequest.go +++ b/pkg/internal/api/models/fetchpromptscorersrequest.go @@ -5,7 +5,8 @@ import ( ) type FetchPromptScorersRequest struct { - Names []string `json:"names,omitempty"` + Names []string `json:"names,omitempty"` + IsTrace bool `json:"is_trace,omitempty"` AdditionalProperties map[string]interface{} `json:"-"` } diff --git a/pkg/internal/api/models/oteltracespan.go b/pkg/internal/api/models/oteltracespan.go index 890a6f7..d024b09 100644 --- a/pkg/internal/api/models/oteltracespan.go +++ b/pkg/internal/api/models/oteltracespan.go @@ -23,14 +23,6 @@ type OtelTraceSpan struct { StatusMessage string `json:"status_message,omitempty"` Events []interface{} `json:"events,omitempty"` Links []interface{} `json:"links,omitempty"` - LegacySpanId string `json:"legacy_span_id,omitempty"` - Inputs interface{} `json:"inputs,omitempty"` - Output interface{} `json:"output,omitempty"` - Error interface{} `json:"error,omitempty"` - AgentId string `json:"agent_id,omitempty"` - CumulativeLlmCost float64 `json:"cumulative_llm_cost,omitempty"` - StateAfter interface{} `json:"state_after,omitempty"` - StateBefore interface{} `json:"state_before,omitempty"` AdditionalProperties map[string]interface{} `json:"-"` } diff --git a/pkg/internal/api/models/promptscorer.go b/pkg/internal/api/models/promptscorer.go index 5b337de..aaf54ac 100644 --- a/pkg/internal/api/models/promptscorer.go +++ b/pkg/internal/api/models/promptscorer.go @@ -5,15 +5,19 @@ import ( ) type PromptScorer struct { - Name string `json:"name,omitempty"` - Prompt string `json:"prompt,omitempty"` - Threshold float64 `json:"threshold,omitempty"` - Model string `json:"model,omitempty"` - Options interface{} `json:"options,omitempty"` - Description string `json:"description,omitempty"` - CreatedAt string 
`json:"created_at,omitempty"` - UpdatedAt string `json:"updated_at,omitempty"` - IsTrace bool `json:"is_trace,omitempty"` + Id string `json:"id,omitempty"` + UserId string `json:"user_id,omitempty"` + OrganizationId string `json:"organization_id,omitempty"` + Name string `json:"name,omitempty"` + Prompt string `json:"prompt,omitempty"` + Threshold float64 `json:"threshold,omitempty"` + Model string `json:"model,omitempty"` + Options interface{} `json:"options,omitempty"` + Description string `json:"description,omitempty"` + CreatedAt string `json:"created_at,omitempty"` + UpdatedAt string `json:"updated_at,omitempty"` + IsTrace bool `json:"is_trace,omitempty"` + IsBucketRubric bool `json:"is_bucket_rubric,omitempty"` AdditionalProperties map[string]interface{} `json:"-"` } diff --git a/pkg/internal/api/models/savepromptscorerresponse.go b/pkg/internal/api/models/savepromptscorerresponse.go index bbe0dbc..6651b4a 100644 --- a/pkg/internal/api/models/savepromptscorerresponse.go +++ b/pkg/internal/api/models/savepromptscorerresponse.go @@ -5,8 +5,7 @@ import ( ) type SavePromptScorerResponse struct { - Message string `json:"message,omitempty"` - Name string `json:"name,omitempty"` + ScorerResponse PromptScorer `json:"scorer_response,omitempty"` AdditionalProperties map[string]interface{} `json:"-"` } diff --git a/pkg/internal/api/models/traceevaluationrun.go b/pkg/internal/api/models/traceevaluationrun.go index 2b7ff6b..3961b4e 100644 --- a/pkg/internal/api/models/traceevaluationrun.go +++ b/pkg/internal/api/models/traceevaluationrun.go @@ -14,6 +14,7 @@ type TraceEvaluationRun struct { CreatedAt string `json:"created_at,omitempty"` TraceAndSpanIds [][]interface{} `json:"trace_and_span_ids,omitempty"` IsOffline bool `json:"is_offline,omitempty"` + IsBucketRun bool `json:"is_bucket_run,omitempty"` AdditionalProperties map[string]interface{} `json:"-"` } diff --git a/pkg/scorers/api_scorer.go b/pkg/scorers/api_scorer.go index 55a0c1a..61abac6 100644 --- 
a/pkg/scorers/api_scorer.go +++ b/pkg/scorers/api_scorer.go @@ -1,3 +1,7 @@ +// Package scorers provides legacy scorer functionality. +// +// Deprecated: Use github.com/JudgmentLabs/judgeval-go/v1 instead. +// This package will be removed in a future version. package scorers import ( diff --git a/pkg/scorers/api_scorers/prompt_scorer/base_prompt_scorer.go b/pkg/scorers/api_scorers/prompt_scorer/base_prompt_scorer.go index 93e253e..abdbda9 100644 --- a/pkg/scorers/api_scorers/prompt_scorer/base_prompt_scorer.go +++ b/pkg/scorers/api_scorers/prompt_scorer/base_prompt_scorer.go @@ -141,7 +141,7 @@ func PushPromptScorer( } if response != nil { - return response.Name, nil + return response.ScorerResponse.Name, nil } return "", nil } diff --git a/pkg/tracer/tracer.go b/pkg/tracer/tracer.go index c379187..b9e6554 100644 --- a/pkg/tracer/tracer.go +++ b/pkg/tracer/tracer.go @@ -1,3 +1,7 @@ +// Package tracer provides legacy tracing functionality. +// +// Deprecated: Use github.com/JudgmentLabs/judgeval-go/v1 instead. +// This package will be removed in a future version. 
package tracer import ( diff --git a/scripts/generate-client-v1.py b/scripts/generate-client-v1.py new file mode 100755 index 0000000..ebca282 --- /dev/null +++ b/scripts/generate-client-v1.py @@ -0,0 +1,563 @@ +#!/usr/bin/env python3 + +import json +import os +import shutil +import sys +from typing import Any, Dict, List, Optional, Set +import httpx + +JUDGEVAL_PATHS = [ + "/log_eval_results/", + "/fetch_experiment_run/", + "/add_to_run_eval_queue/", + "/get_evaluation_status/", + "/save_scorer/", + "/fetch_scorers/", + "/scorer_exists/", + "/projects/resolve/", +] + +HTTP_METHODS = {"GET", "POST", "PUT", "PATCH", "DELETE"} +SUCCESS_STATUS_CODES = {"200", "201"} +SCHEMA_REF_PREFIX = "#/components/schemas/" + + +def resolve_ref(ref: str) -> str: + assert ref.startswith( + SCHEMA_REF_PREFIX + ), f"Reference must start with {SCHEMA_REF_PREFIX}" + return ref.replace(SCHEMA_REF_PREFIX, "") + + +def to_camel_case(name: str) -> str: + parts = name.replace("-", "_").split("_") + return parts[0] + "".join(word.capitalize() for word in parts[1:]) + + +def to_struct_name(name: str) -> str: + camel_case = to_camel_case(name) + return camel_case[0].upper() + camel_case[1:] + + +def get_method_name_from_path(path: str, method: str) -> str: + clean_path = path.strip("/").replace("/", "_").replace("-", "_") + camel_case = to_camel_case(clean_path) + return ( + camel_case[0].upper() + camel_case[1:] + ) # Make it PascalCase for exported methods + + +def get_query_parameters(operation: Dict[str, Any]) -> List[Dict[str, Any]]: + return [ + { + "name": param["name"], + "required": param.get("required", False), + "type": param.get("schema", {}).get("type", "string"), + } + for param in operation.get("parameters", []) + if param.get("in") == "query" + ] + + +def get_schema_from_content(content: Dict[str, Any]) -> Optional[str]: + if "application/json" in content: + schema = content["application/json"].get("schema", {}) + return resolve_ref(schema["$ref"]) if "$ref" in schema else None 
+ return None + + +def get_request_schema(operation: Dict[str, Any]) -> Optional[str]: + request_body = operation.get("requestBody", {}) + return ( + get_schema_from_content(request_body.get("content", {})) + if request_body + else None + ) + + +def get_response_schema(operation: Dict[str, Any]) -> Optional[str]: + responses = operation.get("responses", {}) + for status_code in SUCCESS_STATUS_CODES: + if status_code in responses: + result = get_schema_from_content(responses[status_code].get("content", {})) + if result: + return result + return None + + +def extract_dependencies( + schema: Dict[str, Any], visited: Optional[Set[str]] = None +) -> Set[str]: + if visited is None: + visited = set() + + schema_key = json.dumps(schema, sort_keys=True) + if schema_key in visited: + return set() + + visited.add(schema_key) + dependencies: Set[str] = set() + + if "$ref" in schema: + return {resolve_ref(schema["$ref"])} + + for key in ["anyOf", "oneOf", "allOf"]: + if key in schema: + for s in schema[key]: + dependencies.update(extract_dependencies(s, visited)) + + if "properties" in schema: + for prop_schema in schema["properties"].values(): + dependencies.update(extract_dependencies(prop_schema, visited)) + + if "items" in schema: + dependencies.update(extract_dependencies(schema["items"], visited)) + + if "additionalProperties" in schema and isinstance( + schema["additionalProperties"], dict + ): + dependencies.update( + extract_dependencies(schema["additionalProperties"], visited) + ) + + return dependencies + + +def find_used_schemas(spec: Dict[str, Any]) -> Set[str]: + used_schemas = set() + schemas = spec.get("components", {}).get("schemas", {}) + + for path in JUDGEVAL_PATHS: + if path in spec["paths"]: + for method, operation in spec["paths"][path].items(): + if method.upper() in HTTP_METHODS: + for schema in [ + get_request_schema(operation), + get_response_schema(operation), + ]: + if schema: + used_schemas.add(schema) + + changed = True + while changed: + changed 
= False + new_schemas = set() + + for schema_name in used_schemas: + if schema_name in schemas: + deps = extract_dependencies(schemas[schema_name]) + for dep in deps: + if dep in schemas and dep not in used_schemas: + new_schemas.add(dep) + changed = True + + used_schemas.update(new_schemas) + + return used_schemas + + +def get_go_type(schema: Dict[str, Any]) -> str: + if "$ref" in schema: + return to_struct_name(resolve_ref(schema["$ref"])) + + for union_key in ["anyOf", "oneOf", "allOf"]: + if union_key in schema: + union_schemas = schema[union_key] + types = set() + + for union_schema in union_schemas: + if union_schema.get("type") == "null": + types.add("null") + else: + types.add(get_go_type(union_schema)) + + non_null_types = types - {"null"} + if len(non_null_types) == 1: + return list(non_null_types)[0] + else: + print( + f"Union type with multiple non-null types: {non_null_types}", + file=sys.stderr, + ) + return "interface{}" + + schema_type = schema.get("type", "object") + type_mapping = { + "string": "string", + "integer": "int", + "number": "float64", + "boolean": "bool", + "object": "interface{}", + } + + if schema_type == "array": + items = schema.get("items", {}) + return f"[]{get_go_type(items)}" if items else "[]interface{}" + + return type_mapping.get(schema_type, "interface{}") + + +def generate_struct(className: str, schema: Dict[str, Any]) -> str: + lines = [ + "package models", + "", + "import (", + ' "encoding/json"', + ")", + "", + f"type {className} struct {{", + ] + + if "properties" in schema: + for field_name, property_schema in schema["properties"].items(): + go_type = get_go_type(property_schema) + json_tag = f'json:"{field_name},omitempty"' + + lines.append(f" {to_struct_name(field_name)} {go_type} `{json_tag}`") + + lines.append("") + + lines.extend( + [ + ' AdditionalProperties map[string]interface{} `json:"-"`', + "}", + "", + f"func (m *{className}) UnmarshalJSON(data []byte) error {{", + f" type Alias {className}", + " aux := 
&struct {", + " *Alias", + " }{", + " Alias: (*Alias)(m),", + " }", + " if err := json.Unmarshal(data, &aux); err != nil {{", + " return err", + " }}", + " m.AdditionalProperties = make(map[string]interface{})", + " if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {{", + " return err", + " }}", + " return nil", + "}", + "", + f"func (m {className}) MarshalJSON() ([]byte, error) {{", + f" type Alias {className}", + " aux := &struct {", + " *Alias", + " }{", + " Alias: (*Alias)(&m),", + " }", + " ", + " result := make(map[string]interface{})", + " ", + " mainBytes, err := json.Marshal(aux)", + " if err != nil {{", + " return nil, err", + " }}", + " ", + " if err := json.Unmarshal(mainBytes, &result); err != nil {{", + " return nil, err", + " }}", + " ", + " for k, v := range m.AdditionalProperties {{", + " result[k] = v", + " }}", + " ", + " return json.Marshal(result)", + "}", + ] + ) + + return "\n".join(lines) + + +def generate_method_signature( + method_name: str, + request_type: Optional[str], + query_params: List[Dict[str, Any]], + response_type: str, +) -> str: + params = [] + + for param in query_params: + if param["required"]: + params.append(f"{param['name']} string") + + if request_type: + params.append(f"payload *models.{request_type}") + + for param in query_params: + if not param["required"]: + params.append(f"{param['name']} *string") + + response_type_ref = ( + f"models.{response_type}" if response_type != "interface{}" else response_type + ) + return f"func (c *Client) {method_name}({', '.join(params)}) (*{response_type_ref}, error) {{" + + +def generate_method_body( + method_name: str, + path: str, + method: str, + request_type: Optional[str], + query_params: List[Dict[str, Any]], + response_type: str, +) -> str: + response_type_ref = ( + f"models.{response_type}" if response_type != "interface{}" else response_type + ) + lines = [] + + if query_params: + lines.append(" queryParams := make(map[string]string)") + for param in 
query_params: + param_name = param["name"] + if param["required"]: + lines.append(f' queryParams["{param_name}"] = {param_name}') + else: + lines.extend( + [ + f" if {param_name} != nil {{", + f' queryParams["{param_name}"] = *{param_name}', + " }", + ] + ) + + if query_params: + lines.append(' url := c.buildURL("' + path + '", queryParams)') + else: + lines.append(' url := c.buildURL("' + path + '", nil)') + + if method in ["GET", "DELETE"]: + lines.extend( + [ + f' req, err := http.NewRequest("{method}", url, nil)', + " if err != nil {", + " return nil, err", + " }", + " c.setHeaders(req)", + ] + ) + else: + payload_expr = "payload" if request_type else "struct{}{}" + lines.extend( + [ + f" jsonPayload, err := json.Marshal({payload_expr})", + " if err != nil {", + " return nil, err", + " }", + f' req, err := http.NewRequest("{method}", url, bytes.NewBuffer(jsonPayload))', + " if err != nil {", + " return nil, err", + " }", + " c.setHeaders(req)", + ] + ) + + lines.extend( + [ + " resp, err := c.httpClient.Do(req)", + " if err != nil {", + " return nil, err", + " }", + " defer resp.Body.Close()", + "", + " if resp.StatusCode >= 400 {", + " body, _ := io.ReadAll(resp.Body)", + ' return nil, fmt.Errorf("HTTP Error: %d - %s", resp.StatusCode, string(body))', + " }", + "", + f" var result {response_type_ref}", + " if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {", + " return nil, err", + " }", + " return &result, nil", + ] + ) + + return "\n".join(lines) + + +def generate_client_class(methods: List[Dict[str, Any]]) -> str: + lines = [ + "package api", + "", + "import (", + ' "bytes"', + ' "encoding/json"', + ' "fmt"', + ' "io"', + ' "net/http"', + ' "net/url"', + "", + ' "github.com/JudgmentLabs/judgeval-go/v1/internal/api/models"', + ")", + "", + "type Client struct {", + " baseURL string", + " apiKey string", + " organizationID string", + " httpClient *http.Client", + "}", + "", + "func NewClient(baseURL, apiKey, organizationID string) *Client {", 
+ " return &Client{", + " baseURL: baseURL,", + " apiKey: apiKey,", + " organizationID: organizationID,", + " httpClient: &http.Client{},", + " }", + "}", + "", + "func (c *Client) buildURL(path string, queryParams map[string]string) string {", + " u, _ := url.Parse(c.baseURL + path)", + " if len(queryParams) > 0 {", + " q := u.Query()", + " for k, v := range queryParams {", + " q.Set(k, v)", + " }", + " u.RawQuery = q.Encode()", + " }", + " return u.String()", + "}", + "", + "func (c *Client) setHeaders(req *http.Request) {", + ' req.Header.Set("Content-Type", "application/json")', + ' req.Header.Set("Authorization", "Bearer "+c.apiKey)', + ' req.Header.Set("X-Organization-Id", c.organizationID)', + "}", + "", + "func (c *Client) GetBaseURL() string {", + " return c.baseURL", + "}", + "", + "func (c *Client) GetAPIKey() string {", + " return c.apiKey", + "}", + "", + "func (c *Client) GetOrganizationID() string {", + " return c.organizationID", + "}", + "", + ] + + for method_info in methods: + signature = generate_method_signature( + method_info["name"], + method_info["request_type"], + method_info["query_params"], + method_info["response_type"], + ) + lines.append(signature) + + body = generate_method_body( + method_info["name"], + method_info["path"], + method_info["method"], + method_info["request_type"], + method_info["query_params"], + method_info["response_type"], + ) + lines.append(body) + lines.append("}") + lines.append("") + + return "\n".join(lines) + + +def generate_api_files(spec: Dict[str, Any]) -> None: + used_schemas = find_used_schemas(spec) + schemas = spec.get("components", {}).get("schemas", {}) + + models_dir = "v1/internal/api/models" + if os.path.exists(models_dir): + print(f"Clearing existing models directory: {models_dir}", file=sys.stderr) + shutil.rmtree(models_dir) + + os.makedirs(models_dir, exist_ok=True) + + print("Generating model structs...", file=sys.stderr) + for schema_name in used_schemas: + if schema_name in schemas: + 
struct_name = to_struct_name(schema_name) + model_struct = generate_struct(struct_name, schemas[schema_name]) + + with open(f"{models_dir}/{struct_name.lower()}.go", "w") as f: + f.write(model_struct) + + print(f"Generated model: {struct_name}", file=sys.stderr) + + filtered_paths = { + path: spec_data + for path, spec_data in spec["paths"].items() + if path in JUDGEVAL_PATHS + } + + for path in JUDGEVAL_PATHS: + if path not in spec["paths"]: + print(f"Path {path} not found in OpenAPI spec", file=sys.stderr) + + methods = [] + for path, path_data in filtered_paths.items(): + for method, operation in path_data.items(): + if method.upper() in HTTP_METHODS: + method_name = get_method_name_from_path(path, method.upper()) + request_schema = get_request_schema(operation) + response_schema = get_response_schema(operation) + query_params = get_query_parameters(operation) + + print( + f"{method_name} {request_schema} {response_schema} {query_params}", + file=sys.stderr, + ) + + method_info = { + "name": method_name, + "path": path, + "method": method.upper(), + "request_type": ( + to_struct_name(request_schema) if request_schema else None + ), + "query_params": query_params, + "response_type": ( + to_struct_name(response_schema) + if response_schema + else "EvalResults" # Default response type + ), + } + methods.append(method_info) + + api_dir = "v1/internal/api" + os.makedirs(api_dir, exist_ok=True) + + client_class = generate_client_class(methods) + with open(f"{api_dir}/client.go", "w") as f: + f.write(client_class) + print(f"Generated: {api_dir}/client.go", file=sys.stderr) + + +def main(): + spec_file = ( + sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json" + ) + + try: + if spec_file.startswith("http"): + with httpx.Client() as client: + response = client.get(spec_file) + response.raise_for_status() + spec = response.json() + else: + with open(spec_file, "r") as f: + spec = json.load(f) + + generate_api_files(spec) + + except Exception as e: + 
print(f"Error generating API client: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/generate-client.sh b/scripts/generate-client.sh index 224e63a..bc8ca4c 100644 --- a/scripts/generate-client.sh +++ b/scripts/generate-client.sh @@ -1,3 +1,4 @@ #!/bin/bash -python3 scripts/generate_client.py "${1:-http://localhost:8000/openapi.json}" +python3 scripts/generate-client.py "${1:-http://localhost:8000/openapi.json}" +python3 scripts/generate-client-v1.py "${1:-http://localhost:8000/openapi.json}" diff --git a/scripts/run-example.sh b/scripts/run-example.sh new file mode 100755 index 0000000..d650ed2 --- /dev/null +++ b/scripts/run-example.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +if [ -z "$1" ]; then + echo "Error: Please specify an example to run" + echo "Usage: bun run example " + echo "Available examples:" + ls -1 examples/ | grep -v "^\\." || echo " (none found)" + exit 1 +fi + +cd "examples/$1" && go run . + diff --git a/v1/base_scorer.go b/v1/base_scorer.go new file mode 100644 index 0000000..da993b8 --- /dev/null +++ b/v1/base_scorer.go @@ -0,0 +1,63 @@ +package v1 + +import ( + "maps" + + "github.com/JudgmentLabs/judgeval-go/v1/internal/api/models" +) + +type BaseScorer interface { + GetName() string + GetScorerConfig() *models.ScorerConfig +} + +type apiScorer struct { + scoreType APIScorerType + threshold float64 + name string + strictMode bool + model string + requiredParams []string + additionalProps map[string]interface{} +} + +func newAPIScorer(scoreType APIScorerType, threshold float64, name string, strictMode bool, model string, requiredParams []string) *apiScorer { + finalThreshold := threshold + if strictMode { + finalThreshold = 1.0 + } + + finalName := name + if finalName == "" { + finalName = scoreType.String() + } + + return &apiScorer{ + scoreType: scoreType, + threshold: finalThreshold, + name: finalName, + strictMode: strictMode, + model: model, + requiredParams: requiredParams, + additionalProps: 
make(map[string]interface{}), + } +} + +func (s *apiScorer) GetName() string { + return s.name +} + +func (s *apiScorer) toScorerConfig(requiredParams []string) *models.ScorerConfig { + kwargs := make(map[string]interface{}) + maps.Copy(kwargs, s.additionalProps) + + return &models.ScorerConfig{ + ScoreType: s.scoreType.String(), + Threshold: s.threshold, + Name: s.name, + StrictMode: s.strictMode, + RequiredParams: requiredParams, + Kwargs: kwargs, + Model: s.model, + } +} diff --git a/v1/builtin_scorers.go b/v1/builtin_scorers.go new file mode 100644 index 0000000..a71d485 --- /dev/null +++ b/v1/builtin_scorers.go @@ -0,0 +1,101 @@ +package v1 + +import "github.com/JudgmentLabs/judgeval-go/v1/internal/api/models" + +type BuiltInScorersFactory struct{} + +type FaithfulnessScorerParams struct { + Threshold *float64 + Name *string + StrictMode *bool + Model *string +} + +type FaithfulnessScorer struct { + *apiScorer + requiredParams []string +} + +func (f *BuiltInScorersFactory) Faithfulness(params FaithfulnessScorerParams) *FaithfulnessScorer { + scorer := newAPIScorer( + APIScorerTypeFaithfulness, + getFloat(params.Threshold, 0.5), + getString(params.Name, ""), + getBool(params.StrictMode, false), + getString(params.Model, ""), + []string{"context", "actual_output"}, + ) + + return &FaithfulnessScorer{ + apiScorer: scorer, + requiredParams: []string{"context", "actual_output"}, + } +} + +func (s *FaithfulnessScorer) GetScorerConfig() *models.ScorerConfig { + return s.apiScorer.toScorerConfig(s.requiredParams) +} + +type AnswerCorrectnessScorerParams struct { + Threshold *float64 + Name *string + StrictMode *bool + Model *string +} + +type AnswerCorrectnessScorer struct { + *apiScorer + requiredParams []string +} + +func (f *BuiltInScorersFactory) AnswerCorrectness(params AnswerCorrectnessScorerParams) *AnswerCorrectnessScorer { + scorer := newAPIScorer( + APIScorerTypeAnswerCorrectness, + getFloat(params.Threshold, 0.5), + getString(params.Name, ""), + 
getBool(params.StrictMode, false), + getString(params.Model, ""), + []string{"input", "actual_output", "expected_output"}, + ) + + return &AnswerCorrectnessScorer{ + apiScorer: scorer, + requiredParams: []string{"input", "actual_output", "expected_output"}, + } +} + +func (s *AnswerCorrectnessScorer) GetScorerConfig() *models.ScorerConfig { + return s.apiScorer.toScorerConfig(s.requiredParams) +} + +type AnswerRelevancyScorerParams struct { + Threshold *float64 + Name *string + StrictMode *bool + Model *string +} + +type AnswerRelevancyScorer struct { + *apiScorer + requiredParams []string +} + +func (f *BuiltInScorersFactory) AnswerRelevancy(params AnswerRelevancyScorerParams) *AnswerRelevancyScorer { + scorer := newAPIScorer( + APIScorerTypeAnswerRelevancy, + getFloat(params.Threshold, 0.5), + getString(params.Name, ""), + getBool(params.StrictMode, false), + getString(params.Model, ""), + []string{"input", "actual_output"}, + ) + + return &AnswerRelevancyScorer{ + apiScorer: scorer, + requiredParams: []string{"input", "actual_output"}, + } +} + +func (s *AnswerRelevancyScorer) GetScorerConfig() *models.ScorerConfig { + return s.apiScorer.toScorerConfig(s.requiredParams) +} diff --git a/v1/custom_scorer.go b/v1/custom_scorer.go new file mode 100644 index 0000000..1438008 --- /dev/null +++ b/v1/custom_scorer.go @@ -0,0 +1,49 @@ +package v1 + +import ( + "github.com/JudgmentLabs/judgeval-go/v1/internal/api/models" +) + +type CustomScorerFactory struct{} + +type CustomScorerParams struct { + Name string + ClassName *string +} + +type CustomScorer struct { + name string + className string + serverHosted bool +} + +func (f *CustomScorerFactory) Get(name string, className string) *CustomScorer { + return &CustomScorer{ + name: name, + className: className, + serverHosted: true, + } +} + +func (s *CustomScorer) GetName() string { + return s.name +} + +func (s *CustomScorer) GetClassName() string { + return s.className +} + +func (s *CustomScorer) IsServerHosted() bool { 
+ return s.serverHosted +} + +func (s *CustomScorer) GetScorerConfig() *models.ScorerConfig { + return &models.ScorerConfig{ + ScoreType: APIScorerTypeCustom.String(), + Name: s.name, + Kwargs: map[string]interface{}{ + "class_name": s.className, + "server_hosted": s.serverHosted, + }, + } +} diff --git a/v1/doc.go b/v1/doc.go new file mode 100644 index 0000000..112378f --- /dev/null +++ b/v1/doc.go @@ -0,0 +1,33 @@ +// Package v1 provides the Judgment SDK v1 format for tracing, scoring, and evaluation. +// +// Basic usage: +// +// client, err := v1.NewClient( +// v1.WithAPIKey("your-api-key"), +// v1.WithOrganizationID("your-org-id"), +// ) +// if err != nil { +// log.Fatal(err) +// } +// +// tracer, err := client.Tracer.Create(ctx, v1.TracerCreateParams{ +// ProjectName: "my-project", +// Initialize: v1.Bool(true), +// }) +// if err != nil { +// log.Fatal(err) +// } +// defer tracer.Shutdown(ctx) +// +// scorer := client.Scorers.BuiltIn.Faithfulness(v1.FaithfulnessScorerParams{ +// Threshold: v1.Float(0.8), +// }) +// +// example := v1.NewExample(v1.ExampleParams{ +// Name: v1.String("test-example"), +// Properties: map[string]interface{}{ +// "input": "What is AI?", +// "output": "Artificial Intelligence...", +// }, +// }) +package v1 diff --git a/v1/evaluation.go b/v1/evaluation.go new file mode 100644 index 0000000..e4565cc --- /dev/null +++ b/v1/evaluation.go @@ -0,0 +1,20 @@ +package v1 + +import "github.com/JudgmentLabs/judgeval-go/v1/internal/api" + +type EvaluationFactory struct { + client *api.Client +} + +type EvaluationCreateParams struct { +} + +type Evaluation struct { + client *api.Client +} + +func (f *EvaluationFactory) Create(params EvaluationCreateParams) *Evaluation { + return &Evaluation{ + client: f.client, + } +} diff --git a/v1/example.go b/v1/example.go new file mode 100644 index 0000000..3e27028 --- /dev/null +++ b/v1/example.go @@ -0,0 +1,87 @@ +package v1 + +import ( + "maps" + "time" + + 
"github.com/JudgmentLabs/judgeval-go/v1/internal/api/models" + "github.com/google/uuid" +) + +type ExampleParams struct { + Name *string + Properties map[string]interface{} +} + +type Example struct { + exampleID string + createdAt string + name *string + properties map[string]interface{} +} + +func NewExample(params ExampleParams) *Example { + exampleID := uuid.New().String() + createdAt := time.Now().Format(time.RFC3339) + + properties := make(map[string]interface{}) + if params.Properties != nil { + maps.Copy(properties, params.Properties) + } + + return &Example{ + exampleID: exampleID, + createdAt: createdAt, + name: params.Name, + properties: properties, + } +} + +func (e *Example) SetProperty(key string, value interface{}) *Example { + e.properties[key] = value + return e +} + +func (e *Example) GetProperty(key string) interface{} { + return e.properties[key] +} + +func (e *Example) GetProperties() map[string]interface{} { + propsCopy := make(map[string]interface{}) + maps.Copy(propsCopy, e.properties) + return propsCopy +} + +func (e *Example) GetExampleID() string { + return e.exampleID +} + +func (e *Example) GetCreatedAt() string { + return e.createdAt +} + +func (e *Example) GetName() *string { + return e.name +} + +func (e *Example) SetName(name string) { + e.name = &name +} + +func (e *Example) toModel() models.Example { + result := models.Example{ + ExampleId: e.exampleID, + CreatedAt: e.createdAt, + AdditionalProperties: make(map[string]interface{}), + } + + if e.name != nil { + result.Name = *e.name + } + + for k, v := range e.properties { + result.AdditionalProperties[k] = v + } + + return result +} diff --git a/v1/exporters.go b/v1/exporters.go new file mode 100644 index 0000000..23151e3 --- /dev/null +++ b/v1/exporters.go @@ -0,0 +1,66 @@ +package v1 + +import ( + "context" + "time" + + "github.com/JudgmentLabs/judgeval-go/pkg/logger" + "github.com/JudgmentLabs/judgeval-go/v1/internal/api" + 
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + sdktrace "go.opentelemetry.io/otel/sdk/trace" +) + +type judgmentSpanExporter struct { + delegate sdktrace.SpanExporter +} + +func newJudgmentSpanExporter(ctx context.Context, endpoint string, apiClient *api.Client, projectID string) sdktrace.SpanExporter { + if projectID == "" { + logger.Error("projectID is required for JudgmentSpanExporter") + return newNoOpSpanExporter() + } + + headers := map[string]string{ + "Authorization": "Bearer " + apiClient.GetAPIKey(), + "X-Organization-Id": apiClient.GetOrganizationID(), + "X-Project-Id": projectID, + } + + exporter, err := otlptracehttp.New(ctx, + otlptracehttp.WithEndpointURL(endpoint), + otlptracehttp.WithHeaders(headers), + otlptracehttp.WithTimeout(30*time.Second), + ) + if err != nil { + logger.Error("Failed to create OTLP HTTP exporter: %v", err) + return newNoOpSpanExporter() + } + + return &judgmentSpanExporter{ + delegate: exporter, + } +} + +func (e *judgmentSpanExporter) ExportSpans(ctx context.Context, spans []sdktrace.ReadOnlySpan) error { + logger.Info("Exported %d spans", len(spans)) + return e.delegate.ExportSpans(ctx, spans) +} + +func (e *judgmentSpanExporter) Shutdown(ctx context.Context) error { + return e.delegate.Shutdown(ctx) +} + +type noOpSpanExporter struct{} + +func newNoOpSpanExporter() sdktrace.SpanExporter { + return &noOpSpanExporter{} +} + +func (e *noOpSpanExporter) ExportSpans(ctx context.Context, spans []sdktrace.ReadOnlySpan) error { + logger.Warning("NoOpSpanExporter: discarding %d spans", len(spans)) + return nil +} + +func (e *noOpSpanExporter) Shutdown(ctx context.Context) error { + return nil +} diff --git a/v1/internal/api/client.go b/v1/internal/api/client.go new file mode 100644 index 0000000..d347ee5 --- /dev/null +++ b/v1/internal/api/client.go @@ -0,0 +1,261 @@ +package api + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + + 
"github.com/JudgmentLabs/judgeval-go/v1/internal/api/models"
+)
+
+// Client is a small HTTP client for the Judgment API. It stores the base URL
+// and the credentials that setHeaders attaches to every request.
+type Client struct {
+	baseURL        string
+	apiKey         string
+	organizationID string
+	httpClient     *http.Client
+}
+
+// NewClient returns a Client that talks to baseURL and authenticates with
+// apiKey and organizationID.
+//
+// NOTE(review): the http.Client is created without a Timeout, so a stalled
+// server blocks the caller indefinitely; consider configuring one.
+func NewClient(baseURL, apiKey, organizationID string) *Client {
+	return &Client{
+		baseURL:        baseURL,
+		apiKey:         apiKey,
+		organizationID: organizationID,
+		httpClient:     &http.Client{},
+	}
+}
+
+// buildURL appends path to the base URL and, when queryParams is non-empty,
+// sets each entry as a query parameter.
+//
+// If the joined string cannot be parsed it is returned unchanged so that
+// http.NewRequest reports the parse error to the caller. Previously the
+// error was discarded and the nil *url.URL was dereferenced, which panicked.
+func (c *Client) buildURL(path string, queryParams map[string]string) string {
+	raw := c.baseURL + path
+	u, err := url.Parse(raw)
+	if err != nil {
+		return raw
+	}
+	if len(queryParams) > 0 {
+		q := u.Query()
+		for k, v := range queryParams {
+			q.Set(k, v)
+		}
+		u.RawQuery = q.Encode()
+	}
+	return u.String()
+}
+
+// setHeaders sets the JSON content type, the bearer token and the
+// organization ID header on req.
+func (c *Client) setHeaders(req *http.Request) {
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+c.apiKey)
+	req.Header.Set("X-Organization-Id", c.organizationID)
+}
+
+// GetBaseURL returns the base URL the client was created with.
+func (c *Client) GetBaseURL() string {
+	return c.baseURL
+}
+
+// GetAPIKey returns the API key the client was created with.
+func (c *Client) GetAPIKey() string {
+	return c.apiKey
+}
+
+// GetOrganizationID returns the organization ID the client was created with.
+func (c *Client) GetOrganizationID() string {
+	return c.organizationID
+}
+
+// postJSON marshals payload to JSON, POSTs it to path with the standard
+// headers and decodes the JSON response body into out. A status code of 400
+// or above is returned as an error carrying the code and the response body.
+func (c *Client) postJSON(path string, payload, out interface{}) error {
+	target := c.buildURL(path, nil)
+	jsonPayload, err := json.Marshal(payload)
+	if err != nil {
+		return err
+	}
+	req, err := http.NewRequest("POST", target, bytes.NewBuffer(jsonPayload))
+	if err != nil {
+		return err
+	}
+	c.setHeaders(req)
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 400 {
+		// Best effort: a failed read still reports the status code.
+		body, _ := io.ReadAll(resp.Body)
+		return fmt.Errorf("HTTP Error: %d - %s", resp.StatusCode, string(body))
+	}
+	return json.NewDecoder(resp.Body).Decode(out)
+}
+
+// AddToRunEvalQueue POSTs payload to /add_to_run_eval_queue/ and returns the
+// decoded response.
+func (c *Client) AddToRunEvalQueue(payload *models.ExampleEvaluationRun) (*models.EvalResults, error) {
+	var result models.EvalResults
+	if err := c.postJSON("/add_to_run_eval_queue/", payload, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// LogEvalResults POSTs payload to /log_eval_results/ and returns the decoded
+// response.
+func (c *Client) LogEvalResults(payload *models.EvalResults) (*models.EvalResults, error) {
+	var result models.EvalResults
+	if err := c.postJSON("/log_eval_results/", payload, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// FetchExperimentRun POSTs payload to /fetch_experiment_run/ and returns the
+// decoded response.
+func (c *Client) FetchExperimentRun(payload *models.EvalResultsFetch) (*models.EvalResults, error) {
+	var result models.EvalResults
+	if err := c.postJSON("/fetch_experiment_run/", payload, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// ScorerExists POSTs payload to /scorer_exists/ and returns the decoded
+// response.
+func (c *Client) ScorerExists(payload *models.ScorerExistsRequest) (*models.ScorerExistsResponse, error) {
+	var result models.ScorerExistsResponse
+	if err := c.postJSON("/scorer_exists/", payload, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// SaveScorer POSTs payload to /save_scorer/ and returns the decoded response.
+func (c *Client) SaveScorer(payload *models.SavePromptScorerRequest) (*models.SavePromptScorerResponse, error) {
+	var result models.SavePromptScorerResponse
+	if err := c.postJSON("/save_scorer/", payload, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// FetchScorers POSTs payload to /fetch_scorers/ and returns the decoded
+// response.
+func (c *Client) FetchScorers(payload *models.FetchPromptScorersRequest) (*models.FetchPromptScorersResponse, error) {
+	var result models.FetchPromptScorersResponse
+	if err := c.postJSON("/fetch_scorers/", payload, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
+
+// ProjectsResolve POSTs payload to /projects/resolve/ and returns the decoded
+// response.
+func (c *Client) ProjectsResolve(payload *models.ResolveProjectNameRequest) (*models.ResolveProjectNameResponse, error) {
+	var result models.ResolveProjectNameResponse
+	if err := c.postJSON("/projects/resolve/", payload, &result); err != nil {
+		return nil, err
+	}
+	return &result, nil
+}
diff --git a/v1/internal/api/models/basescorer.go b/v1/internal/api/models/basescorer.go
new file mode 100644
index 0000000..ddab6e6
--- /dev/null
+++ b/v1/internal/api/models/basescorer.go
@@ -0,0 +1,79 @@
+package models
+
+import (
+	"encoding/json"
+)
+// BaseScorer is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type BaseScorer struct {
+	ScoreType          string      `json:"score_type,omitempty"`
+	Threshold          float64     `json:"threshold,omitempty"`
+	Name               string      `json:"name,omitempty"`
+	ClassName          string      `json:"class_name,omitempty"`
+	Score              float64     `json:"score,omitempty"`
+	ScoreBreakdown     interface{} `json:"score_breakdown,omitempty"`
+	Reason             string      `json:"reason,omitempty"`
+	UsingNativeModel   bool        `json:"using_native_model,omitempty"`
+	Success            bool        `json:"success,omitempty"`
+	Model              string      `json:"model,omitempty"`
+	ModelClient        interface{} `json:"model_client,omitempty"`
+	StrictMode         bool        `json:"strict_mode,omitempty"`
+	Error              string      `json:"error,omitempty"`
+	AdditionalMetadata interface{} `json:"additional_metadata,omitempty"`
+	User               string      `json:"user,omitempty"`
+	ServerHosted       bool        `json:"server_hosted,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *BaseScorer) UnmarshalJSON(data []byte) error {
+	type Alias BaseScorer
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m BaseScorer) MarshalJSON() ([]byte, error) {
+	type Alias BaseScorer
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/evalresults.go b/v1/internal/api/models/evalresults.go
new file mode 100644
index 0000000..0ac7e53
--- /dev/null
+++ b/v1/internal/api/models/evalresults.go
@@ -0,0 +1,65 @@
+package models
+
+import (
+	"encoding/json"
+)
+// EvalResults is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type EvalResults struct {
+	Results []ScoringResult `json:"results,omitempty"`
+	Run     interface{}     `json:"run,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *EvalResults) UnmarshalJSON(data []byte) error {
+	type Alias EvalResults
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m EvalResults) MarshalJSON() ([]byte, error) {
+	type Alias EvalResults
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/evalresultsfetch.go b/v1/internal/api/models/evalresultsfetch.go
new file mode 100644
index
0000000..ee71580
--- /dev/null
+++ b/v1/internal/api/models/evalresultsfetch.go
@@ -0,0 +1,65 @@
+package models
+
+import (
+	"encoding/json"
+)
+// EvalResultsFetch is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type EvalResultsFetch struct {
+	ExperimentRunId string `json:"experiment_run_id,omitempty"`
+	ProjectName     string `json:"project_name,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *EvalResultsFetch) UnmarshalJSON(data []byte) error {
+	type Alias EvalResultsFetch
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m EvalResultsFetch) MarshalJSON() ([]byte, error) {
+	type Alias EvalResultsFetch
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/example.go b/v1/internal/api/models/example.go
new file mode 100644
index 0000000..53541c2
--- /dev/null
+++ b/v1/internal/api/models/example.go
@@ -0,0 +1,66 @@
+package models
+
+import (
+	"encoding/json"
+)
+// Example is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type Example struct {
+	ExampleId string `json:"example_id,omitempty"`
+	CreatedAt string `json:"created_at,omitempty"`
+	Name      string `json:"name,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *Example) UnmarshalJSON(data []byte) error {
+	type Alias Example
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m Example) MarshalJSON() ([]byte, error) {
+	type Alias Example
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/exampleevaluationrun.go b/v1/internal/api/models/exampleevaluationrun.go
new file mode 100644
index 0000000..619b5b7
--- /dev/null
+++ b/v1/internal/api/models/exampleevaluationrun.go
@@ -0,0 +1,73 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ExampleEvaluationRun is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ExampleEvaluationRun struct {
+	Id              string         `json:"id,omitempty"`
+	ProjectName     string         `json:"project_name,omitempty"`
+	EvalName        string         `json:"eval_name,omitempty"`
+	CustomScorers   []BaseScorer   `json:"custom_scorers,omitempty"`
+	JudgmentScorers []ScorerConfig `json:"judgment_scorers,omitempty"`
+	Model           string         `json:"model,omitempty"`
+	CreatedAt       string         `json:"created_at,omitempty"`
+	Examples        []Example      `json:"examples,omitempty"`
+	TraceSpanId     string         `json:"trace_span_id,omitempty"`
+	TraceId         string         `json:"trace_id,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ExampleEvaluationRun) UnmarshalJSON(data []byte) error {
+	type Alias ExampleEvaluationRun
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ExampleEvaluationRun) MarshalJSON() ([]byte, error) {
+	type Alias ExampleEvaluationRun
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/fetchpromptscorersrequest.go b/v1/internal/api/models/fetchpromptscorersrequest.go
new file mode 100644
index 0000000..5611396
--- /dev/null
+++ b/v1/internal/api/models/fetchpromptscorersrequest.go
@@ -0,0 +1,65 @@
+package models
+
+import (
+	"encoding/json"
+)
+// FetchPromptScorersRequest is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type FetchPromptScorersRequest struct {
+	Names   []string `json:"names,omitempty"`
+	IsTrace bool     `json:"is_trace,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *FetchPromptScorersRequest) UnmarshalJSON(data []byte) error {
+	type Alias FetchPromptScorersRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m FetchPromptScorersRequest) MarshalJSON() ([]byte, error) {
+	type Alias FetchPromptScorersRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/fetchpromptscorersresponse.go b/v1/internal/api/models/fetchpromptscorersresponse.go
new file mode 100644
index 0000000..65604a8
--- /dev/null
+++
b/v1/internal/api/models/fetchpromptscorersresponse.go
@@ -0,0 +1,64 @@
+package models
+
+import (
+	"encoding/json"
+)
+// FetchPromptScorersResponse is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type FetchPromptScorersResponse struct {
+	Scorers []PromptScorer `json:"scorers,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *FetchPromptScorersResponse) UnmarshalJSON(data []byte) error {
+	type Alias FetchPromptScorersResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m FetchPromptScorersResponse) MarshalJSON() ([]byte, error) {
+	type Alias FetchPromptScorersResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/oteltracespan.go b/v1/internal/api/models/oteltracespan.go
new file mode 100644
index 0000000..d024b09
--- /dev/null
+++ b/v1/internal/api/models/oteltracespan.go
@@ -0,0 +1,81 @@
+package models
+
+import (
+	"encoding/json"
+)
+// OtelTraceSpan is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type OtelTraceSpan struct {
+	OrganizationId     string        `json:"organization_id,omitempty"`
+	ProjectId          string        `json:"project_id,omitempty"`
+	UserId             string        `json:"user_id,omitempty"`
+	Timestamp          string        `json:"timestamp,omitempty"`
+	TraceId            string        `json:"trace_id,omitempty"`
+	SpanId             string        `json:"span_id,omitempty"`
+	ParentSpanId       string        `json:"parent_span_id,omitempty"`
+	TraceState         string        `json:"trace_state,omitempty"`
+	SpanName           string        `json:"span_name,omitempty"`
+	SpanKind           string        `json:"span_kind,omitempty"`
+	ServiceName        string        `json:"service_name,omitempty"`
+	ResourceAttributes interface{}   `json:"resource_attributes,omitempty"`
+	SpanAttributes     interface{}   `json:"span_attributes,omitempty"`
+	Duration           int           `json:"duration,omitempty"`
+	StatusCode         int           `json:"status_code,omitempty"`
+	StatusMessage      string        `json:"status_message,omitempty"`
+	Events             []interface{} `json:"events,omitempty"`
+	Links              []interface{} `json:"links,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *OtelTraceSpan) UnmarshalJSON(data []byte) error {
+	type Alias OtelTraceSpan
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m OtelTraceSpan) MarshalJSON() ([]byte, error) {
+	type Alias OtelTraceSpan
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/promptscorer.go b/v1/internal/api/models/promptscorer.go
new file mode 100644
index 0000000..aaf54ac
--- /dev/null
+++ b/v1/internal/api/models/promptscorer.go
@@ -0,0 +1,76 @@
+package models
+
+import (
+	"encoding/json"
+)
+// PromptScorer is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type PromptScorer struct {
+	Id             string      `json:"id,omitempty"`
+	UserId         string      `json:"user_id,omitempty"`
+	OrganizationId string      `json:"organization_id,omitempty"`
+	Name           string      `json:"name,omitempty"`
+	Prompt         string      `json:"prompt,omitempty"`
+	Threshold      float64     `json:"threshold,omitempty"`
+	Model          string      `json:"model,omitempty"`
+	Options        interface{} `json:"options,omitempty"`
+	Description    string      `json:"description,omitempty"`
+	CreatedAt      string      `json:"created_at,omitempty"`
+	UpdatedAt      string      `json:"updated_at,omitempty"`
+	IsTrace        bool        `json:"is_trace,omitempty"`
+	IsBucketRubric bool        `json:"is_bucket_rubric,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *PromptScorer) UnmarshalJSON(data []byte) error {
+	type Alias PromptScorer
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m PromptScorer) MarshalJSON() ([]byte, error) {
+	type Alias PromptScorer
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/resolveprojectnamerequest.go b/v1/internal/api/models/resolveprojectnamerequest.go
new file mode 100644
index 0000000..7b8d63a
--- /dev/null
+++ b/v1/internal/api/models/resolveprojectnamerequest.go
@@ -0,0 +1,64 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ResolveProjectNameRequest is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ResolveProjectNameRequest struct {
+	ProjectName string `json:"project_name,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ResolveProjectNameRequest) UnmarshalJSON(data []byte) error {
+	type Alias ResolveProjectNameRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ResolveProjectNameRequest) MarshalJSON() ([]byte, error) {
+	type Alias ResolveProjectNameRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/resolveprojectnameresponse.go b/v1/internal/api/models/resolveprojectnameresponse.go
new file mode 100644
index 0000000..f134f1d
--- /dev/null
+++ b/v1/internal/api/models/resolveprojectnameresponse.go
@@ -0,0 +1,64 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ResolveProjectNameResponse is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ResolveProjectNameResponse struct {
+	ProjectId string `json:"project_id,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ResolveProjectNameResponse) UnmarshalJSON(data []byte) error {
+	type Alias ResolveProjectNameResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ResolveProjectNameResponse) MarshalJSON() ([]byte, error) {
+	type Alias ResolveProjectNameResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git
a/v1/internal/api/models/savepromptscorerrequest.go b/v1/internal/api/models/savepromptscorerrequest.go
new file mode 100644
index 0000000..b8b9101
--- /dev/null
+++ b/v1/internal/api/models/savepromptscorerrequest.go
@@ -0,0 +1,70 @@
+package models
+
+import (
+	"encoding/json"
+)
+// SavePromptScorerRequest is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type SavePromptScorerRequest struct {
+	Name        string      `json:"name,omitempty"`
+	Prompt      string      `json:"prompt,omitempty"`
+	Threshold   float64     `json:"threshold,omitempty"`
+	Model       string      `json:"model,omitempty"`
+	IsTrace     bool        `json:"is_trace,omitempty"`
+	Options     interface{} `json:"options,omitempty"`
+	Description string      `json:"description,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *SavePromptScorerRequest) UnmarshalJSON(data []byte) error {
+	type Alias SavePromptScorerRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m SavePromptScorerRequest) MarshalJSON() ([]byte, error) {
+	type Alias SavePromptScorerRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/savepromptscorerresponse.go b/v1/internal/api/models/savepromptscorerresponse.go
new file mode 100644
index 0000000..6651b4a
--- /dev/null
+++ b/v1/internal/api/models/savepromptscorerresponse.go
@@ -0,0 +1,64 @@
+package models
+
+import (
+	"encoding/json"
+)
+// SavePromptScorerResponse is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type SavePromptScorerResponse struct {
+	ScorerResponse PromptScorer `json:"scorer_response,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *SavePromptScorerResponse) UnmarshalJSON(data []byte) error {
+	type Alias SavePromptScorerResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m SavePromptScorerResponse) MarshalJSON() ([]byte, error) {
+	type Alias SavePromptScorerResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/scorerconfig.go b/v1/internal/api/models/scorerconfig.go
new file mode 100644
index 0000000..51a47fc
--- /dev/null
+++ b/v1/internal/api/models/scorerconfig.go
@@ -0,0 +1,70 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ScorerConfig is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ScorerConfig struct {
+	ScoreType      string      `json:"score_type,omitempty"`
+	Name           string      `json:"name,omitempty"`
+	Threshold      float64     `json:"threshold,omitempty"`
+	Model          string      `json:"model,omitempty"`
+	StrictMode     bool        `json:"strict_mode,omitempty"`
+	RequiredParams []string    `json:"required_params,omitempty"`
+	Kwargs         interface{} `json:"kwargs,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ScorerConfig) UnmarshalJSON(data []byte) error {
+	type Alias ScorerConfig
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ScorerConfig) MarshalJSON() ([]byte, error) {
+	type Alias ScorerConfig
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/scorerdata.go b/v1/internal/api/models/scorerdata.go
new file mode 100644
index 0000000..b1899c9
--- /dev/null
+++ b/v1/internal/api/models/scorerdata.go
@@ -0,0 +1,73 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ScorerData is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ScorerData struct {
+	Id                 string      `json:"id,omitempty"`
+	Name               string      `json:"name,omitempty"`
+	Threshold          float64     `json:"threshold,omitempty"`
+	Success            bool        `json:"success,omitempty"`
+	Score              float64     `json:"score,omitempty"`
+	Reason             string      `json:"reason,omitempty"`
+	StrictMode         bool        `json:"strict_mode,omitempty"`
+	EvaluationModel    string      `json:"evaluation_model,omitempty"`
+	Error              string      `json:"error,omitempty"`
+	AdditionalMetadata interface{} `json:"additional_metadata,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ScorerData) UnmarshalJSON(data []byte) error {
+	type Alias ScorerData
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ScorerData) MarshalJSON() ([]byte, error) {
+	type Alias ScorerData
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/scorerexistsrequest.go b/v1/internal/api/models/scorerexistsrequest.go
new file mode 100644
index 0000000..327ac26
--- /dev/null
+++ b/v1/internal/api/models/scorerexistsrequest.go
@@ -0,0 +1,64 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ScorerExistsRequest is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ScorerExistsRequest struct {
+	Name string `json:"name,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ScorerExistsRequest) UnmarshalJSON(data []byte) error {
+	type Alias ScorerExistsRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ScorerExistsRequest) MarshalJSON() ([]byte, error) {
+	type Alias ScorerExistsRequest
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/scorerexistsresponse.go b/v1/internal/api/models/scorerexistsresponse.go
new file mode 100644
index 0000000..9d16e0b
--- /dev/null
+++ b/v1/internal/api/models/scorerexistsresponse.go
@@ -0,0 +1,64 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ScorerExistsResponse is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ScorerExistsResponse struct {
+	Exists bool `json:"exists,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ScorerExistsResponse) UnmarshalJSON(data []byte) error {
+	type Alias ScorerExistsResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ScorerExistsResponse) MarshalJSON() ([]byte, error) {
+	type Alias ScorerExistsResponse
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/scoringresult.go b/v1/internal/api/models/scoringresult.go
new file mode 100644
index 0000000..39523e9
--- /dev/null
+++ b/v1/internal/api/models/scoringresult.go
@@ -0,0 +1,70 @@
+package models
+
+import (
+	"encoding/json"
+)
+// ScoringResult is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type ScoringResult struct {
+	Success        bool         `json:"success,omitempty"`
+	ScorersData    []ScorerData `json:"scorers_data,omitempty"`
+	Name           string       `json:"name,omitempty"`
+	DataObject     interface{}  `json:"data_object,omitempty"`
+	TraceId        string       `json:"trace_id,omitempty"`
+	RunDuration    float64      `json:"run_duration,omitempty"`
+	EvaluationCost float64      `json:"evaluation_cost,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *ScoringResult) UnmarshalJSON(data []byte) error {
+	type Alias ScoringResult
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m ScoringResult) MarshalJSON() ([]byte, error) {
+	type Alias ScoringResult
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/internal/api/models/traceevaluationrun.go b/v1/internal/api/models/traceevaluationrun.go
new file mode 100644
index 0000000..3961b4e
--- /dev/null
+++ b/v1/internal/api/models/traceevaluationrun.go
@@ -0,0 +1,73 @@
+package models
+
+import (
+	"encoding/json"
+)
+// TraceEvaluationRun is a JSON object model; AdditionalProperties is tagged json:"-" and is handled by the custom UnmarshalJSON/MarshalJSON methods.
+type TraceEvaluationRun struct {
+	Id              string          `json:"id,omitempty"`
+	ProjectName     string          `json:"project_name,omitempty"`
+	EvalName        string          `json:"eval_name,omitempty"`
+	CustomScorers   []BaseScorer    `json:"custom_scorers,omitempty"`
+	JudgmentScorers []ScorerConfig  `json:"judgment_scorers,omitempty"`
+	Model           string          `json:"model,omitempty"`
+	CreatedAt       string          `json:"created_at,omitempty"`
+	TraceAndSpanIds [][]interface{} `json:"trace_and_span_ids,omitempty"`
+	IsOffline       bool            `json:"is_offline,omitempty"`
+	IsBucketRun     bool            `json:"is_bucket_run,omitempty"`
+
+	AdditionalProperties map[string]interface{} `json:"-"`
+}
+// UnmarshalJSON decodes data into the typed fields through a method-less Alias (so this method is not re-entered), then decodes data again into AdditionalProperties, which therefore holds every key, typed ones included.
+func (m *TraceEvaluationRun) UnmarshalJSON(data []byte) error {
+	type Alias TraceEvaluationRun
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(m),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		{
+			return err
+		}
+	}
+	m.AdditionalProperties = make(map[string]interface{})
+	if err := json.Unmarshal(data, &m.AdditionalProperties); err != nil {
+		{
+			return err
+		}
+	}
+	return nil
+}
+// MarshalJSON encodes the typed fields, then copies AdditionalProperties over the result, so a map entry replaces a typed field under the same key. NOTE(review): the map filled by UnmarshalJSON still holds typed keys, so later edits to typed fields are overwritten here; confirm intended.
+func (m TraceEvaluationRun) MarshalJSON() ([]byte, error) {
+	type Alias TraceEvaluationRun
+	aux := &struct {
+		*Alias
+	}{
+		Alias: (*Alias)(&m),
+	}
+
+	result := make(map[string]interface{})
+
+	mainBytes, err := json.Marshal(aux)
+	if err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	if err := json.Unmarshal(mainBytes, &result); err != nil {
+		{
+			return nil, err
+		}
+	}
+
+	for k, v := range m.AdditionalProperties {
+		{
+			result[k] = v
+		}
+	}
+
+	return json.Marshal(result)
+}
diff --git a/v1/judgeval.go b/v1/judgeval.go
new file mode 100644
index 0000000..ac3db66
--- /dev/null
+++ b/v1/judgeval.go
@@ -0,0 +1,46 @@
+package v1
+
+import (
+	"errors"
+
+	"github.com/JudgmentLabs/judgeval-go/pkg/env"
+	"github.com/JudgmentLabs/judgeval-go/v1/internal/api"
+)
+// Judgeval bundles one shared API client with the Tracer, Scorers and Evaluation factories that are built on it.
+type Judgeval struct {
+	apiClient  *api.Client
+	Tracer     *TracerFactory
+	Scorers    *ScorersFactory
+	Evaluation *EvaluationFactory
+}
+// NewJudgeval starts from the env package defaults, applies opts in order, and returns an error if the API key, organization ID or API URL is still empty afterwards.
+func NewJudgeval(opts ...Option) (*Judgeval, error) {
+	cfg := &clientConfig{
+		apiKey: env.JudgmentAPIKey,
+		orgID:  env.JudgmentOrgID,
+		apiURL: env.JudgmentAPIURL,
+	}
+
+	for _, opt := range opts {
+		opt.apply(cfg)
+	}
+
+	if cfg.apiKey == "" {
+		return nil, errors.New("API key is required: set JUDGMENT_API_KEY environment variable or use WithAPIKey option")
+	}
+	if cfg.orgID == "" {
+		return nil, errors.New("organization ID is required: set JUDGMENT_ORG_ID environment variable or use WithOrganizationID option")
+	}
+	if cfg.apiURL == "" {
+		return nil, errors.New("API URL is required: set JUDGMENT_API_URL environment variable or use WithAPIURL option")
+	}
+
+	apiClient := api.NewClient(cfg.apiURL, cfg.apiKey, cfg.orgID)
+
+	return &Judgeval{
+		apiClient:  apiClient,
+		Tracer:     &TracerFactory{client: apiClient},
+		Scorers:    newScorersFactory(apiClient),
+		Evaluation: &EvaluationFactory{client: apiClient},
+	}, nil
+}
diff --git a/v1/judgment_trace_keys.go b/v1/judgment_trace_keys.go
new file mode 100644
index 0000000..ec248f0
--- /dev/null
+++ b/v1/judgment_trace_keys.go
@@ -0,0 +1,40 @@
+package v1
+// Attribute key names: the judgment.* keys come first, followed by the gen_ai.* keys.
+const (
+	AttributeKeysJudgmentSpanKind          = "judgment.span_kind"
+	AttributeKeysJudgmentInput             = "judgment.input"
+	AttributeKeysJudgmentOutput            = "judgment.output"
+	AttributeKeysJudgmentOfflineMode       = "judgment.offline_mode"
+	AttributeKeysJudgmentUpdateID          = "judgment.update_id"
+	AttributeKeysJudgmentCustomerID        = "judgment.customer_id"
+	AttributeKeysJudgmentAgentID           = "judgment.agent_id"
+	AttributeKeysJudgmentParentAgentID     = "judgment.parent_agent_id"
+	AttributeKeysJudgmentAgentClassName    = "judgment.agent_class_name"
+	AttributeKeysJudgmentAgentInstanceName = "judgment.agent_instance_name"
+	AttributeKeysJudgmentIsAgentEntryPoint = "judgment.is_agent_entry_point"
+	AttributeKeysJudgmentCumulativeLLMCost = "judgment.cumulative_llm_cost"
+	AttributeKeysJudgmentStateBefore       = "judgment.state_before"
+	AttributeKeysJudgmentStateAfter        = "judgment.state_after"
+	AttributeKeysPendingTraceEval          = "judgment.pending_trace_eval"
+
+	AttributeKeysGenAIPrompt                        = "gen_ai.prompt"
+	AttributeKeysGenAICompletion                    = "gen_ai.completion"
+	AttributeKeysGenAIRequestModel                  = "gen_ai.request.model"
+	AttributeKeysGenAIResponseModel                 = "gen_ai.response.model"
+	AttributeKeysGenAISystem                        = "gen_ai.system"
+	AttributeKeysGenAIUsageInputTokens              = "gen_ai.usage.input_tokens"
+	AttributeKeysGenAIUsageOutputTokens             = "gen_ai.usage.output_tokens"
+	AttributeKeysGenAIUsageCacheCreationInputTokens = "gen_ai.usage.cache_creation_input_tokens"
+	AttributeKeysGenAIUsageCacheReadInputTokens     = "gen_ai.usage.cache_read_input_tokens"
+	AttributeKeysGenAIRequestTemperature            = "gen_ai.request.temperature"
+	AttributeKeysGenAIRequestMaxTokens              = "gen_ai.request.max_tokens"
+	AttributeKeysGenAIResponseFinishReasons         = "gen_ai.response.finish_reasons"
+)
+// Resource key names: service.name, the telemetry.sdk.* keys and judgment.project_id.
+const (
+	ResourceKeysServiceName          = "service.name"
+	ResourceKeysTelemetrySDKLanguage = "telemetry.sdk.language"
+	ResourceKeysTelemetrySDKName     = "telemetry.sdk.name"
+	ResourceKeysTelemetrySDKVersion  = "telemetry.sdk.version"
+	ResourceKeysJudgmentProjectID    = "judgment.project_id"
+)
diff --git a/v1/option.go b/v1/option.go
new file mode 100644
index 0000000..ea78eb0
---
/dev/null +++ b/v1/option.go @@ -0,0 +1,72 @@ +package v1 + +type clientConfig struct { + apiKey string + orgID string + apiURL string +} + +type Option interface { + apply(*clientConfig) +} + +type optionFunc func(*clientConfig) + +func (f optionFunc) apply(c *clientConfig) { + f(c) +} + +func WithAPIKey(key string) Option { + return optionFunc(func(c *clientConfig) { + c.apiKey = key + }) +} + +func WithOrganizationID(id string) Option { + return optionFunc(func(c *clientConfig) { + c.orgID = id + }) +} + +func WithAPIURL(url string) Option { + return optionFunc(func(c *clientConfig) { + c.apiURL = url + }) +} + +func Bool(v bool) *bool { + return &v +} + +func String(v string) *string { + return &v +} + +func Float(v float64) *float64 { + return &v +} + +func Int(v int) *int { + return &v +} + +func getBool(ptr *bool, defaultVal bool) bool { + if ptr == nil { + return defaultVal + } + return *ptr +} + +func getString(ptr *string, defaultVal string) string { + if ptr == nil { + return defaultVal + } + return *ptr +} + +func getFloat(ptr *float64, defaultVal float64) float64 { + if ptr == nil { + return defaultVal + } + return *ptr +} diff --git a/v1/prompt_scorer.go b/v1/prompt_scorer.go new file mode 100644 index 0000000..24896fb --- /dev/null +++ b/v1/prompt_scorer.go @@ -0,0 +1,231 @@ +package v1 + +import ( + "context" + "fmt" + "sync" + + "github.com/JudgmentLabs/judgeval-go/pkg/env" + "github.com/JudgmentLabs/judgeval-go/v1/internal/api" + "github.com/JudgmentLabs/judgeval-go/v1/internal/api/models" +) + +type PromptScorerFactory struct { + client *api.Client + isTrace bool + cache sync.Map +} + +type PromptScorerCreateParams struct { + Name string + Prompt string + Threshold float64 + Options map[string]float64 + Model *string + Description *string +} + +type PromptScorer struct { + name string + prompt string + threshold float64 + options map[string]float64 + model string + description string + isTrace bool +} + +func (f *PromptScorerFactory) Get(ctx 
context.Context, name string) (*PromptScorer, error) { + cacheKey := f.buildCacheKey(name) + + if cached, ok := f.cache.Load(cacheKey); ok { + return cached.(*PromptScorer), nil + } + + req := &models.FetchPromptScorersRequest{ + Names: []string{name}, + } + + resp, err := f.client.FetchScorers(req) + if err != nil { + return nil, fmt.Errorf("failed to fetch prompt scorer '%s': %w", name, err) + } + + if len(resp.Scorers) == 0 { + return nil, fmt.Errorf("failed to fetch prompt scorer '%s': not found", name) + } + + scorerModel := resp.Scorers[0] + scorerIsTrace := scorerModel.IsTrace + + if scorerIsTrace != f.isTrace { + expectedType := "PromptScorer" + actualType := "PromptScorer" + if f.isTrace { + expectedType = "TracePromptScorer" + } + if scorerIsTrace { + actualType = "TracePromptScorer" + } + return nil, fmt.Errorf("scorer with name %s is a %s, not a %s", name, actualType, expectedType) + } + + scorer := f.createFromModel(&scorerModel, name) + f.cache.Store(cacheKey, scorer) + + return scorer, nil +} + +func (f *PromptScorerFactory) Create(params PromptScorerCreateParams) (*PromptScorer, error) { + if params.Name == "" { + return nil, fmt.Errorf("name is required") + } + if params.Prompt == "" { + return nil, fmt.Errorf("prompt is required") + } + + model := env.JudgmentDefaultGPTModel + if params.Model != nil { + model = *params.Model + } + + description := "" + if params.Description != nil { + description = *params.Description + } + + return &PromptScorer{ + name: params.Name, + prompt: params.Prompt, + threshold: params.Threshold, + options: params.Options, + model: model, + description: description, + isTrace: f.isTrace, + }, nil +} + +func (f *PromptScorerFactory) createFromModel(model *models.PromptScorer, name string) *PromptScorer { + options := make(map[string]float64) + if model.Options != nil { + if optsMap, ok := model.Options.(map[string]interface{}); ok { + for k, v := range optsMap { + if floatVal, ok := v.(float64); ok { + options[k] = 
floatVal + } + } + } + } + + threshold := 0.5 + if model.Threshold != 0 { + threshold = model.Threshold + } + + modelName := env.JudgmentDefaultGPTModel + if model.Model != "" { + modelName = model.Model + } + + description := "" + if model.Description != "" { + description = model.Description + } + + return &PromptScorer{ + name: name, + prompt: model.Prompt, + threshold: threshold, + options: options, + model: modelName, + description: description, + isTrace: f.isTrace, + } +} + +func (f *PromptScorerFactory) buildCacheKey(name string) string { + return fmt.Sprintf("%s:%s:%s", name, f.client.GetAPIKey(), f.client.GetOrganizationID()) +} + +func (s *PromptScorer) GetName() string { + return s.name +} + +func (s *PromptScorer) GetPrompt() string { + return s.prompt +} + +func (s *PromptScorer) GetThreshold() float64 { + return s.threshold +} + +func (s *PromptScorer) GetOptions() map[string]float64 { + optsCopy := make(map[string]float64) + for k, v := range s.options { + optsCopy[k] = v + } + return optsCopy +} + +func (s *PromptScorer) GetModel() string { + return s.model +} + +func (s *PromptScorer) GetDescription() string { + return s.description +} + +func (s *PromptScorer) SetThreshold(threshold float64) { + s.threshold = threshold +} + +func (s *PromptScorer) SetPrompt(prompt string) { + s.prompt = prompt +} + +func (s *PromptScorer) SetModel(model string) { + s.model = model +} + +func (s *PromptScorer) SetOptions(options map[string]float64) { + s.options = make(map[string]float64) + for k, v := range options { + s.options[k] = v + } +} + +func (s *PromptScorer) SetDescription(description string) { + s.description = description +} + +func (s *PromptScorer) AppendToPrompt(addition string) { + s.prompt = s.prompt + addition +} + +func (s *PromptScorer) GetScorerConfig() *models.ScorerConfig { + scoreType := APIScorerTypePromptScorer.String() + if s.isTrace { + scoreType = APIScorerTypeTracePromptScorer.String() + } + + kwargs := map[string]interface{}{ + 
"prompt": s.prompt, + } + + if len(s.options) > 0 { + kwargs["options"] = s.options + } + if s.model != "" { + kwargs["model"] = s.model + } + if s.description != "" { + kwargs["description"] = s.description + } + + return &models.ScorerConfig{ + ScoreType: scoreType, + Threshold: s.threshold, + Name: s.name, + Kwargs: kwargs, + } +} diff --git a/v1/scorers.go b/v1/scorers.go new file mode 100644 index 0000000..32b0a33 --- /dev/null +++ b/v1/scorers.go @@ -0,0 +1,21 @@ +package v1 + +import "github.com/JudgmentLabs/judgeval-go/v1/internal/api" + +type ScorersFactory struct { + client *api.Client + BuiltIn *BuiltInScorersFactory + PromptScorer *PromptScorerFactory + TracePromptScorer *PromptScorerFactory + CustomScorer *CustomScorerFactory +} + +func newScorersFactory(client *api.Client) *ScorersFactory { + return &ScorersFactory{ + client: client, + BuiltIn: &BuiltInScorersFactory{}, + PromptScorer: &PromptScorerFactory{client: client, isTrace: false}, + TracePromptScorer: &PromptScorerFactory{client: client, isTrace: true}, + CustomScorer: &CustomScorerFactory{}, + } +} diff --git a/v1/tracer.go b/v1/tracer.go new file mode 100644 index 0000000..68cb696 --- /dev/null +++ b/v1/tracer.go @@ -0,0 +1,405 @@ +package v1 + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/JudgmentLabs/judgeval-go/pkg/logger" + "github.com/JudgmentLabs/judgeval-go/pkg/version" + "github.com/JudgmentLabs/judgeval-go/v1/internal/api" + "github.com/JudgmentLabs/judgeval-go/v1/internal/api/models" + "github.com/google/uuid" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.21.0" + "go.opentelemetry.io/otel/trace" +) + +const TracerName = "judgeval" + +type TracerFactory struct { + client *api.Client +} + +type TracerCreateParams struct { + ProjectName string + EnableEvaluation *bool + Serializer SerializerFunc + 
ResourceAttributes map[string]interface{} + Initialize *bool +} + +func (f *TracerFactory) Create(ctx context.Context, params TracerCreateParams) (*Tracer, error) { + if params.ProjectName == "" { + return nil, fmt.Errorf("project name is required") + } + + serializer := params.Serializer + if serializer == nil { + serializer = defaultJSONSerializer + } + + projectID, err := resolveProjectID(f.client, params.ProjectName) + if err != nil { + logger.Error("Failed to resolve project %s: %v. Skipping Judgment export.", params.ProjectName, err) + projectID = "" + } + + tracer := &Tracer{ + BaseTracer: &BaseTracer{ + projectName: params.ProjectName, + projectID: projectID, + enableEvaluation: getBool(params.EnableEvaluation, true), + apiClient: f.client, + serializer: serializer, + tracer: otel.Tracer(TracerName), + }, + resourceAttributes: params.ResourceAttributes, + } + + if getBool(params.Initialize, true) { + if err := tracer.Initialize(ctx); err != nil { + return nil, err + } + } + + return tracer, nil +} + +type Tracer struct { + *BaseTracer + tracerProvider *sdktrace.TracerProvider + resourceAttributes map[string]interface{} +} + +func (t *Tracer) Initialize(ctx context.Context) error { + if t.tracerProvider != nil { + logger.Warning("Tracer already initialized") + return nil + } + + attrs := []attribute.KeyValue{ + semconv.ServiceName(t.projectName), + attribute.String("telemetry.sdk.name", TracerName), + attribute.String("telemetry.sdk.version", version.Version), + } + + for k, v := range t.resourceAttributes { + switch val := v.(type) { + case string: + attrs = append(attrs, attribute.String(k, val)) + case int: + attrs = append(attrs, attribute.Int(k, val)) + case int8: + attrs = append(attrs, attribute.Int64(k, int64(val))) + case int16: + attrs = append(attrs, attribute.Int64(k, int64(val))) + case int32: + attrs = append(attrs, attribute.Int64(k, int64(val))) + case int64: + attrs = append(attrs, attribute.Int64(k, val)) + case uint: + attrs = 
append(attrs, attribute.Int64(k, int64(val))) + case uint8: + attrs = append(attrs, attribute.Int64(k, int64(val))) + case uint16: + attrs = append(attrs, attribute.Int64(k, int64(val))) + case uint32: + attrs = append(attrs, attribute.Int64(k, int64(val))) + case uint64: + attrs = append(attrs, attribute.Int64(k, int64(val))) + case float32: + attrs = append(attrs, attribute.Float64(k, float64(val))) + case float64: + attrs = append(attrs, attribute.Float64(k, val)) + case bool: + attrs = append(attrs, attribute.Bool(k, val)) + case []string: + attrs = append(attrs, attribute.StringSlice(k, val)) + default: + serialized, err := t.serializer(val) + if err == nil { + attrs = append(attrs, attribute.String(k, serialized)) + } + } + } + + res := resource.NewWithAttributes( + semconv.SchemaURL, + attrs..., + ) + + spanExporter := t.getSpanExporter(ctx) + + t.tracerProvider = sdktrace.NewTracerProvider( + sdktrace.WithResource(res), + sdktrace.WithBatcher(spanExporter), + ) + + otel.SetTracerProvider(t.tracerProvider) + + logger.Info("Tracer initialized successfully") + return nil +} + +func (t *Tracer) ForceFlush(ctx context.Context) error { + if t.tracerProvider == nil { + logger.Warning("Tracer not initialized, skipping force flush") + return nil + } + return t.tracerProvider.ForceFlush(ctx) +} + +func (t *Tracer) Shutdown(ctx context.Context) error { + if t.tracerProvider == nil { + logger.Warning("Tracer not initialized, skipping shutdown") + return nil + } + + err := t.tracerProvider.Shutdown(ctx) + if err != nil { + logger.Error("Failed to shutdown Tracer: %v", err) + return err + } + + t.tracerProvider = nil + logger.Info("Tracer shut down successfully") + return nil +} + +type BaseTracer struct { + projectName string + projectID string + enableEvaluation bool + apiClient *api.Client + serializer SerializerFunc + tracer trace.Tracer +} + +func (b *BaseTracer) GetTracer() trace.Tracer { + return b.tracer +} + +func (b *BaseTracer) Span(ctx context.Context, 
spanName string) (context.Context, trace.Span) { + ctx, span := b.tracer.Start(ctx, spanName) + return ctx, span +} + +func (b *BaseTracer) SetSpanKind(span trace.Span, kind string) { + if kind != "" { + span.SetAttributes(attribute.String(AttributeKeysJudgmentSpanKind, kind)) + } +} + +func (b *BaseTracer) SetLLMSpan(span trace.Span) { + b.SetSpanKind(span, "llm") +} + +func (b *BaseTracer) SetToolSpan(span trace.Span) { + b.SetSpanKind(span, "tool") +} + +func (b *BaseTracer) SetGeneralSpan(span trace.Span) { + b.SetSpanKind(span, "span") +} + +func (b *BaseTracer) SetAttribute(span trace.Span, key string, value interface{}) { + if key == "" { + return + } + + switch v := value.(type) { + case string: + span.SetAttributes(attribute.String(key, v)) + case int: + span.SetAttributes(attribute.Int(key, v)) + case int8: + span.SetAttributes(attribute.Int64(key, int64(v))) + case int16: + span.SetAttributes(attribute.Int64(key, int64(v))) + case int32: + span.SetAttributes(attribute.Int64(key, int64(v))) + case int64: + span.SetAttributes(attribute.Int64(key, v)) + case uint: + span.SetAttributes(attribute.Int64(key, int64(v))) + case uint8: + span.SetAttributes(attribute.Int64(key, int64(v))) + case uint16: + span.SetAttributes(attribute.Int64(key, int64(v))) + case uint32: + span.SetAttributes(attribute.Int64(key, int64(v))) + case uint64: + span.SetAttributes(attribute.Int64(key, int64(v))) + case float32: + span.SetAttributes(attribute.Float64(key, float64(v))) + case float64: + span.SetAttributes(attribute.Float64(key, v)) + case bool: + span.SetAttributes(attribute.Bool(key, v)) + case []string: + span.SetAttributes(attribute.StringSlice(key, v)) + default: + serialized, err := b.serializer(v) + if err == nil { + span.SetAttributes(attribute.String(key, serialized)) + } + } +} + +func (b *BaseTracer) SetAttributes(span trace.Span, attrs map[string]interface{}) { + for k, v := range attrs { + b.SetAttribute(span, k, v) + } +} + +func (b *BaseTracer) SetInput(span 
trace.Span, input interface{}) { + b.SetAttribute(span, AttributeKeysJudgmentInput, input) +} + +func (b *BaseTracer) SetOutput(span trace.Span, output interface{}) { + b.SetAttribute(span, AttributeKeysJudgmentOutput, output) +} + +func (b *BaseTracer) AsyncEvaluate(ctx context.Context, scorer BaseScorer, example *Example) { + if !b.enableEvaluation { + return + } + + span := trace.SpanFromContext(ctx) + if span == nil || !span.SpanContext().IsSampled() { + return + } + + spanContext := span.SpanContext() + traceID := spanContext.TraceID().String() + spanID := spanContext.SpanID().String() + + logger.Info("asyncEvaluate: project=%s, traceId=%s, spanId=%s, scorer=%s", + b.projectName, traceID, spanID, scorer.GetName()) + + evaluationRun := b.createEvaluationRun(scorer, example, traceID, spanID) + + go func() { + if _, err := b.apiClient.AddToRunEvalQueue(evaluationRun); err != nil { + logger.Error("Failed to enqueue evaluation run: %v", err) + } + }() +} + +func (b *BaseTracer) AsyncTraceEvaluate(ctx context.Context, scorer BaseScorer) { + if !b.enableEvaluation { + return + } + + span := trace.SpanFromContext(ctx) + if span == nil || !span.SpanContext().IsSampled() { + return + } + + spanContext := span.SpanContext() + traceID := spanContext.TraceID().String() + spanID := spanContext.SpanID().String() + + logger.Info("asyncTraceEvaluate: project=%s, traceId=%s, spanId=%s, scorer=%s", + b.projectName, traceID, spanID, scorer.GetName()) + + evaluationRun := b.createTraceEvaluationRun(scorer, traceID, spanID) + + traceEvalJSON, err := json.Marshal(evaluationRun) + if err != nil { + logger.Error("Failed to serialize trace evaluation: %v", err) + return + } + + span.SetAttributes(attribute.String(AttributeKeysPendingTraceEval, string(traceEvalJSON))) +} + +func (b *BaseTracer) getSpanExporter(ctx context.Context) sdktrace.SpanExporter { + if b.projectID != "" { + return newJudgmentSpanExporter(ctx, b.buildEndpoint(), b.apiClient, b.projectID) + } + 
logger.Error("Project not resolved; cannot create exporter, returning NoOpSpanExporter") + return newNoOpSpanExporter() +} + +func (b *BaseTracer) buildEndpoint() string { + baseURL := b.apiClient.GetBaseURL() + if len(baseURL) > 0 && baseURL[len(baseURL)-1] == '/' { + return baseURL + "otel/v1/traces" + } + return baseURL + "/otel/v1/traces" +} + +func (b *BaseTracer) createEvaluationRun(scorer BaseScorer, example *Example, traceID, spanID string) *models.ExampleEvaluationRun { + runID := "async_evaluate_" + spanID + + return &models.ExampleEvaluationRun{ + Id: uuid.New().String(), + ProjectName: b.projectName, + EvalName: runID, + TraceId: traceID, + TraceSpanId: spanID, + Examples: []models.Example{example.toModel()}, + JudgmentScorers: []models.ScorerConfig{*scorer.GetScorerConfig()}, + CustomScorers: []models.BaseScorer{}, + CreatedAt: time.Now().UTC().Format(time.RFC3339), + } +} + +func (b *BaseTracer) createTraceEvaluationRun(scorer BaseScorer, traceID, spanID string) *models.TraceEvaluationRun { + evalName := "async_trace_evaluate_" + spanID + + return &models.TraceEvaluationRun{ + Id: uuid.New().String(), + ProjectName: b.projectName, + EvalName: evalName, + TraceAndSpanIds: [][]any{{traceID, spanID}}, + JudgmentScorers: []models.ScorerConfig{*scorer.GetScorerConfig()}, + CustomScorers: []models.BaseScorer{}, + IsOffline: false, + IsBucketRun: false, + CreatedAt: time.Now().UTC().Format(time.RFC3339), + } +} + +func resolveProjectID(client *api.Client, projectName string) (string, error) { + logger.Info("Resolving project ID for project: %s", projectName) + + req := &models.ResolveProjectNameRequest{ + ProjectName: projectName, + } + + resp, err := client.ProjectsResolve(req) + if err != nil { + return "", fmt.Errorf("failed to resolve project ID: %w", err) + } + + if resp.ProjectId == "" { + return "", fmt.Errorf("project ID not found for project: %s", projectName) + } + + logger.Info("Resolved project ID: %s", resp.ProjectId) + return resp.ProjectId, 
nil +} + +func defaultJSONSerializer(v interface{}) (string, error) { + bytes, err := json.Marshal(v) + if err != nil { + return "", err + } + return string(bytes), nil +} + +func (b *BaseTracer) StartSpan(ctx context.Context, spanName string) (context.Context, trace.Span) { + ctx, span := b.tracer.Start(ctx, spanName) + return ctx, span +} + +func (b *BaseTracer) EndSpan(span trace.Span) { + span.End() +} diff --git a/v1/types.go b/v1/types.go new file mode 100644 index 0000000..20bb616 --- /dev/null +++ b/v1/types.go @@ -0,0 +1,22 @@ +package v1 + +import "github.com/JudgmentLabs/judgeval-go/v1/internal/api/models" + +type APIScorerType string + +const ( + APIScorerTypePromptScorer APIScorerType = "Prompt Scorer" + APIScorerTypeTracePromptScorer APIScorerType = "Trace Prompt Scorer" + APIScorerTypeFaithfulness APIScorerType = "Faithfulness" + APIScorerTypeAnswerRelevancy APIScorerType = "Answer Relevancy" + APIScorerTypeAnswerCorrectness APIScorerType = "Answer Correctness" + APIScorerTypeCustom APIScorerType = "Custom" +) + +func (t APIScorerType) String() string { + return string(t) +} + +type ScorerConfig = models.ScorerConfig + +type SerializerFunc func(interface{}) (string, error)