Skip to content

Retrieve Results of an Evaluation Run Prompt

client.Agents.EvaluationRuns.GetResults(ctx, evaluationRunUuid, promptID) (*AgentEvaluationRunGetResultsResponse, error)
get/v2/gen-ai/evaluation_runs/{evaluation_run_uuid}/results/{prompt_id}

To retrieve results of an evaluation run, send a GET request to /v2/gen-ai/evaluation_runs/{evaluation_run_uuid}/results/{prompt_id}.

Parameters
evaluationRunUuid string
promptID int64
Returns
type AgentEvaluationRunGetResultsResponse struct{…}
Prompt APIEvaluationPrompt (optional)
EvaluationTraceSpans []APIEvaluationPromptEvaluationTraceSpan (optional)

The evaluated trace spans.

CreatedAt Time (optional)

When the span was created

format: date-time
Input unknown (optional)

Input data for the span (flexible structure - can be messages array, string, etc.)

Name string (optional)

Name/identifier for the span

Output unknown (optional)

Output data from the span (flexible structure - can be message, string, etc.)

RetrieverChunks []APIEvaluationPromptEvaluationTraceSpansRetrieverChunk (optional)

Any retriever span chunks that were included as part of the span.

ChunkUsagePct float64 (optional)

The usage percentage of the chunk.

format: double
ChunkUsed bool (optional)

Indicates if the chunk was used in the prompt.

IndexUuid string (optional)

The index uuid (Knowledge Base) of the chunk.

SourceName string (optional)

The source name for the chunk, e.g., the file name or document title.

Text string (optional)

Text content of the chunk.

SpanLevelMetricResults []APIEvaluationMetricResult (optional)

The span-level metric results.

ErrorDescription string (optional)

Error description if the metric could not be calculated.

MetricName string (optional)

Metric name

MetricValueType APIEvaluationMetricResultMetricValueType (optional)
Accepts one of the following:
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeUnspecified APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_UNSPECIFIED"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeNumber APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_NUMBER"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeString APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_STRING"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypePercentage APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_PERCENTAGE"
NumberValue float64 (optional)

The value of the metric as a number.

format: double
Reasoning string (optional)

Reasoning of the metric result.

StringValue string (optional)

The value of the metric as a string.

Type APIEvaluationPromptEvaluationTraceSpansType (optional)

Types of spans in a trace

Accepts one of the following:
const APIEvaluationPromptEvaluationTraceSpansTypeTraceSpanTypeUnknown APIEvaluationPromptEvaluationTraceSpansType = "TRACE_SPAN_TYPE_UNKNOWN"
const APIEvaluationPromptEvaluationTraceSpansTypeTraceSpanTypeLlm APIEvaluationPromptEvaluationTraceSpansType = "TRACE_SPAN_TYPE_LLM"
const APIEvaluationPromptEvaluationTraceSpansTypeTraceSpanTypeRetriever APIEvaluationPromptEvaluationTraceSpansType = "TRACE_SPAN_TYPE_RETRIEVER"
const APIEvaluationPromptEvaluationTraceSpansTypeTraceSpanTypeTool APIEvaluationPromptEvaluationTraceSpansType = "TRACE_SPAN_TYPE_TOOL"
GroundTruth string (optional)

The ground truth for the prompt.

Input string (optional)
InputTokens string (optional)

The number of input tokens used in the prompt.

format: uint64
Output string (optional)
OutputTokens string (optional)

The number of output tokens used in the prompt.

format: uint64
PromptChunks []APIEvaluationPromptPromptChunk (optional)

The list of prompt chunks.

ChunkUsagePct float64 (optional)

The usage percentage of the chunk.

format: double
ChunkUsed bool (optional)

Indicates if the chunk was used in the prompt.

IndexUuid string (optional)

The index uuid (Knowledge Base) of the chunk.

SourceName string (optional)

The source name for the chunk, e.g., the file name or document title.

Text string (optional)

Text content of the chunk.

PromptID int64 (optional)

Prompt ID

format: int64
PromptLevelMetricResults []APIEvaluationMetricResult (optional)

The metric results for the prompt.

ErrorDescription string (optional)

Error description if the metric could not be calculated.

MetricName string (optional)

Metric name

MetricValueType APIEvaluationMetricResultMetricValueType (optional)
Accepts one of the following:
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeUnspecified APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_UNSPECIFIED"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeNumber APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_NUMBER"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeString APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_STRING"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypePercentage APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_PERCENTAGE"
NumberValue float64 (optional)

The value of the metric as a number.

format: double
Reasoning string (optional)

Reasoning of the metric result.

StringValue string (optional)

The value of the metric as a string.

TraceID string (optional)

The trace id for the prompt.

Retrieve Results of an Evaluation Run Prompt
package main

import (
  "context"
  "fmt"

  "github.com/stainless-sdks/-go"
  "github.com/stainless-sdks/-go/option"
)

func main() {
  client := gradient.NewClient(
    option.WithAccessToken("My Access Token"),
  )
  response, err := client.Agents.EvaluationRuns.GetResults(
    context.TODO(),
    `"123e4567-e89b-12d3-a456-426614174000"`,
    int64(1),
  )
  if err != nil {
    panic(err.Error())
  }
  fmt.Printf("%+v\n", response.Prompt)
}
{
  "prompt": {
    "evaluation_trace_spans": [
      {
        "created_at": "2023-01-01T00:00:00Z",
        "input": {},
        "name": "example name",
        "output": {},
        "retriever_chunks": [
          {
            "chunk_usage_pct": 123,
            "chunk_used": true,
            "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
            "source_name": "example name",
            "text": "example string"
          }
        ],
        "span_level_metric_results": [
          {
            "error_description": "example string",
            "metric_name": "example name",
            "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
            "number_value": 123,
            "reasoning": "example string",
            "string_value": "example string"
          }
        ],
        "type": "TRACE_SPAN_TYPE_UNKNOWN"
      }
    ],
    "ground_truth": "example string",
    "input": "example string",
    "input_tokens": "12345",
    "output": "example string",
    "output_tokens": "12345",
    "prompt_chunks": [
      {
        "chunk_usage_pct": 123,
        "chunk_used": true,
        "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
        "source_name": "example name",
        "text": "example string"
      }
    ],
    "prompt_id": 123,
    "prompt_level_metric_results": [
      {
        "error_description": "example string",
        "metric_name": "example name",
        "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
        "number_value": 123,
        "reasoning": "example string",
        "string_value": "example string"
      }
    ],
    "trace_id": "123e4567-e89b-12d3-a456-426614174000"
  }
}
Returns Examples
{
  "prompt": {
    "evaluation_trace_spans": [
      {
        "created_at": "2023-01-01T00:00:00Z",
        "input": {},
        "name": "example name",
        "output": {},
        "retriever_chunks": [
          {
            "chunk_usage_pct": 123,
            "chunk_used": true,
            "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
            "source_name": "example name",
            "text": "example string"
          }
        ],
        "span_level_metric_results": [
          {
            "error_description": "example string",
            "metric_name": "example name",
            "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
            "number_value": 123,
            "reasoning": "example string",
            "string_value": "example string"
          }
        ],
        "type": "TRACE_SPAN_TYPE_UNKNOWN"
      }
    ],
    "ground_truth": "example string",
    "input": "example string",
    "input_tokens": "12345",
    "output": "example string",
    "output_tokens": "12345",
    "prompt_chunks": [
      {
        "chunk_usage_pct": 123,
        "chunk_used": true,
        "index_uuid": "123e4567-e89b-12d3-a456-426614174000",
        "source_name": "example name",
        "text": "example string"
      }
    ],
    "prompt_id": 123,
    "prompt_level_metric_results": [
      {
        "error_description": "example string",
        "metric_name": "example name",
        "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
        "number_value": 123,
        "reasoning": "example string",
        "string_value": "example string"
      }
    ],
    "trace_id": "123e4567-e89b-12d3-a456-426614174000"
  }
}