Skip to content

List Evaluation Runs by Test Case

client.Agents.EvaluationTestCases.ListEvaluationRuns(ctx, evaluationTestCaseUuid, query) (*AgentEvaluationTestCaseListEvaluationRunsResponse, error)
GET /v2/gen-ai/evaluation_test_cases/{evaluation_test_case_uuid}/evaluation_runs

To list all evaluation runs by test case, send a GET request to /v2/gen-ai/evaluation_test_cases/{evaluation_test_case_uuid}/evaluation_runs.

Parameters
evaluationTestCaseUuid string
query AgentEvaluationTestCaseListEvaluationRunsParams
EvaluationTestCaseVersion param.Field[int64] (optional)

Version of the test case.

Returns
type AgentEvaluationTestCaseListEvaluationRunsResponse struct{…}
EvaluationRuns []APIEvaluationRun (optional)

List of evaluation runs.

AgentDeleted booloptional

Whether agent is deleted

AgentDeploymentName stringoptional

The agent deployment name

AgentName stringoptional

Agent name

AgentUuid stringoptional

Agent UUID.

AgentVersionHash stringoptional

Version hash

AgentWorkspaceUuid stringoptional

Agent workspace uuid

CreatedByUserEmail stringoptional
CreatedByUserID string (optional)
format: uint64
ErrorDescription stringoptional

The error description

EvaluationRunUuid stringoptional

Evaluation run UUID.

EvaluationTestCaseWorkspaceUuid stringoptional

Evaluation test case workspace uuid

FinishedAt Time (optional)

Run end time.

format: date-time
PassStatus booloptional

The pass status of the evaluation run based on the star metric.

QueuedAt Time (optional)

Run queued time.

format: date-time
RunLevelMetricResults []APIEvaluationMetricResult (optional)
ErrorDescription stringoptional

Error description if the metric could not be calculated.

MetricName stringoptional

Metric name

MetricValueType APIEvaluationMetricResultMetricValueType (optional)
Accepts one of the following:
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeUnspecified APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_UNSPECIFIED"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeNumber APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_NUMBER"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeString APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_STRING"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypePercentage APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_PERCENTAGE"
NumberValue float64 (optional)

The value of the metric as a number.

format: double
Reasoning stringoptional

Reasoning of the metric result.

StringValue stringoptional

The value of the metric as a string.

RunName stringoptional

Run name.

StarMetricResult APIEvaluationMetricResult (optional)
ErrorDescription stringoptional

Error description if the metric could not be calculated.

MetricName stringoptional

Metric name

MetricValueType APIEvaluationMetricResultMetricValueType (optional)
Accepts one of the following:
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeUnspecified APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_UNSPECIFIED"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeNumber APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_NUMBER"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypeString APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_STRING"
const APIEvaluationMetricResultMetricValueTypeMetricValueTypePercentage APIEvaluationMetricResultMetricValueType = "METRIC_VALUE_TYPE_PERCENTAGE"
NumberValue float64 (optional)

The value of the metric as a number.

format: double
Reasoning stringoptional

Reasoning of the metric result.

StringValue stringoptional

The value of the metric as a string.

StartedAt Time (optional)

Run start time.

format: date-time
Status APIEvaluationRunStatus (optional)

Evaluation Run Statuses

Accepts one of the following:
const APIEvaluationRunStatusEvaluationRunStatusUnspecified APIEvaluationRunStatus = "EVALUATION_RUN_STATUS_UNSPECIFIED"
const APIEvaluationRunStatusEvaluationRunQueued APIEvaluationRunStatus = "EVALUATION_RUN_QUEUED"
const APIEvaluationRunStatusEvaluationRunRunningDataset APIEvaluationRunStatus = "EVALUATION_RUN_RUNNING_DATASET"
const APIEvaluationRunStatusEvaluationRunEvaluatingResults APIEvaluationRunStatus = "EVALUATION_RUN_EVALUATING_RESULTS"
const APIEvaluationRunStatusEvaluationRunCancelling APIEvaluationRunStatus = "EVALUATION_RUN_CANCELLING"
const APIEvaluationRunStatusEvaluationRunCancelled APIEvaluationRunStatus = "EVALUATION_RUN_CANCELLED"
const APIEvaluationRunStatusEvaluationRunSuccessful APIEvaluationRunStatus = "EVALUATION_RUN_SUCCESSFUL"
const APIEvaluationRunStatusEvaluationRunPartiallySuccessful APIEvaluationRunStatus = "EVALUATION_RUN_PARTIALLY_SUCCESSFUL"
const APIEvaluationRunStatusEvaluationRunFailed APIEvaluationRunStatus = "EVALUATION_RUN_FAILED"
TestCaseDescription stringoptional

Test case description.

TestCaseName stringoptional

Test case name.

TestCaseUuid stringoptional

Test-case UUID.

TestCaseVersion int64 (optional)

Test case version.

format: int64
List Evaluation Runs by Test Case
package main

import (
  "context"
  "fmt"

  "github.com/stainless-sdks/-go"
  "github.com/stainless-sdks/-go/option"
)

// main demonstrates listing the evaluation runs recorded for a single
// evaluation test case, identified by its UUID. It panics if the request
// returns an error and otherwise prints the returned runs.
func main() {
  client := gradient.NewClient(
    option.WithAccessToken("My Access Token"),
  )
  // The UUID must be a plain string. Wrapping the quoted value in a raw
  // string literal (backticks) would make the double quotes part of the
  // path parameter sent to the API.
  response, err := client.Agents.EvaluationTestCases.ListEvaluationRuns(
    context.TODO(),
    "123e4567-e89b-12d3-a456-426614174000",
    // No query fields are set; the optional EvaluationTestCaseVersion is
    // left unspecified.
    gradient.AgentEvaluationTestCaseListEvaluationRunsParams{},
  )
  if err != nil {
    panic(err.Error())
  }
  // %+v prints struct field names alongside their values.
  fmt.Printf("%+v\n", response.EvaluationRuns)
}
{
  "evaluation_runs": [
    {
      "agent_deleted": true,
      "agent_deployment_name": "example name",
      "agent_name": "example name",
      "agent_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "agent_version_hash": "example string",
      "agent_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "created_by_user_email": "example@example.com",
      "created_by_user_id": "12345",
      "error_description": "example string",
      "evaluation_run_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "evaluation_test_case_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "finished_at": "2023-01-01T00:00:00Z",
      "pass_status": true,
      "queued_at": "2023-01-01T00:00:00Z",
      "run_level_metric_results": [
        {
          "error_description": "example string",
          "metric_name": "example name",
          "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
          "number_value": 123,
          "reasoning": "example string",
          "string_value": "example string"
        }
      ],
      "run_name": "example name",
      "star_metric_result": {
        "error_description": "example string",
        "metric_name": "example name",
        "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
        "number_value": 123,
        "reasoning": "example string",
        "string_value": "example string"
      },
      "started_at": "2023-01-01T00:00:00Z",
      "status": "EVALUATION_RUN_STATUS_UNSPECIFIED",
      "test_case_description": "example string",
      "test_case_name": "example name",
      "test_case_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "test_case_version": 123
    }
  ]
}
Returns Examples
{
  "evaluation_runs": [
    {
      "agent_deleted": true,
      "agent_deployment_name": "example name",
      "agent_name": "example name",
      "agent_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "agent_version_hash": "example string",
      "agent_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "created_by_user_email": "example@example.com",
      "created_by_user_id": "12345",
      "error_description": "example string",
      "evaluation_run_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "evaluation_test_case_workspace_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "finished_at": "2023-01-01T00:00:00Z",
      "pass_status": true,
      "queued_at": "2023-01-01T00:00:00Z",
      "run_level_metric_results": [
        {
          "error_description": "example string",
          "metric_name": "example name",
          "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
          "number_value": 123,
          "reasoning": "example string",
          "string_value": "example string"
        }
      ],
      "run_name": "example name",
      "star_metric_result": {
        "error_description": "example string",
        "metric_name": "example name",
        "metric_value_type": "METRIC_VALUE_TYPE_UNSPECIFIED",
        "number_value": 123,
        "reasoning": "example string",
        "string_value": "example string"
      },
      "started_at": "2023-01-01T00:00:00Z",
      "status": "EVALUATION_RUN_STATUS_UNSPECIFIED",
      "test_case_description": "example string",
      "test_case_name": "example name",
      "test_case_uuid": "123e4567-e89b-12d3-a456-426614174000",
      "test_case_version": 123
    }
  ]
}