https://oregon.cloud.databricks.com/.well-known/ai-plugin.json
{
"schema_version": "v1",
"name_for_human": "Databricks (dev-azure-westus)",
"name_for_model": "databricks",
"description_for_human": "A plugin that allows the user to interact with Databricks.",
"description_for_model": "A plugin that allows the user to interact with Databricks.",
"auth": {
"type": "user_http",
"authorization_type": "bearer"
},
"api": {
"type": "openapi",
"url": "https://westus.dev.azuredatabricks.net/openapi.yaml",
"is_user_authenticated": false
},
"logo_url": "https://westus.dev.azuredatabricks.net/logo.png",
"contact_email": "eng-product-infra-team@databricks.com",
"legal_info_url": "https://databricks.com/legal"
}
https://westus.dev.azuredatabricks.net/openapi.yaml
openapi: 3.0.1
info:
title: Databricks Plugin
description: Allows the user to interact with Databricks.
version: 'v1'
servers:
- url: 'https://westus.dev.azuredatabricks.net'
paths:
"/databricks-chatgpt-plugin/api/2.0/sql/statements/":
post:
# E2 dogfood: 791ba2a31c7fd70a, Dev: e10b9aafe6570b4c
description: 'Execute a SQL statement. You will need to get a warehouse ID by using WarehousesList.'
operationId: executeStatement
requestBody:
content:
application/json:
examples:
execute_statement_async:
summary: 'Call mode: asynchronous'
value:
statement: SELECT * FROM range(100)
wait_timeout: 0s
warehouse_id: abcdef0123456789
execute_statement_sync_cancel:
summary: 'Call mode: synchronous. Cancel on timeout'
value:
on_wait_timeout: CANCEL
statement: SELECT * FROM range(100)
wait_timeout: 30s
warehouse_id: abcdef0123456789
execute_statement_sync_continue:
summary: 'Call mode: synchronous. Continue on timeout'
value:
on_wait_timeout: CONTINUE
statement: SELECT * FROM range(100)
wait_timeout: 30s
warehouse_id: abcdef0123456789
execute_statement_sync_ext_links:
summary: Large result sets with EXTERNAL_LINKS + ARROW_STREAM
value:
disposition: EXTERNAL_LINKS
format: ARROW_STREAM
statement: SELECT * FROM range(100)
warehouse_id: abcdef0123456789
execute_statement_with_defaults:
summary: 'Call mode: synchronous. Default parameters'
value:
statement: SELECT * FROM range(3)
warehouse_id: abcdef0123456789
schema:
"$ref": "#/components/schemas/ExecuteStatementRequest"
required: true
responses:
'200':
content:
application/json:
schema:
"$ref": "#/components/responses/StatementResponse"
description: |
StatementResponse will contain `statement_id` and `status`; other fields may be absent or
present depending on context. See each field for its description.
default:
"$ref": "#/components/responses/ErrorResponse"
summary: Execute a SQL statement
tags:
- Statement Execution
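# Example (illustrative): a minimal Python `requests` call that executes a statement
# synchronously with a 30s wait, mirroring the execute_statement_sync_cancel example
# above. The host is taken from the `servers` entry; the token and warehouse_id are
# placeholders (see WarehousesList below for obtaining a warehouse ID).
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.post(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.0/sql/statements/",
#       headers={"Authorization": "Bearer <api-token>"},
#       json={
#           "statement": "SELECT * FROM range(100)",
#           "wait_timeout": "30s",
#           "on_wait_timeout": "CANCEL",
#           "warehouse_id": "<warehouse-id>",
#       },
#   )
#   resp.raise_for_status()
#   print(resp.json()["status"])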
/databricks-chatgpt-plugin/api/2.0/sql/warehouses:
get:
summary: List warehouses
operationId: WarehousesList
tags:
- SQL Warehouses
parameters:
- in: query
name: run_as_user_id
schema:
type: integer
description: >-
Service Principal which will be used to fetch the list of
warehouses.
If not specified, the user from the session header is used.
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/sql.ListWarehousesResponse'
description: ''
description: Lists all SQL warehouses that a user has manager permissions on.
x-databricks-crud: list
x-databricks-pagination:
results: warehouses
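# Example (illustrative): listing warehouses to obtain a `warehouse_id` for
# executeStatement above. The token is a placeholder; the `warehouses` key follows
# the x-databricks-pagination hint above.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.get(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.0/sql/warehouses",
#       headers={"Authorization": "Bearer <api-token>"},
#   )
#   resp.raise_for_status()
#   for warehouse in resp.json().get("warehouses", []):
#       print(warehouse)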
/databricks-chatgpt-plugin/api/2.1/jobs/create:
post:
security:
- bearerAuth: []
summary: Create a new job
description: Create a new job.
operationId: JobsCreate
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/CreateJob'
responses:
'200':
description: Job was created successfully
content:
application/json:
schema:
type: object
properties:
job_id:
type: integer
example: 11223344
description: The canonical identifier for the newly created job.
format: int64
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
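# Example (illustrative): creating a job with a single notebook task. Field names
# follow the CreateJob and JobTaskSettings schemas under `components`; the token,
# cluster ID, and notebook path are placeholders.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.post(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/create",
#       headers={"Authorization": "Bearer <api-token>"},
#       json={
#           "name": "Example job",
#           "max_concurrent_runs": 1,
#           "tasks": [{
#               "task_key": "main",
#               "existing_cluster_id": "<cluster-id>",
#               "notebook_task": {"notebook_path": "/Users/<user>/ExampleNotebook"},
#           }],
#       },
#   )
#   resp.raise_for_status()
#   print(resp.json()["job_id"])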
/databricks-chatgpt-plugin/api/2.1/jobs/list:
get:
security:
- bearerAuth: []
summary: List all jobs
description: Retrieves a list of jobs.
operationId: JobsList
parameters:
- name: limit
description: >-
The number of jobs to return. This value must be greater than 0 and
less than or equal to 25. The default value is 20.
in: query
required: false
schema:
type: integer
maximum: 25
minimum: 1
default: 20
example: 25
- name: offset
description: >-
The offset of the first job to return, relative to the most recently
created job.
in: query
required: false
schema:
type: integer
minimum: 0
default: 0
example: 0
- name: name
description: A filter on the list based on the exact (case insensitive) job name.
in: query
required: false
schema:
type: string
example: A%20multitask%20job
- name: expand_tasks
description: Whether to include task and cluster details in the response.
in: query
required: false
schema:
default: false
type: boolean
example: false
responses:
'200':
description: List of jobs was retrieved successfully.
content:
application/json:
schema:
type: object
properties:
jobs:
type: array
description: The list of jobs.
items:
$ref: '#/components/schemas/Job'
has_more:
type: boolean
example: false
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
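# Example (illustrative): paging through all jobs with `limit`/`offset`, stopping
# when `has_more` is false. The token is a placeholder.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   headers = {"Authorization": "Bearer <api-token>"}
#   offset = 0
#   while True:
#       resp = requests.get(
#           f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/list",
#           headers=headers,
#           params={"limit": 25, "offset": offset},
#       )
#       resp.raise_for_status()
#       page = resp.json()
#       for job in page.get("jobs", []):
#           print(job["job_id"])
#       if not page.get("has_more"):
#           break
#       offset += 25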
/databricks-chatgpt-plugin/api/2.1/jobs/get:
get:
security:
- bearerAuth: []
operationId: JobsGet
summary: Get a single job
description: Retrieves the details for a single job.
parameters:
- name: job_id
description: >-
The canonical identifier of the job to retrieve information about.
This field is required.
in: query
required: true
schema:
example: 11223344
type: integer
format: int64
responses:
'200':
description: Job was retrieved successfully.
content:
application/json:
schema:
type: object
properties:
job_id:
type: integer
description: The canonical identifier for this job.
example: 11223344
format: int64
creator_user_name:
type: string
example: user.name@databricks.com
description: >-
The creator user name. This field won’t be included in the
response if the user has been deleted.
run_as_user_name:
type: string
example: user.name@databricks.com
description: >-
The user name that the job runs as. `run_as_user_name` is
based on the current job settings, and is set to the
creator of the job if job access control is disabled, or
the `is_owner` permission if job access control is
enabled.
settings:
description: >-
Settings for this job and all of its runs. These settings
can be updated using the
[Reset](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsReset)
or
[Update](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsUpdate)
endpoints.
$ref: '#/components/schemas/JobSettings'
created_time:
type: integer
example: 1601370337343
description: >-
The time at which this job was created in epoch
milliseconds (milliseconds since 1/1/1970 UTC).
format: int64
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.1/jobs/reset:
post:
security:
- bearerAuth: []
operationId: JobsReset
summary: Overwrites all settings for a job
description: >-
Overwrites all the settings for a specific job. Use the Update endpoint
to update job settings partially.
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- job_id
properties:
job_id:
type: integer
example: 11223344
description: >-
The canonical identifier of the job to reset. This field is
required.
format: int64
new_settings:
description: >-
The new settings of the job. These settings completely
replace the old settings.
Changes to the field `JobSettings.timeout_seconds` are
applied to active runs. Changes to other fields are applied
to future runs only.
$ref: '#/components/schemas/JobSettings'
responses:
'200':
description: Job was overwritten successfully.
content:
application/json:
schema:
type: object
properties: {}
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.1/jobs/update:
post:
security:
- bearerAuth: []
operationId: JobsUpdate
summary: Partially updates a job
description: >-
Add, update, or remove specific settings of an existing job. Use the
Reset endpoint to overwrite all job settings.
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- job_id
properties:
job_id:
type: integer
example: 11223344
description: >-
The canonical identifier of the job to update. This field is
required.
format: int64
new_settings:
description: >-
The new settings for the job. Any top-level fields specified
in `new_settings` are completely replaced. Partially
updating nested fields is not supported.
Changes to the field `JobSettings.timeout_seconds` are
applied to active runs. Changes to other fields are applied
to future runs only.
$ref: '#/components/schemas/JobSettings'
fields_to_remove:
type: array
description: >-
Remove top-level fields in the job settings. Removing nested
fields is not supported. This field is optional.
example:
- libraries
- schedule
items:
type: string
responses:
'200':
description: Job was updated successfully.
content:
application/json:
schema:
type: object
properties: {}
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
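# Example (illustrative): partially updating a job by replacing its schedule and
# removing the top-level `libraries` setting. The schedule values reuse the
# CronSchedule examples under `components`; the job ID and token are placeholders.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.post(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/update",
#       headers={"Authorization": "Bearer <api-token>"},
#       json={
#           "job_id": 11223344,
#           "new_settings": {
#               "schedule": {
#                   "quartz_cron_expression": "20 30 * * * ?",
#                   "timezone_id": "Europe/London",
#               },
#           },
#           "fields_to_remove": ["libraries"],
#       },
#   )
#   resp.raise_for_status()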
/databricks-chatgpt-plugin/api/2.1/jobs/delete:
post:
security:
- bearerAuth: []
summary: Delete a job
description: Deletes a job.
operationId: JobsDelete
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- job_id
properties:
job_id:
type: integer
example: 11223344
description: >-
The canonical identifier of the job to delete. This field is
required.
format: int64
responses:
'200':
description: Job was deleted successfully.
content:
application/json:
schema:
type: object
properties: {}
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.1/jobs/run-now:
post:
security:
- bearerAuth: []
operationId: JobsRunNow
summary: Trigger a new job run
description: Run a job and return the `run_id` of the triggered run.
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- job_id
properties:
job_id:
type: integer
description: The ID of the job to be executed
example: 11223344
format: int64
idempotency_token:
type: string
example: 8f018174-4792-40d5-bcbc-3e6a527352c8
description: >-
An optional token to guarantee the idempotency of job run requests.
If a run with the provided token already exists, the request does
not create a new run but returns the ID of the existing run instead.
If a run with the provided token is deleted, an error is returned.
If you specify the idempotency token, upon failure you can retry
until the request succeeds. Databricks guarantees that exactly one
run is launched with that idempotency token.
This token must have at most 64 characters.
For more information, see [How to ensure idempotency for
jobs](https://kb.databricks.com/jobs/jobs-idempotency.html).
jar_params:
type: array
example:
- john
- doe
- '35'
description: >-
A list of parameters for jobs with Spark JAR tasks, for example
`"jar_params": ["john doe", "35"]`. The parameters are used to
invoke the main function of the main class specified in the Spark
JAR task. If not specified upon `run-now`, it defaults to an empty
list. jar_params cannot be specified in conjunction with
notebook_params. The JSON representation of this field (for example
`{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
items:
type: string
notebook_params:
type: object
example:
name: john doe
age: '35'
description: >-
A map from keys to values for jobs with notebook task, for example
`"notebook_params": {"name": "john doe", "age": "35"}`. The map is
passed to the notebook and is accessible through the
[dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets)
function.
If not specified upon `run-now`, the triggered run uses the job’s
base parameters.
notebook_params cannot be specified in conjunction with jar_params.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
The JSON representation of this field (for example
`{"notebook_params":{"name":"john doe","age":"35"}}`) cannot exceed
10,000 bytes.
additionalProperties: true
python_params:
type: array
example:
- john doe
- '35'
description: >-
A list of parameters for jobs with Python tasks, for example
`"python_params": ["john doe", "35"]`. The parameters are passed to
Python file as command-line parameters. If specified upon `run-now`,
it would overwrite the parameters specified in job setting. The JSON
representation of this field (for example `{"python_params":["john
doe","35"]}`) cannot exceed 10,000 bytes.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
Important:
These parameters accept only Latin characters (ASCII character set).
Using non-ASCII characters returns an error. Examples of invalid,
non-ASCII characters are Chinese, Japanese kanjis, and emojis.
items:
type: string
spark_submit_params:
type: array
example:
- '--class'
- org.apache.spark.examples.SparkPi
description: >-
A list of parameters for jobs with spark submit task, for example
`"spark_submit_params": ["--class",
"org.apache.spark.examples.SparkPi"]`. The parameters are passed to
spark-submit script as command-line parameters. If specified upon
`run-now`, it would overwrite the parameters specified in job
setting. The JSON representation of this field (for example
`{"spark_submit_params":["--class","org.apache.spark.examples.SparkPi"]}`) cannot exceed 10,000 bytes.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
Important:
These parameters accept only Latin characters (ASCII character set).
Using non-ASCII characters returns an error. Examples of invalid,
non-ASCII characters are Chinese, Japanese kanjis, and emojis.
items:
type: string
python_named_params:
type: object
example:
name: task
data: dbfs:/path/to/data.json
description: >-
A map from keys to values for jobs with Python wheel task, for
example `"python_named_params": {"name": "task", "data":
"dbfs:/path/to/data.json"}`.
pipeline_params:
properties:
full_refresh:
type: boolean
description: If true, triggers a full refresh on the delta live table.
sql_params:
type: object
example:
name: john doe
age: '35'
description: >-
A map from keys to values for SQL tasks, for example `"sql_params":
{"name": "john doe", "age": "35"}`. The SQL alert task does not
support custom parameters.
dbt_commands:
type: array
example:
- dbt deps
- dbt seed
- dbt run
description: >-
An array of commands to execute for jobs with the dbt task, for
example `"dbt_commands": ["dbt deps", "dbt seed", "dbt run"]`
responses:
'200':
description: Run was started successfully.
content:
application/json:
schema:
type: object
properties:
run_id:
type: integer
example: 455644833
description: The globally unique ID of the newly triggered run.
format: int64
number_in_job:
deprecated: true
example: 455644833
type: integer
description: >-
A unique identifier for this job run. This is set to the
same value as `run_id`.
format: int64
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
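# Example (illustrative): triggering a run with notebook parameters and an
# idempotency token so the request can be retried safely. The job ID and token are
# placeholders; the parameter values mirror the notebook_params example above.
#
#   import uuid
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.post(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/run-now",
#       headers={"Authorization": "Bearer <api-token>"},
#       json={
#           "job_id": 11223344,
#           "idempotency_token": str(uuid.uuid4()),
#           "notebook_params": {"name": "john doe", "age": "35"},
#       },
#   )
#   resp.raise_for_status()
#   print(resp.json()["run_id"])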
/databricks-chatgpt-plugin/api/2.1/jobs/runs/submit:
post:
security:
- bearerAuth: []
operationId: JobsRunsSubmit
summary: Create and trigger a one-time run
description: >-
Submit a one-time run. This endpoint allows you to submit a workload
directly without creating a job. Use the `jobs/runs/get` API to check
the run state after the job is submitted.
requestBody:
required: true
content:
application/json:
schema:
allOf:
- $ref: '#/components/schemas/RunSubmitSettings'
- $ref: '#/components/schemas/AccessControlList'
responses:
'200':
description: Run was created and started successfully.
content:
application/json:
schema:
type: object
properties:
run_id:
type: integer
description: The canonical identifier for the newly submitted run.
example: 455644833
format: int64
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
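# Example (illustrative): submitting a one-time run and polling jobs/runs/get until
# it leaves an active state, as suggested in the description above. The request
# fields (`run_name`, `tasks`) and the `state.life_cycle_state` response field are
# assumed from the RunSubmitSettings and Run schemas, which are referenced but not
# expanded here; all values are placeholders.
#
#   import time
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   headers = {"Authorization": "Bearer <api-token>"}
#   resp = requests.post(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/runs/submit",
#       headers=headers,
#       json={
#           "run_name": "one-time run",
#           "tasks": [{
#               "task_key": "main",
#               "existing_cluster_id": "<cluster-id>",
#               "notebook_task": {"notebook_path": "/Users/<user>/ExampleNotebook"},
#           }],
#       },
#   )
#   resp.raise_for_status()
#   run_id = resp.json()["run_id"]
#   while True:
#       run = requests.get(
#           f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/runs/get",
#           headers=headers,
#           params={"run_id": run_id},
#       ).json()
#       if run["state"]["life_cycle_state"] not in ("PENDING", "RUNNING", "TERMINATING"):
#           break
#       time.sleep(30)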
/databricks-chatgpt-plugin/api/2.1/jobs/runs/list:
get:
security:
- bearerAuth: []
operationId: JobsRunsList
summary: List runs for a job
description: List runs in descending order by start time.
parameters:
- name: active_only
description: >-
If active_only is `true`, only active runs are included in the
results; otherwise, lists both active and completed runs. An active
run is a run in the `PENDING`, `RUNNING`, or `TERMINATING` state. This
field cannot be `true` when completed_only is `true`.
in: query
schema:
default: false
example: false
type: boolean
- name: completed_only
description: >-
If completed_only is `true`, only completed runs are included in the
results; otherwise, lists both active and completed runs. This field
cannot be `true` when active_only is `true`.
in: query
schema:
default: false
example: false
type: boolean
- name: job_id
description: >-
The job for which to list runs. If omitted, the Jobs service lists
runs from all jobs.
in: query
schema:
example: 11223344
type: integer
format: int64
- name: offset
description: >-
The offset of the first run to return, relative to the most recent
run.
in: query
schema:
default: 0
example: 0
type: integer
format: int32
- name: limit
description: >-
The number of runs to return. This value must be greater than 0 and
less than or equal to 25. The default value is 25. If a request specifies a
limit of 0, the service instead uses the maximum limit.
in: query
schema:
type: integer
format: int32
default: 25
example: 25
minimum: 1
maximum: 25
- name: run_type
description: >-
The type of runs to return. For a description of run types, see
[Run](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsGet).
in: query
schema:
type: string
example: JOB_RUN
enum:
- JOB_RUN
- WORKFLOW_RUN
- SUBMIT_RUN
- name: expand_tasks
description: Whether to include task and cluster details in the response.
in: query
required: false
schema:
default: false
example: false
type: boolean
- name: start_time_from
description: >-
Show runs that started _at or after_ this value. The value must be a
UTC timestamp in milliseconds. Can be combined with _start_time_to_
to filter by a time range.
in: query
required: false
schema:
example: 1642521600000
type: integer
- name: start_time_to
description: >-
Show runs that started _at or before_ this value. The value must be
a UTC timestamp in milliseconds. Can be combined with
_start_time_from_ to filter by a time range.
in: query
required: false
schema:
example: 1642608000000
type: integer
responses:
'200':
description: List of runs was retrieved successfully.
content:
application/json:
schema:
type: object
properties:
runs:
type: array
description: A list of runs, from most recently started to least.
items:
$ref: '#/components/schemas/Run'
has_more:
type: boolean
description: >-
If true, additional runs matching the provided filter are
available for listing.
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
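# Example (illustrative): listing only the active runs of a single job. The job ID
# and token are placeholders.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.get(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/runs/list",
#       headers={"Authorization": "Bearer <api-token>"},
#       params={"job_id": 11223344, "active_only": "true", "limit": 25},
#   )
#   resp.raise_for_status()
#   for run in resp.json().get("runs", []):
#       print(run["run_id"])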
/databricks-chatgpt-plugin/api/2.1/jobs/runs/get:
get:
security:
- bearerAuth: []
summary: Get a single job run
operationId: JobsRunsGet
description: Retrieve the metadata of a run.
parameters:
- name: run_id
required: true
description: >-
The canonical identifier of the run for which to retrieve the
metadata. This field is required.
in: query
schema:
type: integer
example: 455644833
format: int64
- name: include_history
required: false
description: Whether to include the repair history in the response.
in: query
schema:
type: boolean
example: true
responses:
'200':
description: Run was retrieved successfully
content:
application/json:
schema:
allOf:
- $ref: '#/components/schemas/Run'
- $ref: '#/components/schemas/RepairHistory'
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.0/jobs/runs/export:
get:
security:
- bearerAuth: []
operationId: JobsRunsExport
summary: Export and retrieve a job run
description: Export and retrieve the job run task.
parameters:
- name: run_id
required: true
description: The canonical identifier for the run. This field is required.
in: query
schema:
type: integer
example: 455644833
format: int64
- name: views_to_export
description: Which views to export (CODE, DASHBOARDS, or ALL). Defaults to CODE.
in: query
schema:
$ref: '#/components/schemas/ViewsToExport'
responses:
'200':
description: Run was exported successfully.
content:
application/json:
schema:
type: object
properties:
views:
type: array
description: >-
The exported content in HTML format (one for every view
item).
items:
$ref: '#/components/schemas/ViewItem'
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.1/jobs/runs/cancel:
post:
security:
- bearerAuth: []
operationId: JobsRunsCancel
summary: Cancel a job run
description: >-
Cancels a job run. The run is canceled asynchronously, so it may still
be running when this request completes.
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- run_id
properties:
run_id:
type: integer
description: The canonical identifier of the run to cancel. This field is required.
example: 455644833
format: int64
responses:
'200':
description: Run was cancelled successfully.
content:
application/json:
schema:
type: object
properties: {}
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.1/jobs/runs/cancel-all:
post:
security:
- bearerAuth: []
operationId: JobsRunsCancelAll
summary: Cancel all runs of a job
description: >-
Cancels all active runs of a job. The runs are canceled asynchronously,
so cancellation does not prevent new runs from being started.
requestBody:
required: true
content:
application/json:
schema:
type: object
required:
- job_id
properties:
job_id:
type: integer
description: >-
The canonical identifier of the job to cancel all runs of.
This field is required.
example: 11223344
format: int64
responses:
'200':
description: All runs were cancelled successfully.
content:
application/json:
schema:
type: object
properties: {}
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.1/jobs/runs/get-output:
get:
security:
- bearerAuth: []
operationId: JobsRunsGetOutput
summary: Get the output for a single run
description: >-
Retrieve the output and metadata of a single task run.
parameters:
- name: run_id
required: true
description: The canonical identifier for the run. This field is required.
in: query
schema:
type: integer
example: 455644833
format: int64
responses:
'200':
description: Run output was retrieved successfully.
content:
application/json:
schema:
type: object
properties:
notebook_output:
description: >-
The output of a notebook task, if available. A notebook
task that terminates (either successfully or with a
failure) without calling `dbutils.notebook.exit()` is
considered to have an empty output. This field is set but
its result value is empty. Databricks restricts this API
to return the first 5 MB of the output. To return a larger
result, use the
[ClusterLogConf](https://docs.databricks.com/dev-tools/api/latest/clusters.html#clusterlogconf)
field to configure log storage for the job cluster.
example: ''
$ref: '#/components/schemas/NotebookOutput'
sql_output:
description: The output of a SQL task, if available.
example: ''
$ref: '#/components/schemas/SqlOutput'
dbt_output:
description: The output of a dbt task, if available.
example: ''
$ref: '#/components/schemas/DbtOutput'
logs:
type: string
example: Hello World!
description: >-
The output from tasks that write to standard streams (stdout/stderr)
logs_truncated:
type: boolean
example: true
description: Whether the logs are truncated.
error:
type: string
example: 'ZeroDivisionError: integer division or modulo by zero'
description: >-
An error message indicating why a task failed or why
output is not available. The message is unstructured, and
its exact format is subject to change.
error_trace:
type: string
example: >-
---------------------------------------------------------------------------
Exception Traceback (most
recent call last)
1 numerator = 42
2 denominator = 0
----> 3 return numerator / denominator
ZeroDivisionError: integer division or modulo by zero
description: >-
If there was an error executing the run, this field
contains any available stack traces.
metadata:
description: All details of the run except for its output.
$ref: '#/components/schemas/Run'
'400':
description: A job run with multiple tasks was provided.
content:
application/json:
example:
error_code: INVALID_PARAMETER_VALUE
message: >-
Retrieving the output of runs with multiple tasks is not
supported. You must retrieve the output of each individual
task run instead.
schema:
$ref: '#/components/schemas/Error'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
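# Example (illustrative): fetching the output of a single task run. As the 400
# response above notes, for multi-task job runs you must pass the run_id of an
# individual task run rather than the parent run. The run ID and token are
# placeholders.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.get(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.1/jobs/runs/get-output",
#       headers={"Authorization": "Bearer <api-token>"},
#       params={"run_id": 455644833},
#   )
#   resp.raise_for_status()
#   output = resp.json()
#   print(output.get("notebook_output"), output.get("error"))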
/databricks-chatgpt-plugin/api/2.1/jobs/runs/delete:
post:
security:
- bearerAuth: []
operationId: JobsRunsDelete
summary: Delete a job run
description: Deletes a non-active run. Returns an error if the run is active.
requestBody:
required: true
content:
application/json:
schema:
type: object
properties:
run_id:
type: integer
example: 455644833
description: >-
The canonical identifier of the run to delete.
format: int64
responses:
'200':
description: Run was deleted successfully.
content:
application/json:
schema:
type: object
properties: {}
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.1/jobs/runs/repair:
post:
security:
- bearerAuth: []
operationId: JobsRunsRepair
summary: Repair a job run
description: >-
Re-run one or more tasks. Tasks are re-run as part of the original job
run, use the current job and task settings, and can be viewed in the
history for the original job run.
requestBody:
required: true
content:
application/json:
schema:
allOf:
- $ref: '#/components/schemas/RepairRunInput'
- $ref: '#/components/schemas/RunParameters'
responses:
'200':
description: Run repair was initiated.
content:
application/json:
schema:
type: object
properties:
repair_id:
description: The ID of the repair.
type: integer
format: int64
example: 734650698524280
'400':
$ref: '#/components/responses/BadRequest'
'401':
$ref: '#/components/responses/Unauthorized'
'500':
$ref: '#/components/responses/InternalError'
/databricks-chatgpt-plugin/api/2.0/workspace/import:
post:
summary: Import a workspace object
operationId: WorkspaceImport
tags:
- Workspace
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.ImportResponse'
description: ''
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.Import'
description: ''
description: >
Imports a workspace object (for example, a notebook or file) or the
contents of an entire directory.
If `path` already exists and `overwrite` is set to `false`, this call
returns an error `RESOURCE_ALREADY_EXISTS`.
Only the `DBC` format can be used to import a directory.
x-databricks-crud: create
x-codeSamples:
- lang: Terraform
label: Terraform
source: |
resource "databricks_notebook" "notebook" {
content_base64 = base64encode(<<-EOT
# created from ${abspath(path.module)}
display(spark.range(10))
EOT
)
path = "/Shared/Demo"
language = "PYTHON"
}
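# Example (illustrative): importing a small Python notebook from base64-encoded
# source, the HTTP equivalent of the Terraform sample above. `path` and `overwrite`
# are named in the description; `format`, `language`, and `content` are assumed from
# the workspace.Import schema, which is referenced but not expanded here.
#
#   import base64
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   source = b"display(spark.range(10))"
#   resp = requests.post(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.0/workspace/import",
#       headers={"Authorization": "Bearer <api-token>"},
#       json={
#           "path": "/Shared/Demo",
#           "format": "SOURCE",
#           "language": "PYTHON",
#           "overwrite": False,
#           "content": base64.b64encode(source).decode("ascii"),
#       },
#   )
#   resp.raise_for_status()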
/databricks-chatgpt-plugin/api/2.0/workspace/export:
get:
summary: Export a workspace object
operationId: WorkspaceExport
tags:
- Workspace
parameters:
- required: true
in: query
name: path
schema:
type: string
description: >-
The absolute path of the object or directory. Exporting a directory
is only supported for the `DBC` format.
- in: query
name: format
schema:
$ref: '#/components/schemas/workspace.ExportFormat'
description: >
This specifies the format of the exported file. By default, this is
`SOURCE`.
However, it may be one of: `SOURCE`, `HTML`, `JUPYTER`, `DBC`.
The value is case sensitive.
- in: query
name: direct_download
schema:
type: boolean
description: >
Flag to enable direct download. If it is `true`, the response will
be the exported file itself.
Otherwise, the response contains the content as a base64-encoded string.
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.ExportResponse'
description: ''
description: >
Exports an object or the contents of an entire directory.
If `path` does not exist, this call returns an error
`RESOURCE_DOES_NOT_EXIST`.
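# Example (illustrative): exporting a notebook in `JUPYTER` format with direct
# download enabled, writing the response body to a local file. The path and token
# are placeholders.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   resp = requests.get(
#       f"{HOST}/databricks-chatgpt-plugin/api/2.0/workspace/export",
#       headers={"Authorization": "Bearer <api-token>"},
#       params={"path": "/Shared/Demo", "format": "JUPYTER", "direct_download": "true"},
#   )
#   resp.raise_for_status()
#   with open("Demo.ipynb", "wb") as f:
#       f.write(resp.content)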
/databricks-chatgpt-plugin/api/2.0/workspace/list:
get:
summary: List contents
operationId: WorkspaceList
tags:
- Workspace
parameters:
- required: true
in: query
name: path
schema:
type: string
description: The absolute path of the notebook or directory.
- in: query
name: notebooks_modified_after
schema:
type: integer
description: Only return notebooks modified after this timestamp (UTC, in milliseconds).
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.ListResponse'
description: ''
description: >
Lists the contents of a directory, or the object if it is not a
directory. If
the input path does not exist, this call returns an error
`RESOURCE_DOES_NOT_EXIST`.
x-databricks-crud: list
x-databricks-pagination:
results: objects
/databricks-chatgpt-plugin/api/2.0/workspace/mkdirs:
post:
summary: Create a directory
operationId: WorkspaceMkdirs
tags:
- Workspace
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.MkdirsResponse'
description: ''
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.Mkdirs'
description: ''
description: >
Creates the specified directory (and necessary parent directories if
they do not exist).
If there is an object (not a directory) at any prefix of the input path,
this call returns an error `RESOURCE_ALREADY_EXISTS`.
/databricks-chatgpt-plugin/api/2.0/repos:
get:
summary: Get repos
operationId: ReposList
tags:
- Repos
parameters:
- in: query
name: path_prefix
schema:
type: string
description: Filters repos that have paths starting with the given path prefix.
- in: query
name: next_page_token
schema:
type: string
description:
Token used to get the next page of results. If not specified,
returns the first page of results as well as a next page token if
there are more results.
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.ListReposResponse'
description: Repos were successfully returned.
description: >-
Returns repos that the calling user has Manage permissions on. Results
are paginated with each page containing twenty repos.
x-databricks-crud: list
x-databricks-pagination:
results: repos
token:
request: next_page_token
response: next_page_token
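# Example (illustrative): paging through repos with `next_page_token`, following the
# x-databricks-pagination hints above. The path prefix and token are placeholders.
#
#   import requests
#   HOST = "https://westus.dev.azuredatabricks.net"
#   headers = {"Authorization": "Bearer <api-token>"}
#   params = {"path_prefix": "/Repos"}
#   while True:
#       resp = requests.get(
#           f"{HOST}/databricks-chatgpt-plugin/api/2.0/repos",
#           headers=headers,
#           params=params,
#       )
#       resp.raise_for_status()
#       page = resp.json()
#       for repo in page.get("repos", []):
#           print(repo)
#       token = page.get("next_page_token")
#       if not token:
#           break
#       params["next_page_token"] = token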
post:
summary: Create a repo
operationId: ReposCreate
tags:
- Repos
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.RepoInfo'
description: The repo was successfully created.
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.CreateRepo'
description: ''
description: >-
Creates a repo in the workspace and links it to the remote Git repo
specified.
Note that repos created programmatically must be linked to a remote Git
repo, unlike repos created in the browser.
x-databricks-crud: create
x-codeSamples:
- lang: Terraform
label: Terraform
source: |
resource "databricks_repo" "this" {
url = "https://github.com/user/demo.git"
}
/databricks-chatgpt-plugin/api/2.0/repos/{repo_id}:
get:
summary: Get a repo
parameters:
- required: true
in: path
name: repo_id
schema:
format: int64
type: integer
description: The ID for the corresponding repo to access.
operationId: ReposGet
tags:
- Repos
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.RepoInfo'
description: The repo was successfully returned.
description: Returns the repo with the given repo ID.
x-databricks-crud: read
patch:
summary: Update a repo
parameters:
- required: true
in: path
name: repo_id
schema:
format: int64
type: integer
description: The ID for the corresponding repo to access.
operationId: ReposUpdate
tags:
- Repos
responses:
'200':
content:
application/json:
schema:
properties: {}
type: object
description: The repo was successfully updated.
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/workspace.UpdateRepo'
description: Details required to update the repo
description: >-
Updates the repo to a different branch or tag, or updates the repo to
the latest commit on the same branch.
x-databricks-crud: update
delete:
summary: Delete a repo
parameters:
- required: true
in: path
name: repo_id
schema:
format: int64
type: integer
description: The ID for the corresponding repo to access.
operationId: ReposDelete
tags:
- Repos
responses:
'200':
description: The repo was successfully deleted.
description: Deletes the specified repo.
x-databricks-crud: delete
components:
securitySchemes:
bearerAuth:
type: http
scheme: bearer
bearerFormat: api_token
schemas:
ExecuteStatementRequest:
properties:
schema:
description: 'Sets default schema for statement execution, similar to [`USE
SCHEMA`](https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-schema.html)
in SQL.
'
type: string
statement:
description: SQL statement to execute
example: SELECT * FROM range(10)
type: string
wait_timeout:
description: |
The time in seconds the API service will wait for the statement's result set as `Ns`, where `N` can be set to 0 or to a value between 5 and 50.
When set to `0s`, the statement will execute in asynchronous mode.
type: string
warehouse_id:
"$ref": "#/components/schemas/WarehouseId"
type: object
WarehouseId:
description: 'Warehouse upon which to execute a statement. See also [What are
SQL warehouses?](https://docs.databricks.com/sql/admin/warehouse-type.html)
'
type: string
ClusterInstance:
properties:
cluster_id:
type: string
example: 0923-164208-meows279
description: >-
The canonical identifier for the cluster used by a run. This field
is always available for runs on existing clusters. For runs on new
clusters, it becomes available once the cluster is created. This
value can be used to view logs by browsing to
`/#setting/sparkui/$cluster_id/driver-logs`. The logs continue to be
available after the run completes.
The response won’t include this field if the identifier is not
available yet.
spark_context_id:
type: string
description: >-
The canonical identifier for the Spark context used by a run. This
field is filled in once the run begins execution. This value can be
used to view the Spark UI by browsing to
`/#setting/sparkui/$cluster_id/$spark_context_id`. The Spark UI
continues to be available after the run has completed.
The response won’t include this field if the identifier is not
available yet.
ClusterSpec:
properties:
existing_cluster_id:
type: string
example: 0923-164208-meows279
description: >-
If existing_cluster_id, the ID of an existing cluster that is used
for all runs of this job. When running jobs on an existing cluster,
you may need to manually restart the cluster if it stops responding.
We suggest running jobs on new clusters for greater reliability.
new_cluster:
example: null
description: >-
If new_cluster, a description of a cluster that is created for each
run.
$ref: '#/components/schemas/NewCluster'
libraries:
type: array
description: >-
An optional list of libraries to be installed on the cluster that
executes the job. The default value is an empty list.
items:
$ref: '#/components/schemas/Library'
CronSchedule:
required:
- quartz_cron_expression
- timezone_id
properties:
quartz_cron_expression:
type: string
example: 20 30 * * * ?
description: >-
A Cron expression using Quartz syntax that describes the schedule
for a job. See [Cron
Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)
for details. This field is required.
timezone_id:
type: string
example: Europe/London
description: >-
A Java timezone ID. The schedule for a job is resolved with respect
to this timezone. See [Java
TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html)
for details. This field is required.
pause_status:
type: string
enum:
- PAUSED
- UNPAUSED
example: PAUSED
description: Indicate whether this schedule is paused or not.
Continuous:
properties:
pause_status:
type: string
enum:
- PAUSED
- UNPAUSED
example: PAUSED
description: >-
Indicate whether the continuous execution of the job is paused or
not. Defaults to UNPAUSED.
GitSource:
description: >-
This functionality is in Public Preview.
An optional specification for a remote repository containing the
notebooks used by this job's notebook tasks.
example:
git_url: https://github.com/databricks/databricks-cli
git_branch: main
git_provider: gitHub
properties:
git_url:
type: string
example: https://github.com/databricks/databricks-cli
required: true
description: |-
URL of the repository to be cloned by this job.
The maximum length is 300 characters.
git_provider:
type: string
enum:
- gitHub
- bitbucketCloud
- azureDevOpsServices
- gitHubEnterprise
- bitbucketServer
- gitLab
- gitLabEnterpriseEdition
- awsCodeCommit
example: github
required: true
description: >-
Unique identifier of the service used to host the Git repository.
The value is case insensitive.
git_branch:
type: string
example: main
description: >-
Name of the branch to be checked out and used by this job. This
field cannot be specified in conjunction with git_tag or git_commit.
The maximum length is 255 characters.
git_tag:
type: string
example: release-1.0.0
description: >-
Name of the tag to be checked out and used by this job. This field
cannot be specified in conjunction with git_branch or git_commit.
The maximum length is 255 characters.
git_commit:
type: string
example: e0056d01
description: >-
Commit to be checked out and used by this job. This field cannot be
specified in conjunction with git_branch or git_tag.
The maximum length is 64 characters.
git_snapshot:
$ref: '#/components/schemas/GitSnapshot'
oneOf:
- required:
- git_url
- git_provider
- git_branch
- required:
- git_url
- git_provider
- git_tag
- required:
- git_url
- git_provider
- git_commit
GitSnapshot:
readOnly: true
description: >-
Read-only state of the remote repository at the time the job was run.
This field is only included on job runs.
properties:
used_commit:
type: string
example: 4506fdf41e9fa98090570a34df7a5bce163ff15f
description: >-
Commit that was used to execute the run. If git_branch was
specified, this points to the HEAD of the branch at the time of the
run; if git_tag was specified, this points to the commit the tag
points to.
Job:
properties:
job_id:
type: integer
description: The canonical identifier for this job.
example: 11223344
format: int64
creator_user_name:
type: string
example: user.name@databricks.com
description: >-
The creator user name. This field won’t be included in the response
if the user has already been deleted.
settings:
description: >-
Settings for this job and all of its runs. These settings can be
updated using the `resetJob` method.
$ref: '#/components/schemas/JobSettings'
created_time:
type: integer
example: 1601370337343
description: >-
The time at which this job was created in epoch milliseconds
(milliseconds since 1/1/1970 UTC).
format: int64
WebhookNotifications:
properties:
on_start:
type: array
example:
- id: 03dd86e4-57ef-4818-a950-78e41a1d71ab
- id: 0481e838-0a59-4eff-9541-a4ca6f149574
items:
type: object
properties:
id:
type: string
description: >-
An optional list of notification IDs to call when the run starts. A
maximum of 3 destinations can be specified for the `on_start`
property.
on_success:
type: array
example:
- id: 03dd86e4-57ef-4818-a950-78e41a1d71ab
items:
type: object
properties:
id:
type: string
description: >-
An optional list of notification IDs to call when the run completes
successfully. A maximum of 3 destinations can be specified for the
`on_success` property.
on_failure:
type: array
example:
- id: 0481e838-0a59-4eff-9541-a4ca6f149574
items:
type: object
properties:
id:
type: string
description: >-
An optional list of notification IDs to call when the run fails. A
maximum of 3 destinations can be specified for the `on_failure`
property.
JobEmailNotifications:
properties:
on_start:
type: array
description: >-
A list of email addresses to be notified when a run begins. If not
specified on job creation, reset, or update, the list is empty, and
notifications are not sent.
example:
- user.name@databricks.com
items:
type: string
on_success:
type: array
description: >-
A list of email addresses to be notified when a run successfully
completes. A run is considered to have completed successfully if it
ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL`
result_state. If not specified on job creation, reset, or update,
the list is empty, and notifications are not sent.
example:
- user.name@databricks.com
items:
type: string
on_failure:
type: array
description: >-
A list of email addresses to notify when a run completes
unsuccessfully. A run is considered unsuccessful if it ends with an
`INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or
`TIMED_OUT` `result_state`. If not specified on job creation, reset,
or update, or the list is empty, then notifications are not sent.
Job-level failure notifications are sent only once after the entire
job run (including all of its retries) has failed. Notifications are
not sent when failed job runs are retried. To receive a failure
notification after every failed task (including every failed retry),
use task-level notifications instead.
example:
- user.name@databricks.com
items:
type: string
no_alert_for_skipped_runs:
type: boolean
example: false
description: >-
If true, do not send email to recipients specified in `on_failure`
if the run is skipped.
CreateJob:
type: object
properties:
access_control_list:
type: array
description: List of permissions to set on the job.
items:
$ref: '#/components/schemas/AccessControlRequest'
name:
type: string
example: A multitask job
default: Untitled
description: An optional name for the job.
tags:
type: object
example:
cost-center: engineering
team: jobs
default: '{}'
description: >-
A map of tags associated with the job. These are forwarded to the
cluster as cluster tags for jobs clusters, and are subject to the
same limitations as cluster tags. A maximum of 25 tags can be added
to the job.
tasks:
type: array
maxItems: 100
description: A list of task specifications to be executed by this job.
items:
$ref: '#/components/schemas/JobTaskSettings'
example:
- task_key: Sessionize
description: Extracts session data from events; this is required.
depends_on: [ ]
existing_cluster_id: 0923-164208-meows279
spark_jar_task:
main_class_name: com.databricks.Sessionize
parameters:
- '--data'
- dbfs:/path/to/data.json
libraries:
- jar: dbfs:/mnt/databricks/Sessionize.jar
timeout_seconds: 86400
max_retries: 3
min_retry_interval_millis: 2000
retry_on_timeout: false
- task_key: Orders_Ingest
description: Ingests order data
depends_on: [ ]
job_cluster_key: auto_scaling_cluster
spark_jar_task:
main_class_name: com.databricks.OrdersIngest
parameters:
- '--data'
- dbfs:/path/to/order-data.json
libraries:
- jar: dbfs:/mnt/databricks/OrderIngest.jar
timeout_seconds: 86400
max_retries: 3
min_retry_interval_millis: 2000
retry_on_timeout: false
- task_key: Match
description: Matches orders with user sessions
depends_on:
- task_key: Orders_Ingest
- task_key: Sessionize
new_cluster:
spark_version: 7.3.x-scala2.12
node_type_id: i3.xlarge
spark_conf:
spark.speculation: true
aws_attributes:
availability: SPOT
zone_id: us-west-2a
autoscale:
min_workers: 2
max_workers: 16
notebook_task:
notebook_path: /Users/user.name@databricks.com/Match
source: WORKSPACE
base_parameters:
name: John Doe
age: '35'
timeout_seconds: 86400
max_retries: 3
min_retry_interval_millis: 2000
retry_on_timeout: false
job_clusters:
type: array
maxItems: 100
description: >-
A list of job cluster specifications that can be shared and reused
by tasks of this job. Libraries cannot be declared in a shared job
cluster. You must declare dependent libraries in task settings.
items:
$ref: '#/components/schemas/JobCluster'
example:
- job_cluster_key: auto_scaling_cluster
new_cluster:
spark_version: 7.3.x-scala2.12
node_type_id: i3.xlarge
spark_conf:
spark.speculation: true
aws_attributes:
availability: SPOT
zone_id: us-west-2a
autoscale:
min_workers: 2
max_workers: 16
email_notifications:
description: >-
An optional set of email addresses that is notified when runs of
this job begin or complete as well as when this job is deleted. The
default behavior is to not send any emails.
$ref: '#/components/schemas/JobEmailNotifications'
webhook_notifications:
description: >-
A collection of system notification IDs to notify when runs of this
job begin or complete. The default behavior is to not send any
system notifications.
$ref: '#/components/schemas/WebhookNotifications'
timeout_seconds:
type: integer
example: 86400
description: >-
An optional timeout applied to each run of this job. The default
behavior is to have no timeout.
format: int32
schedule:
description: >-
An optional periodic schedule for this job. The default behavior is
that the job only runs when triggered by clicking “Run Now” in the
Jobs UI or sending an API request to `runNow`.
$ref: '#/components/schemas/CronSchedule'
continuous:
description: >-
An optional continuous property for this job. The continuous
property will ensure that there is always one run executing. Only
one of `schedule` and `continuous` can be used.
$ref: '#/components/schemas/Continuous'
max_concurrent_runs:
type: integer
example: 10
description: >-
An optional maximum allowed number of concurrent runs of the job.
Set this value if you want to be able to execute multiple runs of
the same job concurrently. This is useful for example if you trigger
your job on a frequent schedule and want to allow consecutive runs
to overlap with each other, or if you want to trigger multiple runs
which differ by their input parameters.
This setting affects only new runs. For example, suppose the job’s
concurrency is 4 and there are 4 concurrent active runs. Then
setting the concurrency to 3 won’t kill any of the active runs.
However, from then on, new runs are skipped unless there are fewer
than 3 active runs.
This value cannot exceed 1000. Setting this value to 0 causes all
new runs to be skipped. The default behavior is to allow only 1
concurrent run.
format: int32
git_source:
example:
git_url: https://github.com/databricks/databricks-cli
git_branch: main
git_provider: gitHub
description: >-
This functionality is in Public Preview.
An optional specification for a remote repository containing the
notebooks used by this job's notebook tasks.
$ref: '#/components/schemas/GitSource'
format:
type: string
enum:
- SINGLE_TASK
- MULTI_TASK
example: MULTI_TASK
description: >-
Specifies the format of the job. This field is ignored in
Create/Update/Reset calls. When using the Jobs API 2.1 this value is
always set to `"MULTI_TASK"`.
JobSettings:
type: object
properties:
name:
type: string
example: A multitask job
default: Untitled
description: An optional name for the job.
tags:
type: object
example:
cost-center: engineering
team: jobs
default: '{}'
description: >-
A map of tags associated with the job. These are forwarded to the
cluster as cluster tags for jobs clusters, and are subject to the
same limitations as cluster tags. A maximum of 25 tags can be added
to the job.
tasks:
type: array
maxItems: 100
description: A list of task specifications to be executed by this job.
items:
$ref: '#/components/schemas/JobTaskSettings'
example:
- task_key: Sessionize
description: Extracts session data from events
depends_on: []
existing_cluster_id: 0923-164208-meows279
spark_jar_task:
main_class_name: com.databricks.Sessionize
parameters:
- '--data'
- dbfs:/path/to/data.json
libraries:
- jar: dbfs:/mnt/databricks/Sessionize.jar
timeout_seconds: 86400
max_retries: 3
min_retry_interval_millis: 2000
retry_on_timeout: false
- task_key: Orders_Ingest
description: Ingests order data
depends_on: []
job_cluster_key: auto_scaling_cluster
spark_jar_task:
main_class_name: com.databricks.OrdersIngest
parameters:
- '--data'
- dbfs:/path/to/order-data.json
libraries:
- jar: dbfs:/mnt/databricks/OrderIngest.jar
timeout_seconds: 86400
max_retries: 3
min_retry_interval_millis: 2000
retry_on_timeout: false
- task_key: Match
description: Matches orders with user sessions
depends_on:
- task_key: Orders_Ingest
- task_key: Sessionize
new_cluster:
spark_version: 7.3.x-scala2.12
node_type_id: i3.xlarge
spark_conf:
spark.speculation: true
aws_attributes:
availability: SPOT
zone_id: us-west-2a
autoscale:
min_workers: 2
max_workers: 16
notebook_task:
notebook_path: /Users/user.name@databricks.com/Match
source: WORKSPACE
base_parameters:
name: John Doe
age: '35'
timeout_seconds: 86400
max_retries: 3
min_retry_interval_millis: 2000
retry_on_timeout: false
job_clusters:
type: array
maxItems: 100
description: >-
A list of job cluster specifications that can be shared and reused
by tasks of this job. Libraries cannot be declared in a shared job
cluster. You must declare dependent libraries in task settings.
items:
$ref: '#/components/schemas/JobCluster'
example:
- job_cluster_key: auto_scaling_cluster
new_cluster:
spark_version: 7.3.x-scala2.12
node_type_id: i3.xlarge
spark_conf:
spark.speculation: true
aws_attributes:
availability: SPOT
zone_id: us-west-2a
autoscale:
min_workers: 2
max_workers: 16
email_notifications:
description: >-
An optional set of email addresses that is notified when runs of
this job begin or complete as well as when this job is deleted. The
default behavior is to not send any emails.
$ref: '#/components/schemas/JobEmailNotifications'
webhook_notifications:
description: >-
A collection of system notification IDs to notify when runs of this
job begin or complete. The default behavior is to not send any
system notifications.
$ref: '#/components/schemas/WebhookNotifications'
timeout_seconds:
type: integer
example: 86400
description: >-
An optional timeout applied to each run of this job. The default
behavior is to have no timeout.
format: int32
schedule:
description: >-
An optional periodic schedule for this job. The default behavior is
that the job only runs when triggered by clicking “Run Now” in the
Jobs UI or sending an API request to `runNow`.
$ref: '#/components/schemas/CronSchedule'
continuous:
description: >-
An optional continuous property for this job. The continuous
property will ensure that there is always one run executing. Only
one of `schedule` and `continuous` can be used.
$ref: '#/components/schemas/Continuous'
max_concurrent_runs:
type: integer
example: 10
description: >-
An optional maximum allowed number of concurrent runs of the job.
Set this value if you want to be able to execute multiple runs of
the same job concurrently. This is useful for example if you trigger
your job on a frequent schedule and want to allow consecutive runs
to overlap with each other, or if you want to trigger multiple runs
which differ by their input parameters.
This setting affects only new runs. For example, suppose the job’s
concurrency is 4 and there are 4 concurrent active runs. Then
setting the concurrency to 3 won’t kill any of the active runs.
However, from then on, new runs are skipped unless there are fewer
than 3 active runs.
This value cannot exceed 1000. Setting this value to 0 causes all
new runs to be skipped. The default behavior is to allow only 1
concurrent run.
format: int32
git_source:
example:
git_url: https://github.com/databricks/databricks-cli
git_branch: main
git_provider: gitHub
description: >-
This functionality is in Public Preview.
An optional specification for a remote repository containing the
notebooks used by this job's notebook tasks.
$ref: '#/components/schemas/GitSource'
format:
type: string
enum:
- SINGLE_TASK
- MULTI_TASK
example: MULTI_TASK
description: >-
Specifies the format of the job. This field is ignored in
Create/Update/Reset calls. When using the Jobs API 2.1 this value is
always set to `"MULTI_TASK"`.
JobTask:
properties:
notebook_task:
description: >-
If notebook_task, indicates that this job must run a notebook. This
field may not be specified in conjunction with spark_jar_task.
$ref: '#/components/schemas/NotebookTask'
spark_jar_task:
description: If spark_jar_task, indicates that this job must run a JAR.
example: ''
$ref: '#/components/schemas/SparkJarTask'
spark_python_task:
description: >-
If spark_python_task, indicates that this job must run a Python
file.
$ref: '#/components/schemas/SparkPythonTask'
spark_submit_task:
description: >-
If spark_submit_task, indicates that this job must be launched by
the spark submit script.
$ref: '#/components/schemas/SparkSubmitTask'
pipeline_task:
description: If pipeline_task, indicates that this job must execute a Pipeline.
$ref: '#/components/schemas/PipelineTask'
python_wheel_task:
description: >-
If python_wheel_task, indicates that this job must execute a
PythonWheel.
$ref: '#/components/schemas/PythonWheelTask'
sql_task:
description: >-
If sql_task, indicates that this job must execute a SQL task. It
requires both Databricks SQL and a serverless or a pro SQL
warehouse.
$ref: '#/components/schemas/SqlTask'
dbt_task:
description: >-
If dbt_task, indicates that this must execute a dbt task. It
requires both Databricks SQL and the ability to use a serverless or
a pro SQL warehouse.
$ref: '#/components/schemas/DbtTask'
TaskKey:
type: string
minLength: 1
maxLength: 100
pattern: ^[\w\-]+$
example: Task_Key
description: >-
A unique name for the task. This field is used to refer to this task
from other tasks.
This field is required and must be unique within its parent job.
On Update or Reset, this field is used to reference the tasks to be
updated or reset.
The maximum length is 100 characters.
TaskDependencies:
type: array
example:
- task_key: Previous_Task_Key
- task_key: Other_Task_Key
description: >-
An optional array of objects specifying the dependency graph of the
task. All tasks specified in this field must complete successfully
before executing this task.
The key is `task_key`, and the value is the name assigned to the
dependent task.
This field is required when a job consists of more than one task.
items:
type: object
properties:
task_key:
type: string
RunIf:
type: string
default: ALL_SUCCESS
example: ALL_SUCCESS
enum:
- ALL_SUCCESS
- NONE_FAILED
- ALL_DONE
- AT_LEAST_ONE_SUCCESS
- AT_LEAST_ONE_FAILED
- ALL_FAILED
description: >-
A condition that determines whether the task should be executed or excluded. The condition
evaluates the state of the task's dependencies. For example, a task with a Run if condition of
AT_LEAST_ONE_FAILED will run if at least one of its dependencies failed.
TaskDescription:
type: string
maxLength: 4096
example: This is the description for this task.
description: |-
An optional description for this task.
The maximum length is 4096 bytes.
JobTaskSettings:
required:
- task_key
properties:
task_key:
$ref: '#/components/schemas/TaskKey'
description:
$ref: '#/components/schemas/TaskDescription'
depends_on:
$ref: '#/components/schemas/TaskDependencies'
run_if:
$ref: '#/components/schemas/RunIf'
existing_cluster_id:
type: string
example: 0923-164208-meows279
description: >-
If existing_cluster_id, the ID of an existing cluster that is used
for all runs of this task. When running tasks on an existing
cluster, you may need to manually restart the cluster if it stops
responding. We suggest running jobs on new clusters for greater
reliability.
new_cluster:
description: >-
If new_cluster, a description of a cluster that is created only for
this task.
example: null
$ref: '#/components/schemas/NewTaskCluster'
job_cluster_key:
type: string
minLength: 1
maxLength: 100
pattern: ^[\w\-]+$
description: >-
If job_cluster_key, this task is executed reusing the cluster
specified in `job.settings.job_clusters`.
notebook_task:
description: >-
If notebook_task, indicates that this task must run a notebook. This
field may not be specified in conjunction with spark_jar_task.
$ref: '#/components/schemas/NotebookTask'
spark_jar_task:
description: If spark_jar_task, indicates that this task must run a JAR.
$ref: '#/components/schemas/SparkJarTask'
spark_python_task:
description: >-
If spark_python_task, indicates that this task must run a Python
file.
$ref: '#/components/schemas/SparkPythonTask'
spark_submit_task:
description: >-
If spark_submit_task, indicates that this task must be launched by
the spark submit script. This task can run only on new clusters.
$ref: '#/components/schemas/TaskSparkSubmitTask'
pipeline_task:
description: If pipeline_task, indicates that this task must execute a Pipeline.
$ref: '#/components/schemas/PipelineTask'
python_wheel_task:
description: >-
If python_wheel_task, indicates that this task must execute a
PythonWheel.
$ref: '#/components/schemas/PythonWheelTask'
sql_task:
description: >-
If sql_task, indicates that this task must execute a SQL task. It
requires both Databricks SQL and a serverless or a pro SQL
warehouse.
$ref: '#/components/schemas/SqlTask'
dbt_task:
description: >-
If dbt_task, indicates that this task must execute a dbt task. It
requires both Databricks SQL and the ability to use a serverless or
a pro SQL warehouse.
$ref: '#/components/schemas/DbtTask'
libraries:
type: array
description: >-
An optional list of libraries to be installed on the cluster that
executes the task. The default value is an empty list.
items:
$ref: '#/components/schemas/Library'
email_notifications:
description: >-
An optional set of email addresses that is notified when runs of
this task begin or complete as well as when this task is deleted.
The default behavior is to not send any emails.
$ref: '#/components/schemas/JobEmailNotifications'
timeout_seconds:
type: integer
example: 86400
description: >-
An optional timeout applied to each run of this job task. The
default behavior is to have no timeout.
format: int32
max_retries:
type: integer
example: 10
description: >-
An optional maximum number of times to retry an unsuccessful run. A
run is considered to be unsuccessful if it completes with the
`FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The
value -1 means to retry indefinitely and the value 0 means to never
retry. The default behavior is to never retry.
format: int32
min_retry_interval_millis:
type: integer
example: 2000
description: >-
An optional minimum interval in milliseconds between the start of
the failed run and the subsequent retry run. The default behavior is
that unsuccessful runs are immediately retried.
format: int32
retry_on_timeout:
type: boolean
example: true
description: >-
An optional policy to specify whether to retry a task when it times
out. The default behavior is to not retry on timeout.
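# Illustrative sketch of one JobTaskSettings entry as it might appear in a JobsCreate
# request; the cluster ID, notebook path, and library JAR path are placeholder values
# reused from examples elsewhere in this spec.
#   - task_key: Sessionize
#     description: Extracts session data from events
#     existing_cluster_id: 0923-164208-meows279
#     notebook_task:
#       notebook_path: /Users/user.name@databricks.com/Sessionize
#       base_parameters:
#         env: staging            # placeholder parameter
#     libraries:
#       - jar: dbfs:/mnt/databricks/Sessionize.jar
#     timeout_seconds: 86400
#     max_retries: 3
#     min_retry_interval_millis: 2000
#     retry_on_timeout: true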
JobCluster:
required:
- job_cluster_key
properties:
job_cluster_key:
type: string
minLength: 1
maxLength: 100
pattern: ^[\w\-]+$
example: auto_scaling_cluster
description: >-
A unique name for the job cluster. This field is required and must
be unique within the job.
`JobTaskSettings` may refer to this field to determine which cluster
to launch for the task execution.
new_cluster:
description: >-
If new_cluster, a description of a cluster that is created for each
task.
$ref: '#/components/schemas/NewCluster'
NewCluster:
required:
- spark_version
properties:
num_workers:
type: integer
description: >-
If num_workers, number of worker nodes that this cluster must have.
A cluster has one Spark driver and num_workers executors for a total
of num_workers + 1 Spark nodes. When reading the properties of a
cluster, this field reflects the desired number of workers rather
than the actual current number of workers. For example, if a cluster
is resized from 5 to 10 workers, this field immediately updates to
reflect the target size of 10 workers, whereas the workers listed in
`spark_info` gradually increase from 5 to 10 as the new nodes are
provisioned.
format: int32
autoscale:
description: >-
If autoscale, the required parameters to automatically scale
clusters up and down based on load.
$ref: '#/components/schemas/AutoScale'
spark_version:
type: string
description: >-
The Spark version of the cluster. A list of available Spark versions
can be retrieved by using the [Runtime
versions](https://docs.databricks.com/dev-tools/api/latest/clusters.html#runtime-versions)
API call.
spark_conf:
description: >-
An object containing a set of optional, user-specified Spark
configuration key-value pairs. You can also pass in a string of
extra JVM options to the driver and the executors via
`spark.driver.extraJavaOptions` and
`spark.executor.extraJavaOptions` respectively.
Example Spark confs: `{"spark.speculation": true,
"spark.streaming.ui.retainedBatches": 5}` or
`{"spark.driver.extraJavaOptions": "-verbose:gc
-XX:+PrintGCDetails"}`
$ref: '#/components/schemas/SparkConfPair'
aws_attributes:
description: >-
Attributes related to clusters running on Amazon Web Services. If
not specified at cluster creation, a set of default values is used.
$ref: '#/components/schemas/AwsAttributes'
node_type_id:
type: string
description: >-
This field encodes, through a single value, the resources available
to each of the Spark nodes in this cluster. For example, the Spark
nodes can be provisioned and optimized for memory or compute
intensive workloads. A list of available node types can be retrieved
by using the [List node
types](https://docs.databricks.com/dev-tools/api/latest/clusters.html#list-node-types)
API call.
driver_node_type_id:
type: string
description: >-
The node type of the Spark driver. This field is optional; if unset,
the driver node type is set as the same value as `node_type_id`
defined above.
ssh_public_keys:
type: array
description: >-
SSH public key contents that are added to each Spark node in this
cluster. The corresponding private keys can be used to login with
the user name `ubuntu` on port `2200`. Up to 10 keys can be
specified.
items:
type: string
custom_tags:
description: >-
An object containing a set of tags for cluster resources. Databricks
tags all cluster resources (such as AWS instances and EBS volumes)
with these tags in addition to default_tags.
**Note**:
* Tags are not supported on legacy node types such as
compute-optimized and memory-optimized
* Databricks allows at most 45 custom tags
$ref: '#/components/schemas/ClusterTag'
cluster_log_conf:
description: >-
The configuration for delivering Spark logs to a long-term storage
destination. Only one destination can be specified for one cluster.
If the conf is given, the logs are delivered to the destination
every `5 mins`. The destination of driver logs is
`<destination>/<cluster-id>/driver`, while the destination of
executor logs is `<destination>/<cluster-id>/executor`.
$ref: '#/components/schemas/ClusterLogConf'
init_scripts:
type: array
description: >-
The configuration for storing init scripts. Any number of scripts
can be specified. The scripts are executed sequentially in the order
provided. If `cluster_log_conf` is specified, init script logs are
sent to `<destination>/<cluster-id>/init_scripts`.
items:
$ref: '#/components/schemas/InitScriptInfo'
spark_env_vars:
description: >-
An object containing a set of optional, user-specified environment
variable key-value pairs. Key-value pair of the form (X,Y) are
exported as is (for example, `export X='Y'`) while launching the
driver and workers.
To specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we
recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in
the following example. This ensures that all default Databricks-managed
environment variables are included as well.
Example Spark environment variables: `{"SPARK_WORKER_MEMORY":
"28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or
`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS
-Dspark.shuffle.service.enabled=true"}`
$ref: '#/components/schemas/SparkEnvPair'
enable_elastic_disk:
type: boolean
description: >-
Autoscaling Local Storage: when enabled, this cluster dynamically
acquires additional disk space when its Spark workers are running
low on disk space. This feature requires specific AWS permissions to
function correctly - refer to [Autoscaling local
storage](https://docs.databricks.com/clusters/configure.html#autoscaling-local-storage)
for details.
driver_instance_pool_id:
type: string
description: >-
The optional ID of the instance pool to use for the driver node. You
must also specify `instance_pool_id`. Refer to [Instance Pools
API](https://docs.databricks.com/dev-tools/api/latest/instance-pools.html)
for details.
instance_pool_id:
type: string
description: >-
The optional ID of the instance pool to use for cluster nodes. If
`driver_instance_pool_id` is present, `instance_pool_id` is used for
worker nodes only. Otherwise, it is used for both the driver node
and worker nodes. Refer to [Instance Pools
API](https://docs.databricks.com/dev-tools/api/latest/instance-pools.html)
for details.
policy_id:
type: string
description: >-
A [cluster
policy](https://docs.databricks.com/dev-tools/api/latest/policies.html)
ID. Either `node_type_id` or `instance_pool_id` must be specified in
the cluster policy if they are not specified in this job cluster
object.
enable_local_disk_encryption:
type: boolean
description: >-
Determines whether encryption of disks locally attached to the
cluster is enabled.
docker_image:
description: >-
Docker image for a [custom
container](https://docs.databricks.com/clusters/custom-containers.html).
$ref: '#/components/schemas/DockerImage'
runtime_engine:
type: string
description: >-
The type of runtime engine to use. If not specified, the runtime
engine type is inferred based on the `spark_version` value. Allowed
values include:
* `PHOTON`: Use the Photon runtime engine type.
* `STANDARD`: Use the standard runtime engine type.
This field is optional.
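# Illustrative sketch of a NewCluster payload; the Spark version, node type, zone, and
# autoscale bounds are example values taken from elsewhere in this spec, and the custom
# tag is a placeholder, not a recommendation.
#   new_cluster:
#     spark_version: 7.3.x-scala2.12
#     node_type_id: i3.xlarge
#     autoscale:
#       min_workers: 2
#       max_workers: 16
#     aws_attributes:
#       availability: SPOT_WITH_FALLBACK
#       first_on_demand: 1
#       zone_id: us-west-2a
#     spark_conf:
#       spark.speculation: true
#     custom_tags:
#       team: data-eng            # placeholder tag
#     enable_elastic_disk: true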
NewTaskCluster:
allOf:
- $ref: '#/components/schemas/NewCluster'
description: >-
If new_cluster, a description of a cluster that is created only for this
task.
NotebookOutput:
properties:
result:
type: string
example: An arbitrary string passed by calling dbutils.notebook.exit(...)
description: >-
The value passed to
[dbutils.notebook.exit()](https://docs.databricks.com/notebooks/notebook-workflows.html#notebook-workflows-exit).
Databricks restricts this API to return the first 5 MB of the value.
For a larger result, your job can store the results in a cloud
storage service. This field is absent if `dbutils.notebook.exit()`
was never called.
truncated:
type: boolean
example: false
description: Whether or not the result was truncated.
NotebookTask:
required:
- notebook_path
properties:
notebook_path:
type: string
example: /Users/user.name@databricks.com/notebook_to_run
description: >-
The path of the notebook to be run in the Databricks workspace or
remote repository. For notebooks stored in the Databricks workspace,
the path must be absolute and begin with a slash. For notebooks
stored in a remote repository, the path must be relative. This field
is required.
source:
type: string
example: WORKSPACE
enum:
- WORKSPACE
- GIT
description: >-
Optional location type of the notebook. When set to `WORKSPACE`, the
notebook will be retrieved from the local Databricks workspace. When
set to `GIT`, the notebook will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use
`GIT` if `git_source` is defined and `WORKSPACE` otherwise.
base_parameters:
type: object
example:
name: John Doe
age: 35
description: >-
Base parameters to be used for each run of this job. If the run is
initiated by a call to
[`run-now`](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow)
with parameters specified, the two parameters maps are merged. If
the same key is specified in `base_parameters` and in `run-now`, the
value from `run-now` is used.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
If the notebook takes a parameter that is not specified in the job’s
`base_parameters` or the `run-now` override parameters, the default
value from the notebook is used.
Retrieve these parameters in a notebook using
[dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).
additionalProperties: true
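# Illustrative sketch of a notebook task sourced from a Git repository (this requires a
# `git_source` on the job); the relative path is a placeholder, and the parameter values
# mirror the example above.
#   notebook_task:
#     notebook_path: notebooks/daily_report    # relative path when source is GIT
#     source: GIT
#     base_parameters:
#       name: John Doe
#       age: 35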
RunTask:
properties:
run_id:
type: integer
description: The ID of the task run.
format: int64
example: 99887766
task_key:
$ref: '#/components/schemas/TaskKey'
description:
$ref: '#/components/schemas/TaskDescription'
state:
description: The result and lifecycle states of the run.
$ref: '#/components/schemas/RunState'
depends_on:
$ref: '#/components/schemas/TaskDependencies'
existing_cluster_id:
type: string
description: >-
If existing_cluster_id, the ID of an existing cluster that is used
for all runs of this job. When running jobs on an existing cluster,
you may need to manually restart the cluster if it stops responding.
We suggest running jobs on new clusters for greater reliability.
new_cluster:
example: null
description: >-
If new_cluster, a description of a cluster that is created only for
this task.
$ref: '#/components/schemas/NewTaskCluster'
libraries:
type: array
description: >-
An optional list of libraries to be installed on the cluster that
executes the job. The default value is an empty list.
items:
$ref: '#/components/schemas/Library'
notebook_task:
description: >-
If notebook_task, indicates that this job must run a notebook. This
field may not be specified in conjunction with spark_jar_task.
$ref: '#/components/schemas/NotebookTask'
spark_jar_task:
description: If spark_jar_task, indicates that this job must run a JAR.
$ref: '#/components/schemas/SparkJarTask'
spark_python_task:
description: >-
If spark_python_task, indicates that this job must run a Python
file.
$ref: '#/components/schemas/SparkPythonTask'
spark_submit_task:
description: >-
If spark_submit_task, indicates that this task must be launched by
the spark submit script. This task can run only on new clusters.
$ref: '#/components/schemas/TaskSparkSubmitTask'
pipeline_task:
description: If pipeline_task, indicates that this job must execute a Pipeline.
$ref: '#/components/schemas/PipelineTask'
python_wheel_task:
description: >-
If python_wheel_task, indicates that this job must execute a
PythonWheel.
$ref: '#/components/schemas/PythonWheelTask'
sql_task:
description: >-
If sql_task, indicates that this job must execute a SQL task. It
requires both Databricks SQL and a serverless or a pro SQL
warehouse.
$ref: '#/components/schemas/SqlTask'
dbt_task:
description: >-
If dbt_task, indicates that this job must execute a dbt task. It
requires both Databricks SQL and the ability to use a serverless or
a pro SQL warehouse.
$ref: '#/components/schemas/DbtTask'
start_time:
type: integer
example: 1625060460483
description: >-
The time at which this run was started in epoch milliseconds
(milliseconds since 1/1/1970 UTC). This may not be the time when the
job task starts executing; for example, if the job is scheduled to
run on a new cluster, this is the time the cluster creation call is
issued.
format: int64
setup_duration:
type: integer
example: 0
description: >-
The time in milliseconds it took to set up the cluster. For runs
that run on new clusters this is the cluster creation time, for runs
that run on existing clusters this time should be very short. The
duration of a task run is the sum of the `setup_duration`,
`execution_duration`, and the `cleanup_duration`. The
`setup_duration` field is set to 0 for multitask job runs. The total
duration of a multitask job run is the value of the `run_duration`
field.
format: int64
execution_duration:
type: integer
example: 0
description: >-
The time in milliseconds it took to execute the commands in the JAR
or notebook until they completed, failed, timed out, were cancelled,
or encountered an unexpected error.
format: int64
cleanup_duration:
type: integer
example: 0
description: >-
The time in milliseconds it took to terminate the cluster and clean
up any associated artifacts. The total duration of the run is the
sum of the setup_duration, the execution_duration, and the
cleanup_duration.
format: int64
end_time:
type: integer
example: 1625060863413
description: >-
The time at which this run ended in epoch milliseconds (milliseconds
since 1/1/1970 UTC). This field is set to 0 if the job is still
running.
format: int64
attempt_number:
type: integer
example: 0
description: >-
The sequence number of this run attempt for a triggered job run. The
initial attempt of a run has an attempt_number of 0. If the initial
run attempt fails, and the job has a retry policy (`max_retries` >
0), subsequent runs are created with an `original_attempt_run_id` of
the original attempt’s ID and an incrementing `attempt_number`. Runs
are retried only until they succeed, and the maximum
`attempt_number` is the same as the `max_retries` value for the job.
format: int32
cluster_instance:
description: >-
The cluster used for this run. If the run is specified to use a new
cluster, this field is set once the Jobs service has requested a
cluster for the run.
$ref: '#/components/schemas/ClusterInstance'
git_source:
description: >-
This functionality is in Public Preview.
An optional specification for a remote repository containing the
notebooks used by this job's notebook tasks.
example:
git_url: https://github.com/databricks/databricks-cli
git_branch: main
git_provider: gitHub
$ref: '#/components/schemas/GitSource'
Run:
properties:
job_id:
type: integer
example: 11223344
description: The canonical identifier of the job that contains this run.
format: int64
run_id:
type: integer
example: 455644833
description: >-
The canonical identifier of the run. This ID is unique across all
runs of all jobs.
format: int64
number_in_job:
type: integer
example: 455644833
deprecated: true
description: >-
A unique identifier for this job run. This is set to the same value
as `run_id`.
format: int64
creator_user_name:
type: string
example: user.name@databricks.com
description: >-
The creator user name. This field won’t be included in the response
if the user has already been deleted.
original_attempt_run_id:
type: integer
example: 455644833
description: >-
If this run is a retry of a prior run attempt, this field contains
the run_id of the original attempt; otherwise, it is the same as the
run_id.
format: int64
state:
description: The result and lifecycle states of the run.
$ref: '#/components/schemas/RunState'
schedule:
description: >-
The cron schedule that triggered this run if it was triggered by the
periodic scheduler.
$ref: '#/components/schemas/CronSchedule'
continuous:
description: The continuous trigger that triggered this run.
$ref: '#/components/schemas/Continuous'
tasks:
description: >-
The list of tasks performed by the run. Each task has its own
`run_id` which you can use to call `JobsGetOutput` to retrieve the
run results.
type: array
maxItems: 100
items:
$ref: '#/components/schemas/RunTask'
example:
- run_id: 2112892
task_key: Orders_Ingest
description: Ingests order data
job_cluster_key: auto_scaling_cluster
spark_jar_task:
main_class_name: com.databricks.OrdersIngest
libraries:
- jar: dbfs:/mnt/databricks/OrderIngest.jar
state:
life_cycle_state: INTERNAL_ERROR
result_state: FAILED
state_message: >-
Library installation failed for library due to user error.
Error messages:
'Manage' permissions are required to install libraries on a
cluster
user_cancelled_or_timedout: false
run_page_url: https://my-workspace.cloud.databricks.com/#job/39832/run/20
start_time: 1629989929660
setup_duration: 0
execution_duration: 0
cleanup_duration: 0
end_time: 1629989930171
cluster_instance:
cluster_id: 0923-164208-meows279
spark_context_id: '4348585301701786933'
attempt_number: 0
- run_id: 2112897
task_key: Match
description: Matches orders with user sessions
depends_on:
- task_key: Orders_Ingest
- task_key: Sessionize
new_cluster:
spark_version: 7.3.x-scala2.12
node_type_id: i3.xlarge
spark_conf:
spark.speculation: true
aws_attributes:
availability: SPOT
zone_id: us-west-2a
autoscale:
min_workers: 2
max_workers: 16
notebook_task:
notebook_path: /Users/user.name@databricks.com/Match
source: WORKSPACE
state:
life_cycle_state: SKIPPED
state_message: An upstream task failed.
user_cancelled_or_timedout: false
run_page_url: https://my-workspace.cloud.databricks.com/#job/39832/run/21
start_time: 0
setup_duration: 0
execution_duration: 0
cleanup_duration: 0
end_time: 1629989930238
cluster_instance:
cluster_id: 0923-164208-meows279
attempt_number: 0
- run_id: 2112902
task_key: Sessionize
description: Extracts session data from events
existing_cluster_id: 0923-164208-meows279
spark_jar_task:
main_class_name: com.databricks.Sessionize
libraries:
- jar: dbfs:/mnt/databricks/Sessionize.jar
state:
life_cycle_state: INTERNAL_ERROR
result_state: FAILED
state_message: >-
Library installation failed for library due to user error.
Error messages:
'Manage' permissions are required to install libraries on a
cluster
user_cancelled_or_timedout: false
run_page_url: https://my-workspace.cloud.databricks.com/#job/39832/run/22
start_time: 1629989929668
setup_duration: 0
execution_duration: 0
cleanup_duration: 0
end_time: 1629989930144
cluster_instance:
cluster_id: 0923-164208-meows279
spark_context_id: '4348585301701786933'
attempt_number: 0
job_clusters:
type: array
maxItems: 100
description: >-
A list of job cluster specifications that can be shared and reused
by tasks of this job. Libraries cannot be declared in a shared job
cluster. You must declare dependent libraries in task settings.
items:
$ref: '#/components/schemas/JobCluster'
example:
- job_cluster_key: auto_scaling_cluster
new_cluster:
spark_version: 7.3.x-scala2.12
node_type_id: i3.xlarge
spark_conf:
spark.speculation: true
aws_attributes:
availability: SPOT
zone_id: us-west-2a
autoscale:
min_workers: 2
max_workers: 16
cluster_spec:
description: >-
A snapshot of the job’s cluster specification when this run was
created.
$ref: '#/components/schemas/ClusterSpec'
cluster_instance:
description: >-
The cluster used for this run. If the run is specified to use a new
cluster, this field is set once the Jobs service has requested a
cluster for the run.
$ref: '#/components/schemas/ClusterInstance'
git_source:
description: >-
This functionality is in Public Preview.
An optional specification for a remote repository containing the
notebooks used by this job's notebook tasks.
example:
git_url: https://github.com/databricks/databricks-cli
git_branch: main
git_provider: gitHub
$ref: '#/components/schemas/GitSource'
overriding_parameters:
description: The parameters used for this run.
$ref: '#/components/schemas/RunParameters'
start_time:
type: integer
example: 1625060460483
description: >-
The time at which this run was started in epoch milliseconds
(milliseconds since 1/1/1970 UTC). This may not be the time when the
job task starts executing; for example, if the job is scheduled to
run on a new cluster, this is the time the cluster creation call is
issued.
format: int64
setup_duration:
type: integer
example: 0
description: >-
The time in milliseconds it took to set up the cluster. For runs
that run on new clusters this is the cluster creation time, for runs
that run on existing clusters this time should be very short. The
duration of a task run is the sum of the `setup_duration`,
`execution_duration`, and the `cleanup_duration`. The
`setup_duration` field is set to 0 for multitask job runs. The total
duration of a multitask job run is the value of the `run_duration`
field.
format: int64
execution_duration:
type: integer
example: 0
description: >-
The time in milliseconds it took to execute the commands in the JAR
or notebook until they completed, failed, timed out, were
cancelled, or encountered an unexpected error. The duration of a
task run is the sum of the `setup_duration`, `execution_duration`,
and the `cleanup_duration`. The `execution_duration` field is set
to 0 for multitask job runs. The total duration of a multitask job
run is the value of the `run_duration` field.
format: int64
cleanup_duration:
type: integer
example: 0
description: >-
The time in milliseconds it took to terminate the cluster and clean
up any associated artifacts. The duration of a task run is the sum
of the `setup_duration`, `execution_duration`, and the
`cleanup_duration`. The `cleanup_duration` field is set to 0 for
multitask job runs. The total duration of a multitask job run is the
value of the `run_duration` field.
format: int64
end_time:
type: integer
example: 1625060863413
description: >-
The time at which this run ended in epoch milliseconds (milliseconds
since 1/1/1970 UTC). This field is set to 0 if the job is still
running.
format: int64
run_duration:
type: integer
example: 3879812
description: >-
The time in milliseconds it took the job run and all of its repairs
to finish. This field is only set for multitask job runs and not
task runs. The duration of a task run is the sum of the
`setup_duration`, `execution_duration`, and the `cleanup_duration`.
trigger:
description: The type of trigger that fired this run.
$ref: '#/components/schemas/TriggerType'
run_name:
type: string
example: A multitask job run
default: Untitled
description: >-
An optional name for the run. The maximum allowed length is 4096
bytes in UTF-8 encoding.
run_page_url:
type: string
description: The URL to the detail page of the run.
example: https://my-workspace.cloud.databricks.com/#job/11223344/run/123
run_type:
$ref: '#/components/schemas/RunType'
attempt_number:
type: integer
example: 0
description: >-
The sequence number of this run attempt for a triggered job run. The
initial attempt of a run has an attempt_number of 0. If the initial
run attempt fails, and the job has a retry policy (`max_retries` >
0), subsequent runs are created with an `original_attempt_run_id` of
the original attempt’s ID and an incrementing `attempt_number`. Runs
are retried only until they succeed, and the maximum
`attempt_number` is the same as the `max_retries` value for the job.
format: int32
RunType:
type: string
example: JOB_RUN
enum:
- JOB_RUN
- WORKFLOW_RUN
- SUBMIT_RUN
description: >-
The type of the run.
* `JOB_RUN` - Normal job run. A run created with [Run
now](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow).
* `WORKFLOW_RUN` - Workflow run. A run created with
[dbutils.notebook.run](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-workflow).
* `SUBMIT_RUN` - Submit run. A run created with [Run
Submit](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit).
RunParameters:
properties:
jar_params:
type: array
example:
- john
- doe
- '35'
description: >-
A list of parameters for jobs with Spark JAR tasks, for example
`"jar_params": ["john doe", "35"]`. The parameters are used to
invoke the main function of the main class specified in the Spark
JAR task. If not specified upon `run-now`, it defaults to an empty
list. jar_params cannot be specified in conjunction with
notebook_params. The JSON representation of this field (for example
`{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
items:
type: string
notebook_params:
type: object
example:
name: john doe
age: '35'
description: >-
A map from keys to values for jobs with notebook task, for example
`"notebook_params": {"name": "john doe", "age": "35"}`. The map is
passed to the notebook and is accessible through the
[dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets)
function.
If not specified upon `run-now`, the triggered run uses the job’s
base parameters.
notebook_params cannot be specified in conjunction with jar_params.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
The JSON representation of this field (for example
`{"notebook_params":{"name":"john doe","age":"35"}}`) cannot exceed
10,000 bytes.
additionalProperties: true
python_params:
type: array
example:
- john doe
- '35'
description: >-
A list of parameters for jobs with Python tasks, for example
`"python_params": ["john doe", "35"]`. The parameters are passed to
Python file as command-line parameters. If specified upon `run-now`,
it overwrites the parameters specified in the job setting. The JSON
representation of this field (for example `{"python_params":["john
doe","35"]}`) cannot exceed 10,000 bytes.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
Important
These parameters accept only Latin characters (ASCII character set).
Using non-ASCII characters returns an error. Examples of invalid,
non-ASCII characters are Chinese, Japanese kanjis, and emojis.
items:
type: string
spark_submit_params:
type: array
example:
- '--class'
- org.apache.spark.examples.SparkPi
description: >-
A list of parameters for jobs with spark submit task, for example
`"spark_submit_params": ["--class",
"org.apache.spark.examples.SparkPi"]`. The parameters are passed to
spark-submit script as command-line parameters. If specified upon
`run-now`, it overwrites the parameters specified in the job
setting. The JSON representation of this field (for example
`{"spark_submit_params":["--class","org.apache.spark.examples.SparkPi"]}`) cannot exceed 10,000 bytes.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
Important
These parameters accept only Latin characters (ASCII character set).
Using non-ASCII characters returns an error. Examples of invalid,
non-ASCII characters are Chinese, Japanese kanjis, and emojis.
items:
type: string
python_named_params:
type: object
example:
name: task
data: dbfs:/path/to/data.json
description: >-
A map from keys to values for jobs with Python wheel task, for
example `"python_named_params": {"name": "task", "data":
"dbfs:/path/to/data.json"}`.
pipeline_params:
properties:
full_refresh:
type: boolean
description: If true, triggers a full refresh on the delta live table.
sql_params:
type: object
example:
name: john doe
age: '35'
description: >-
A map from keys to values for SQL tasks, for example `"sql_params":
{"name": "john doe", "age": "35"}`. The SQL alert task does not
support custom parameters.
dbt_commands:
type: array
example:
- dbt deps
- dbt seed
- dbt run
description: >-
An array of commands to execute for jobs with the dbt task, for
example `"dbt_commands": ["dbt deps", "dbt seed", "dbt run"]`
RunState:
description: The result and lifecycle state of the run.
properties:
life_cycle_state:
description: >-
A description of a run’s current location in the run lifecycle. This
field is always available in the response.
$ref: '#/components/schemas/RunLifeCycleState'
result_state:
$ref: '#/components/schemas/RunResultState'
user_cancelled_or_timedout:
type: boolean
example: false
description: >-
Whether a run was canceled manually by a user or by the scheduler
because the run timed out.
state_message:
type: string
example: ''
description: >-
A descriptive message for the current state. This field is
unstructured, and its exact format is subject to change.
SparkJarTask:
properties:
main_class_name:
type: string
example: com.databricks.ComputeModels
description: >-
The full name of the class containing the main method to be
executed. This class must be contained in a JAR provided as a
library.
The code must use `SparkContext.getOrCreate` to obtain a Spark
context; otherwise, runs of the job fail.
parameters:
type: array
example:
- '--data'
- dbfs:/path/to/data.json
description: >-
Parameters passed to the main method.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
items:
type: string
jar_uri:
type: string
deprecated: true
description: >-
Deprecated since 04/2016. Provide a `jar` through the `libraries`
field instead. For an example, see
[Create](https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsCreate).
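# Illustrative sketch: a Spark JAR task together with the library that supplies the main
# class (per the note above, the JAR is declared in the task's `libraries` field, not via
# `jar_uri`); the class name and paths are placeholder values.
#   spark_jar_task:
#     main_class_name: com.databricks.ComputeModels
#     parameters:
#       - '--data'
#       - dbfs:/path/to/data.json
#   libraries:
#     - jar: dbfs:/mnt/databricks/ComputeModels.jar   # placeholder JAR path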
SparkPythonTask:
required:
- python_file
properties:
python_file:
type: string
example: dbfs:/path/to/file.py
description: >-
The Python file to be executed. Cloud file URIs (such as dbfs:/,
s3:/, adls:/, gcs:/) and workspace paths are supported. For python
files stored in the Databricks workspace, the path must be absolute
and begin with `/`. This field is required.
parameters:
type: array
example:
- '--data'
- dbfs:/path/to/data.json
description: >-
Command line parameters passed to the Python file.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
items:
type: string
SparkSubmitTask:
properties:
parameters:
type: array
example:
- '--class'
- org.apache.spark.examples.SparkPi
- dbfs:/path/to/examples.jar
- '10'
description: >-
Command-line parameters passed to spark submit.
Use [Task parameter
variables](https://docs.databricks.com/jobs.html#parameter-variables)
to set parameters containing information about job runs.
items:
type: string
TaskSparkSubmitTask:
allOf:
- $ref: '#/components/schemas/SparkSubmitTask'
description: >-
If spark_submit_task, indicates that this task must be launched by the
spark submit script. This task can run only on new clusters.
PipelineTask:
properties:
pipeline_id:
type: string
example: a12cd3e4-0ab1-1abc-1a2b-1a2bcd3e4fg5
description: The full name of the pipeline task to execute.
full_refresh:
type: boolean
description: If true, a full refresh will be triggered on the delta live table.
default: false
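# Illustrative sketch of a pipeline task that triggers a full refresh; the pipeline ID
# is the placeholder value used elsewhere in this spec.
#   pipeline_task:
#     pipeline_id: a12cd3e4-0ab1-1abc-1a2b-1a2bcd3e4fg5
#     full_refresh: true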
PythonWheelTask:
properties:
package_name:
type: string
description: Name of the package to execute
entry_point:
type: string
description: >-
Named entry point to use. If it does not exist in the metadata of
the package, the function is executed from the package directly
using `$packageName.$entryPoint()`.
parameters:
type: array
example:
- '--name=task'
- one
- two
description: >-
Command-line parameters passed to Python wheel task. Leave it empty
if `named_parameters` is not null.
items:
type: string
named_parameters:
type: object
example:
name: task
data: dbfs:/path/to/data.json
description: >-
Command-line parameters passed to Python wheel task in the form of
`["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty
if `parameters` is not null.
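# Illustrative sketch of a Python wheel task using named parameters (use either
# `parameters` or `named_parameters`, not both); the package name and entry point are
# placeholders.
#   python_wheel_task:
#     package_name: my_package      # placeholder
#     entry_point: run              # placeholder
#     named_parameters:
#       name: task
#       data: dbfs:/path/to/data.json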
SqlTask:
required:
- warehouse_id
properties:
query:
description: If query, indicates that this job must execute a SQL query.
$ref: '#/components/schemas/SqlTaskQuery'
dashboard:
description: If dashboard, indicates that this job must refresh a SQL dashboard.
$ref: '#/components/schemas/SqlTaskDashboard'
alert:
description: If alert, indicates that this job must refresh a SQL alert.
$ref: '#/components/schemas/SqlTaskAlert'
file:
description: >-
If file, indicates that this job runs a SQL file in a remote Git
repository.
$ref: '#/components/schemas/SqlTaskFile'
parameters:
type: object
example:
name: John Doe
age: 35
description: >-
Parameters to be used for each run of this job. The SQL alert task
does not support custom parameters.
warehouse_id:
type: string
description: >-
The canonical identifier of the SQL warehouse. Only serverless and
pro SQL warehouses are supported.
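# Illustrative sketch of a SQL task that runs a saved query on a serverless or pro SQL
# warehouse; the warehouse ID is reused from the dbt example below and the query ID is a
# placeholder.
#   sql_task:
#     warehouse_id: 30dade0507d960d1
#     query:
#       query_id: 12345678-1234-1234-1234-123456789012   # placeholder query ID
#     parameters:
#       name: John Doe
#       age: 35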
SqlTaskQuery:
required:
- query_id
properties:
query_id:
type: string
description: The canonical identifier of the SQL query.
SqlTaskDashboard:
required:
- dashboard_id
properties:
dashboard_id:
type: string
description: The canonical identifier of the SQL dashboard.
SqlTaskAlert:
required:
- alert_id
properties:
alert_id:
type: string
description: The canonical identifier of the SQL alert.
SqlTaskFile:
required:
- path
properties:
path:
type: string
description: Relative path of the SQL file in the remote Git repository.
SqlOutput:
properties:
query_output:
description: The output of a SQL query task, if available.
$ref: '#/components/schemas/SqlQueryOutput'
dashboard_output:
description: The output of a SQL dashboard task, if available.
$ref: '#/components/schemas/SqlDashboardOutput'
alert_output:
description: The output of a SQL alert task, if available.
$ref: '#/components/schemas/SqlAlertOutput'
SqlQueryOutput:
properties:
query_text:
type: string
description: >-
The text of the SQL query. The Can Run permission on the SQL query
is required to view this field.
warehouse_id:
type: string
description: The canonical identifier of the SQL warehouse.
sql_statements:
description: Information about SQL statements executed in the run.
$ref: '#/components/schemas/SqlStatementOutput'
output_link:
type: string
description: The link to find the output results.
SqlDashboardOutput:
properties:
widgets:
description: >-
Widgets executed in the run. Only SQL query based widgets are
listed.
$ref: '#/components/schemas/SqlDashboardWidgetOutput'
SqlAlertOutput:
properties:
query_text:
type: string
description: >-
The text of the SQL query. The Can Run permission on the SQL query
associated with the SQL alert is required to view this field.
warehouse_id:
type: string
description: The canonical identifier of the SQL warehouse.
sql_statements:
description: Information about SQL statements executed in the run.
$ref: '#/components/schemas/SqlStatementOutput'
output_link:
type: string
description: The link to find the output results.
SqlStatementOutput:
properties:
lookup_key:
type: string
description: A key that can be used to look up query details.
SqlDashboardWidgetOutput:
properties:
widget_id:
type: string
description: The canonical identifier of the SQL widget.
widget_title:
type: string
description: The title of the SQL widget.
output_link:
type: string
description: The link to find the output results.
status:
type: string
enum:
- PENDING
- RUNNING
- SUCCESS
- FAILED
- CANCELLED
description: The execution status of the SQL widget.
error:
description: The information about the error when execution fails.
$ref: '#/components/schemas/SqlOutputError'
start_time:
type: integer
description: >-
Time (in epoch milliseconds) when execution of the SQL widget
starts.
format: int64
end_time:
type: integer
description: Time (in epoch milliseconds) when execution of the SQL widget ends.
format: int64
SqlOutputError:
properties:
message:
type: string
description: The error message when execution fails.
DbtTask:
required:
- commands
properties:
project_directory:
type: string
description: >-
Optional (relative) path to the project directory. If no value is
provided, the root of the Git repository is used.
commands:
type: array
example:
- dbt deps
- dbt seed
- dbt run --models 123
description: >-
A list of dbt commands to execute. All commands must start with
`dbt`. This parameter must not be empty. A maximum of 10
commands can be provided.
schema:
type: string
description: >-
Optional schema to write to. This parameter is only used when a
warehouse_id is also provided. If not provided, the `default` schema
is used.
warehouse_id:
type: string
example: 30dade0507d960d1
description: >-
ID of the SQL warehouse to connect to. If provided, we automatically
generate and provide the profile and connection details to dbt. It
can be overridden on a per-command basis by using the
`--profiles-dir` command line argument.
catalog:
type: string
example: main
description: >-
Optional name of the catalog to use. The value is the top level in
the 3-level namespace of Unity Catalog (catalog / schema /
relation). The catalog value can only be specified if a warehouse_id
is specified. Requires dbt-databricks >= 1.1.1.
profiles_directory:
type: string
description: >-
Optional (relative) path to the profiles directory. Can only be
specified if no warehouse_id is specified. If no warehouse_id is
specified and this folder is unset, the root directory is used.
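# Illustrative sketch of a dbt task running against a SQL warehouse; the warehouse ID,
# catalog, and commands reuse the example values above, and the schema name is a
# placeholder.
#   dbt_task:
#     warehouse_id: 30dade0507d960d1
#     catalog: main
#     schema: analytics            # placeholder schema name
#     commands:
#       - dbt deps
#       - dbt seed
#       - dbt run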
DbtOutput:
properties:
artifacts_link:
type: string
description: >-
A pre-signed URL to download the (compressed) dbt artifacts. This
link is valid for a limited time (30 minutes). This information is
only available after the run has finished.
artifacts_headers:
type: object
description: >-
An optional map of headers to send when retrieving the artifact from
the `artifacts_link`.
ViewItem:
properties:
content:
type: string
description: Content of the view.
name:
type: string
description: >-
Name of the view item. In the case of code view, it would be the
notebook’s name. In the case of dashboard view, it would be the
dashboard’s name.
type:
description: Type of the view item.
$ref: '#/components/schemas/ViewType'
RunLifeCycleState:
type: string
enum:
- TERMINATED
- PENDING
- RUNNING
- TERMINATING
- SKIPPED
- INTERNAL_ERROR
- BLOCKED
- WAITING_FOR_RETRY
description: >-
* `PENDING`: The run has been triggered. If there is not already an
active run of the same job, the cluster and execution context are being
prepared. If there is already an active run of the same job, the run
immediately transitions into the `SKIPPED` state without preparing any
resources.
* `RUNNING`: The task of this run is being executed.
* `TERMINATING`: The task of this run has completed, and the cluster and
execution context are being cleaned up.
* `TERMINATED`: The task of this run has completed, and the cluster and
execution context have been cleaned up. This state is terminal.
* `SKIPPED`: This run was aborted because a previous run of the same job
was already active. This state is terminal.
* `INTERNAL_ERROR`: An exceptional state that indicates a failure in the
Jobs service, such as network failure over a long period. If a run on a
new cluster ends in the `INTERNAL_ERROR` state, the Jobs service
terminates the cluster as soon as possible. This state is terminal.
* `BLOCKED`: The run is blocked on an upstream dependency.
* `WAITING_FOR_RETRY`: The run is waiting for a retry.
RunResultState:
type: string
enum:
- SUCCESS
- FAILED
- TIMEDOUT
- CANCELED
description: |-
* `SUCCESS`: The task completed successfully.
* `FAILED`: The task completed with an error.
* `TIMEDOUT`: The run was stopped after reaching the timeout.
* `CANCELED`: The run was canceled at user request.
TriggerType:
type: string
enum:
- PERIODIC
- ONE_TIME
- RETRY
description: >-
* `PERIODIC`: Schedules that periodically trigger runs, such as a cron
scheduler.
* `ONE_TIME`: One time triggers that fire a single run. This occurs when you
trigger a single run on demand through the UI or the API.
* `RETRY`: Indicates a run that is triggered as a retry of a previously
failed run. This occurs when you request to re-run the job in case of
failures.
ViewType:
type: string
enum:
- NOTEBOOK
- DASHBOARD
description: |-
* `NOTEBOOK`: Notebook view item.
* `DASHBOARD`: Dashboard view item.
ViewsToExport:
type: string
default: CODE
enum:
- CODE
- DASHBOARDS
- ALL
description: |-
* `CODE`: Code view of the notebook.
* `DASHBOARDS`: All dashboard views of the notebook.
* `ALL`: All views of the notebook.
AutoScale:
properties:
min_workers:
type: integer
description: >-
The minimum number of workers to which the cluster can scale down
when underutilized. It is also the initial number of workers the
cluster has after creation.
format: int32
max_workers:
type: integer
description: >-
The maximum number of workers to which the cluster can scale up when
overloaded. max_workers must be strictly greater than min_workers.
format: int32
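# Illustrative sketch: an autoscale block letting the cluster scale between 2 and 16
# workers, matching the example values used elsewhere in this spec.
#   autoscale:
#     min_workers: 2
#     max_workers: 16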
ClusterInfo:
properties:
num_workers:
type: integer
description: >-
If num_workers, number of worker nodes that this cluster must have.
A cluster has one Spark driver and num_workers executors for a total
of num_workers + 1 Spark nodes. **Note:** When reading the
properties of a cluster, this field reflects the desired number of
workers rather than the actual number of workers. For instance, if a
cluster is resized from 5 to 10 workers, this field is immediately
updated to reflect the target size of 10 workers, whereas the
workers listed in `executors` gradually increase from 5 to 10 as the
new nodes are provisioned.
format: int32
autoscale:
description: >-
If autoscale, parameters needed in order to automatically scale
clusters up and down based on load.
$ref: '#/components/schemas/AutoScale'
cluster_id:
type: string
description: >-
Canonical identifier for the cluster. This ID is retained during
cluster restarts and resizes, while each new cluster has a globally
unique ID.
creator_user_name:
type: string
description: >-
Creator user name. The field won’t be included in the response if
the user has already been deleted.
driver:
description: >-
Node on which the Spark driver resides. The driver node contains the
Spark master and the Databricks application that manages the
per-notebook Spark REPLs.
$ref: '#/components/schemas/SparkNode'
executors:
type: array
description: Nodes on which the Spark executors reside.
items:
$ref: '#/components/schemas/SparkNode'
spark_context_id:
type: integer
description: >-
A canonical SparkContext identifier. This value _does_ change when
the Spark driver restarts. The pair `(cluster_id, spark_context_id)`
is a globally unique identifier over all Spark contexts.
format: int64
jdbc_port:
type: integer
description: >-
Port on which Spark JDBC server is listening in the driver node. No
service listens on this port in executor nodes.
format: int32
cluster_name:
type: string
description: >-
Cluster name requested by the user. This doesn’t have to be unique.
If not specified at creation, the cluster name is an empty string.
spark_version:
type: string
description: >-
The runtime version of the cluster. You can retrieve a list of
available runtime versions by using the [Runtime
versions](https://docs.databricks.com/dev-tools/api/latest/clusters.html#runtime-versions)
API call.
spark_conf:
description: >-
An object containing a set of optional, user-specified Spark
configuration key-value pairs. You can also pass in a string of
extra JVM options to the driver and the executors via
`spark.driver.extraJavaOptions` and
`spark.executor.extraJavaOptions` respectively.
Example Spark confs: `{"spark.speculation": true,
"spark.streaming.ui.retainedBatches": 5}` or
`{"spark.driver.extraJavaOptions": "-verbose:gc
-XX:+PrintGCDetails"}`
$ref: '#/components/schemas/SparkConfPair'
aws_attributes:
description: >-
Attributes related to clusters running on Amazon Web Services. If
not specified at cluster creation, a set of default values is used.
$ref: '#/components/schemas/AwsAttributes'
node_type_id:
type: string
description: >-
This field encodes, through a single value, the resources available
to each of the Spark nodes in this cluster. For example, the Spark
nodes can be provisioned and optimized for memory or compute
intensive workloads. A list of available node types can be retrieved
by using the [List node
types](https://docs.databricks.com/dev-tools/api/latest/clusters.html#list-node-types)
API call.
driver_node_type_id:
type: string
description: >-
The node type of the Spark driver. This field is optional; if unset,
the driver node type is set as the same value as `node_type_id`
defined above.
ssh_public_keys:
type: array
description: >-
SSH public key contents that are added to each Spark node in this
cluster. The corresponding private keys can be used to login with
the user name `ubuntu` on port `2200`. Up to 10 keys can be
specified.
items:
type: string
custom_tags:
type: array
description: >-
An object containing a set of tags for cluster resources. Databricks
tags all cluster resources (such as AWS instances and EBS volumes)
with these tags in addition to default_tags.
**Note**:
* Tags are not supported on legacy node types such as
compute-optimized and memory-optimized
* Databricks allows at most 45 custom tags
items:
$ref: '#/components/schemas/ClusterTag'
cluster_log_conf:
description: >-
The configuration for delivering Spark logs to a long-term storage
destination. Only one destination can be specified for one cluster.
If the conf is given, the logs are delivered to the destination
every `5 mins`. The destination of driver logs is
`<destination>/<cluster-ID>/driver`, while the destination of
executor logs is `<destination>/<cluster-ID>/executor`.
$ref: '#/components/schemas/ClusterLogConf'
init_scripts:
type: array
description: >-
The configuration for storing init scripts. Any number of
destinations can be specified. The scripts are executed sequentially
in the order provided. If `cluster_log_conf` is specified, init
script logs are sent to `<destination>/<cluster-ID>/init_scripts`.
items:
$ref: '#/components/schemas/InitScriptInfo'
docker_image:
description: >-
Docker image for a [custom
container](https://docs.databricks.com/clusters/custom-containers.html).
$ref: '#/components/schemas/DockerImage'
spark_env_vars:
description: >-
An object containing a set of optional, user-specified environment
variable key-value pairs. Key-value pairs of the form (X,Y) are
exported as is (that is, `export X='Y'`) while launching the driver
and workers.
To specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we
recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in
the following example. This ensures that all default Databricks-managed
environment variables are included as well.
Example Spark environment variables: `{"SPARK_WORKER_MEMORY":
"28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or
`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS
-Dspark.shuffle.service.enabled=true"}`
$ref: '#/components/schemas/SparkEnvPair'
autotermination_minutes:
type: integer
description: >-
Automatically terminates the cluster after it is inactive for this
time in minutes. If not set, this cluster is not automatically
terminated. If specified, the threshold must be between 10 and 10000
minutes. You can also set this value to 0 to explicitly disable
automatic termination.
format: int32
enable_elastic_disk:
type: boolean
description: >-
Autoscaling Local Storage: when enabled, this cluster dynamically
acquires additional disk space when its Spark workers are running
low on disk space. This feature requires specific AWS permissions to
function correctly - refer to [Autoscaling local
storage](https://docs.databricks.com/clusters/configure.html#autoscaling-local-storage)
for details.
instance_pool_id:
type: string
description: >-
The optional ID of the instance pool to which the cluster belongs.
Refer to
[Pools](https://docs.databricks.com/clusters/instance-pools/index.html)
for details.
cluster_source:
description: >-
Determines whether the cluster was created by a user through the UI,
by the Databricks Jobs scheduler, or through an API request.
$ref: '#/components/schemas/ClusterSource'
state:
description: State of the cluster.
$ref: '#/components/schemas/ClusterState'
state_message:
type: string
description: >-
A message associated with the most recent state transition (for
example, the reason why the cluster entered a `TERMINATED` state).
This field is unstructured, and its exact format is subject to
change.
start_time:
type: integer
description: >-
Time (in epoch milliseconds) when the cluster creation request was
received (when the cluster entered a `PENDING` state).
format: int64
terminated_time:
type: integer
description: >-
Time (in epoch milliseconds) when the cluster was terminated, if
applicable.
format: int64
last_state_loss_time:
type: integer
description: >-
Time when the cluster driver last lost its state (due to a restart
or driver failure).
format: int64
last_activity_time:
type: integer
description: >-
Time (in epoch milliseconds) when the cluster was last active. A
cluster is active if there is at least one command that has not
finished on the cluster. This field is available after the cluster
has reached a `RUNNING` state. Updates to this field are made as
best-effort attempts. Certain versions of Spark do not support
reporting of cluster activity. Refer to [Automatic
termination](https://docs.databricks.com/clusters/clusters-manage.html#automatic-termination)
for details.
format: int64
cluster_memory_mb:
type: integer
description: Total amount of cluster memory, in megabytes.
format: int64
cluster_cores:
type: number
description: >-
Number of CPU cores available for this cluster. This can be
fractional since certain node types are configured to share cores
between Spark nodes on the same instance.
format: float
default_tags:
description: >-
An object containing a set of tags that are added by Databricks
regardless of any custom_tags, including:
* Vendor: Databricks
* Creator: <username-of-creator>
* ClusterName: <name-of-cluster>
* ClusterId: <id-of-cluster>
* Name: <Databricks internal use>
On job clusters:
* RunName: <name-of-job>
* JobId: <id-of-job>
On resources used by Databricks SQL:
* SqlEndpointId: <id-of-endpoint>
$ref: '#/components/schemas/ClusterTag'
cluster_log_status:
description: Cluster log delivery status.
$ref: '#/components/schemas/LogSyncStatus'
termination_reason:
description: >-
Information about why the cluster was terminated. This field only
appears when the cluster is in a `TERMINATING` or `TERMINATED`
state.
$ref: '#/components/schemas/TerminationReason'
ClusterEvent:
required:
- cluster_id
- type
- details
properties:
cluster_id:
type: string
description: Canonical identifier for the cluster. This field is required.
timestamp:
type: integer
description: >-
The timestamp when the event occurred, stored as the number of
milliseconds since the unix epoch. Assigned by the Timeline service.
format: int64
type:
description: The event type. This field is required.
$ref: '#/components/schemas/ClusterEventType'
details:
description: The event details. This field is required.
$ref: '#/components/schemas/EventDetails'
ClusterEventType:
type: string
enum:
- CREATING
- DID_NOT_EXPAND_DISK
- EXPANDED_DISK
- FAILED_TO_EXPAND_DISK
- INIT_SCRIPTS_STARTING
- INIT_SCRIPTS_FINISHED
- STARTING
- RESTARTING
- TERMINATING
- EDITED
- RUNNING
- RESIZING
- UPSIZE_COMPLETED
- NODES_LOST
- DRIVER_HEALTHY
- DRIVER_UNAVAILABLE
- SPARK_EXCEPTION
- DRIVER_NOT_RESPONDING
- DBFS_DOWN
- METASTORE_DOWN
- NODE_BLACKLISTED
- PINNED
- UNPINNED
description: >-
* `CREATING`: Indicates that the cluster is being created.
* `DID_NOT_EXPAND_DISK`: Indicates that a disk is low on space, but
adding disks would put it over the max capacity.
* `EXPANDED_DISK`: Indicates that a disk was low on space and the disks
were expanded.
* `FAILED_TO_EXPAND_DISK`: Indicates that a disk was low on space and
disk space could not be expanded.
* `INIT_SCRIPTS_STARTING`: Indicates that the cluster scoped init script
has started.
* `INIT_SCRIPTS_FINISHED`: Indicates that the cluster scoped init script
has finished.
* `STARTING`: Indicates that the cluster is being started.
* `RESTARTING`: Indicates that the cluster is being restarted.
* `TERMINATING`: Indicates that the cluster is being terminated.
* `EDITED`: Indicates that the cluster has been edited.
* `RUNNING`: Indicates the cluster has finished being created. Includes
the number of nodes in the cluster and a failure reason if some nodes
could not be acquired.
* `RESIZING`: Indicates a change in the target size of the cluster
(upsize or downsize).
* `UPSIZE_COMPLETED`: Indicates that nodes finished being added to the
cluster. Includes the number of nodes in the cluster and a failure
reason if some nodes could not be acquired.
* `NODES_LOST`: Indicates that some nodes were lost from the cluster.
* `DRIVER_HEALTHY`: Indicates that the driver is healthy and the cluster
is ready for use.
* `DRIVER_UNAVAILABLE`: Indicates that the driver is unavailable.
* `SPARK_EXCEPTION`: Indicates that a Spark exception was thrown from
the driver.
* `DRIVER_NOT_RESPONDING`: Indicates that the driver is up but is not
responsive, likely due to GC.
* `DBFS_DOWN`: Indicates that the driver is up but DBFS is down.
* `METASTORE_DOWN`: Indicates that the driver is up but the metastore is
down.
* `NODE_BLACKLISTED`: Indicates that a node is not allowed by Spark.
* `PINNED`: Indicates that the cluster was pinned.
* `UNPINNED`: Indicates that the cluster was unpinned.
EventDetails:
properties:
current_num_workers:
type: integer
description: The number of nodes in the cluster.
format: int32
target_num_workers:
type: integer
description: The targeted number of nodes in the cluster.
format: int32
previous_attributes:
description: The cluster attributes before a cluster was edited.
$ref: '#/components/schemas/AwsAttributes'
attributes:
description: |-
* For created clusters, the attributes of the cluster.
* For edited clusters, the new attributes of the cluster.
$ref: '#/components/schemas/AwsAttributes'
previous_cluster_size:
description: The size of the cluster before an edit or resize.
$ref: '#/components/schemas/ClusterSize'
cluster_size:
description: The cluster size that was set in the cluster creation or edit.
$ref: '#/components/schemas/ClusterSize'
cause:
description: The cause of a change in target size.
$ref: '#/components/schemas/ResizeCause'
reason:
description: >-
A termination reason:
* On a `TERMINATED` event, the reason for the termination.
* On a `RESIZE_COMPLETE` event, indicates the reason that we failed
to acquire some nodes.
$ref: '#/components/schemas/TerminationReason'
user:
type: string
description: >-
The user that caused the event to occur. (Empty if it was done by
Databricks.)
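    # Illustrative sketch (not part of the official spec): a RESIZING cluster
    # event combining the fields above. All values are placeholders.
    #   cluster_id: 0923-164208-meows279
    #   timestamp: 1625060460483
    #   type: RESIZING
    #   details:
    #     current_num_workers: 5
    #     target_num_workers: 10
    #     cause: AUTOSCALE
    #     user: ""          # empty when the resize was initiated by Databricks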
AwsAttributes:
properties:
first_on_demand:
type: integer
description: >-
The first first_on_demand nodes of the cluster are placed on
on-demand instances. If this value is greater than 0, the cluster
driver node is placed on an on-demand instance. If this value is
greater than or equal to the current cluster size, all nodes are
placed on on-demand instances. If this value is less than the
current cluster size, first_on_demand nodes are placed on on-demand
instances and the remainder are placed on `availability` instances.
This value does not affect cluster size and cannot be mutated over
the lifetime of a cluster.
format: int32
availability:
type: string
enum:
- SPOT
- ON_DEMAND
- SPOT_WITH_FALLBACK
description: >-
Availability type used for all subsequent nodes past the
`first_on_demand` ones. **Note:** If `first_on_demand` is zero, this
availability type is used for the entire cluster.
`SPOT`: use spot instances.
`ON_DEMAND`: use on-demand instances.
`SPOT_WITH_FALLBACK`: preferably use spot instances, but fall back
to on-demand instances if spot instances cannot be acquired (for
example, if AWS spot prices are too high).
zone_id:
type: string
description: >-
Identifier for the availability zone/datacenter in which the cluster
resides. You have three options:
**Specify an availability zone as a string**, for example:
“us-west-2a”. The provided availability zone must be in the same
region as the Databricks deployment. For example, “us-west-2a” is
not a valid zone ID if the Databricks deployment resides in the
“us-east-1” region.
**Enable automatic availability zone selection (“Auto-AZ”)**, by
setting the value “auto”. Databricks selects the AZ based on
available IPs in the workspace subnets and retries in other
availability zones if AWS returns insufficient capacity errors.
**Do not specify a value**. If not specified, a default zone is
used.
The list of available zones as well as the default value can be
found by using the [List
zones](https://docs.databricks.com/dev-tools/api/latest/clusters.html#list-zones)
API.
instance_profile_arn:
type: string
description: >-
Nodes for this cluster are only placed on AWS instances with this
instance profile. If omitted, nodes are placed on instances without
an instance profile. The instance profile must have previously been
added to the Databricks environment by an account administrator.
This feature may only be available to certain customer plans.
spot_bid_price_percent:
type: integer
description: >-
The max price for AWS spot instances, as a percentage of the
corresponding instance type’s on-demand price. For example, if this
field is set to 50, and the cluster needs a new `i3.xlarge` spot
instance, then the max price is half of the price of on-demand
`i3.xlarge` instances. Similarly, if this field is set to 200, the
max price is twice the price of on-demand `i3.xlarge` instances. If
not specified, the default value is 100. When spot instances are
requested for this cluster, only spot instances whose max price
percentage matches this field are considered. For safety, this field
is limited to a maximum of 10000.
format: int32
ebs_volume_type:
type: string
enum:
- GENERAL_PURPOSE_SSD
- THROUGHPUT_OPTIMIZED_HDD
description: >-
The type of EBS volume that is launched with this cluster.
`GENERAL_PURPOSE_SSD`: provision extra storage using AWS gp2 EBS
volumes.
`THROUGHPUT_OPTIMIZED_HDD`: provision extra storage using AWS st1
volumes.
ebs_volume_count:
type: integer
description: >-
The number of volumes launched for each instance. You can choose up
to 10 volumes. This feature is only enabled for supported node
types. Legacy node types cannot specify custom EBS volumes. For node
types with no instance store, at least one EBS volume needs to be
specified; otherwise, cluster creation fails.
These EBS volumes are mounted at `/ebs0`, `/ebs1`, and so on.
Instance store volumes are mounted at `/local_disk0`,
`/local_disk1`, and so on.
If EBS volumes are attached, Databricks configures Spark to use only
the EBS volumes for scratch storage because heterogeneously sized
scratch devices can lead to inefficient disk utilization. If no EBS
volumes are attached, Databricks configures Spark to use instance
store volumes.
If EBS volumes are specified, then the Spark configuration
`spark.local.dir` is overridden.
format: int32
ebs_volume_size:
type: integer
description: >-
The size of each EBS volume (in GiB) launched for each instance. For
general purpose SSD, this value must be within the range 100 -
4096. For throughput optimized HDD, this value must be within the
range 500 - 4096. Custom EBS volumes cannot be specified for the
legacy node types (_memory-optimized_ and _compute-optimized_).
format: int32
ebs_volume_iops:
type: integer
description: >-
The number of IOPS per EBS gp3 volume.
This value must be between 3000 and 16000.
The value of IOPS and throughput is calculated based on AWS
documentation to match the maximum performance of a gp2 volume with
the same volume size.
For more information, see the [EBS volume limit
calculator](https://github.com/awslabs/aws-support-tools/tree/master/EBS/VolumeLimitCalculator).
format: int32
ebs_volume_throughput:
type: integer
description: |-
The throughput per EBS gp3 volume, in MiB per second.
This value must be between 125 and 1000.
format: int32
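    # Illustrative sketch (not part of the official spec): a possible
    # `aws_attributes` value combining the fields above. The ARN, zone, and
    # sizes are placeholder values chosen for the example.
    #   aws_attributes:
    #     first_on_demand: 1                 # driver on an on-demand instance
    #     availability: SPOT_WITH_FALLBACK   # workers on spot, fall back to on-demand
    #     zone_id: auto                      # automatic availability zone selection
    #     spot_bid_price_percent: 100
    #     instance_profile_arn: arn:aws:iam::123456789012:instance-profile/example-profile
    #     ebs_volume_type: GENERAL_PURPOSE_SSD
    #     ebs_volume_count: 1
    #     ebs_volume_size: 100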
ClusterAttributes:
properties:
cluster_name:
type: string
description: >-
Cluster name requested by the user. This doesn’t have to be unique.
If not specified at creation, the cluster name is an empty string.
spark_version:
type: string
description: >-
The runtime version of the cluster, for example “5.0.x-scala2.11”.
You can retrieve a list of available runtime versions by using the
[Runtime
versions](https://docs.databricks.com/dev-tools/api/latest/clusters.html#runtime-versions)
API call.
spark_conf:
description: >-
An object containing a set of optional, user-specified Spark
configuration key-value pairs. You can also pass in a string of
extra JVM options to the driver and the executors via
`spark.driver.extraJavaOptions` and
`spark.executor.extraJavaOptions` respectively.
Example Spark confs: `{"spark.speculation": true,
"spark.streaming.ui.retainedBatches": 5}` or
`{"spark.driver.extraJavaOptions": "-verbose:gc
-XX:+PrintGCDetails"}`
$ref: '#/components/schemas/SparkConfPair'
aws_attributes:
description: >-
Attributes related to clusters running on Amazon Web Services. If
not specified at cluster creation, a set of default values are used.
$ref: '#/components/schemas/AwsAttributes'
node_type_id:
type: string
description: >-
This field encodes, through a single value, the resources available
to each of the Spark nodes in this cluster. For example, the Spark
nodes can be provisioned and optimized for memory or compute
intensive workloads. A list of available node types can be retrieved
by using the [List node
types](https://docs.databricks.com/dev-tools/api/latest/clusters.html#list-node-types)
API call.
driver_node_type_id:
type: string
description: >-
The node type of the Spark driver. This field is optional; if unset,
the driver node type is set as the same value as `node_type_id`
defined above.
ssh_public_keys:
type: array
description: >-
SSH public key contents that are added to each Spark node in this
cluster. The corresponding private keys can be used to log in with
the user name `ubuntu` on port `2200`. Up to 10 keys can be
specified.
items:
type: string
custom_tags:
description: >-
An object containing a set of tags for cluster resources. Databricks
tags all cluster resources (such as AWS instances and EBS volumes)
with these tags in addition to default_tags.
**Note**:
* Tags are not supported on legacy node types such as
compute-optimized and memory-optimized
* Databricks allows at most 45 custom tags
$ref: '#/components/schemas/ClusterTag'
cluster_log_conf:
description: >-
The configuration for delivering Spark logs to a long-term storage
destination. Only one destination can be specified for one cluster.
If the conf is given, the logs are delivered to the destination every
`5 mins`. The destination of driver logs is
`<destination>/<cluster-ID>/driver`, while the destination of
executor logs is `<destination>/<cluster-ID>/executor`.
$ref: '#/components/schemas/ClusterLogConf'
init_scripts:
type: array
description: >-
The configuration for storing init scripts. Any number of
destinations can be specified. The scripts are executed sequentially
in the order provided. If `cluster_log_conf` is specified, init
script logs are sent to `<destination>/<cluster-ID>/init_scripts`.
items:
$ref: '#/components/schemas/InitScriptInfo'
docker_image:
description: >-
Docker image for a [custom
container](https://docs.databricks.com/clusters/custom-containers.html).
$ref: '#/components/schemas/DockerImage'
runtime_engine:
type: string
description: >-
The type of runtime engine to use. If not specified, the runtime
engine type is inferred based on the `spark_version` value. Allowed
values include
* `PHOTON`: Use the Photon runtime engine type.
* `STANDARD`: Use the standard runtime engine type.
This field is optional.
spark_env_vars:
description: >-
An object containing a set of optional, user-specified environment
variable key-value pairs. Key-value pairs of the form (X,Y) are
exported as is (that is, `export X='Y'`) while launching the driver
and workers.
In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`,
we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in
the following example. This ensures that all default databricks
managed environmental variables are included as well.
Example Spark environment variables: `{"SPARK_WORKER_MEMORY":
"28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or
`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS
-Dspark.shuffle.service.enabled=true"}`
$ref: '#/components/schemas/SparkEnvPair'
autotermination_minutes:
type: integer
description: >-
Automatically terminates the cluster after it is inactive for this
time in minutes. If not set, this cluster is not automatically
terminated. If specified, the threshold must be between 10 and 10000
minutes. You can also set this value to 0 to explicitly disable
automatic termination.
format: int32
enable_elastic_disk:
type: boolean
description: >-
Autoscaling Local Storage: when enabled, this cluster dynamically
acquires additional disk space when its Spark workers are running
low on disk space. This feature requires specific AWS permissions to
function correctly. Refer to [Autoscaling local
storage](https://docs.databricks.com/clusters/configure.html#autoscaling-local-storage)
for details.
instance_pool_id:
type: string
description: >-
The optional ID of the instance pool to which the cluster belongs.
Refer to
[Pools](https://docs.databricks.com/clusters/instance-pools/index.html)
for details.
cluster_source:
description: >-
Determines whether the cluster was created by a user through the UI,
created by the Databricks Jobs scheduler, or through an API request.
$ref: '#/components/schemas/ClusterSource'
policy_id:
type: string
description: >-
A [cluster
policy](https://docs.databricks.com/dev-tools/api/latest/policies.html)
ID.
enable_local_disk_encryption:
type: boolean
description: >-
Determines whether encryption of the disks attached to the cluster
locally is enabled.
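    # Illustrative sketch (not part of the official spec): a minimal set of
    # cluster attributes as they might appear in a cluster create request.
    # All names and values are placeholders.
    #   cluster_name: example-shared-cluster
    #   spark_version: 7.3.x-scala2.12
    #   node_type_id: i3.xlarge
    #   spark_conf:
    #     spark.speculation: true
    #   custom_tags:
    #     team: data-eng
    #   autotermination_minutes: 120
    #   runtime_engine: PHOTON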
ClusterSize:
properties:
num_workers:
type: integer
description: >-
If num_workers, number of worker nodes that this cluster must have.
A cluster has one Spark driver and num_workers executors for a total
of num_workers + 1 Spark nodes. When reading the properties of a
cluster, this field reflects the desired number of workers rather
than the actual number of workers. For instance, if a cluster is
resized from 5 to 10 workers, this field is updated to reflect the
target size of 10 workers, whereas the workers listed in executors
gradually increase from 5 to 10 as the new nodes are provisioned.
format: int32
autoscale:
description: >-
If autoscale, parameters needed in order to automatically scale
clusters up and down based on load.
$ref: '#/components/schemas/AutoScale'
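    # Illustrative sketch (not part of the official spec): the two mutually
    # exclusive ways to express cluster size described above (fixed size via
    # num_workers, or autoscaling bounds as used in the run-submit example
    # later in this spec).
    #   num_workers: 8
    #   # or:
    #   autoscale:
    #     min_workers: 2
    #     max_workers: 16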
ListOrder:
type: string
enum:
- DESC
- ASC
description: |-
* `DESC`: Descending order.
* `ASC`: Ascending order.
ResizeCause:
type: string
enum:
- AUTOSCALE
- USER_REQUEST
- AUTORECOVERY
description: >-
* `AUTOSCALE`: Automatically resized based on load.
* `USER_REQUEST`: User requested a new size.
* `AUTORECOVERY`: Autorecovery monitor resized the cluster after it lost
a node.
ClusterLogConf:
properties:
dbfs:
description: >-
DBFS location of cluster log. Destination must be provided. For
example, `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`
$ref: '#/components/schemas/DbfsStorageInfo'
s3:
description: >-
S3 location of cluster log. `destination` and either `region` or
`endpoint` must be provided. For example, `{ "s3": { "destination" :
"s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }`
$ref: '#/components/schemas/S3StorageInfo'
InitScriptInfo:
properties:
dbfs:
description: >-
DBFS location of init script. Destination must be provided. For
example, `{ "dbfs" : { "destination" : "dbfs:/home/init_script" } }`
$ref: '#/components/schemas/DbfsStorageInfo'
file:
description: >-
File location of init script. Destination must be provided. For
example, `{ "file" : { "destination" : "file:/my/local/file.sh" } }`
$ref: '#/components/schemas/FileStorageInfo'
S3:
description: >-
S3 location of init script. Destination and either region or
endpoint must be provided. For example, `{ "s3": { "destination" :
"s3://init_script_bucket/prefix", "region" : "us-west-2" } }`
$ref: '#/components/schemas/S3StorageInfo'
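    # Illustrative sketch (not part of the official spec): log delivery and
    # init-script configuration using the storage-info shapes above. Bucket
    # names and paths are placeholders.
    #   cluster_log_conf:
    #     s3:
    #       destination: s3://cluster_log_bucket/prefix
    #       region: us-west-2
    #   init_scripts:
    #     - dbfs:
    #         destination: dbfs:/home/init_script
    #     - file:
    #         destination: file:/my/local/file.sh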
ClusterTag:
type: object
description: >-
An object with key value pairs. The key length must be between 1 and 127
UTF-8 characters, inclusive. The value length must be less than or equal
to 255 UTF-8 characters. For a list of all restrictions, see AWS Tag
Restrictions:
<https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions>
additionalProperties:
type: string
DbfsStorageInfo:
properties:
destination:
type: string
description: 'DBFS destination. Example: `dbfs:/my/path`'
FileStorageInfo:
properties:
destination:
type: string
description: 'File destination. Example: `file:/my/file.sh`'
DockerImage:
properties:
url:
type: string
description: URL for the Docker image.
basic_auth:
description: Basic authentication information for the Docker repository.
$ref: '#/components/schemas/DockerBasicAuth'
DockerBasicAuth:
properties:
username:
type: string
description: User name for the Docker repository.
password:
type: string
description: Password for the Docker repository.
LogSyncStatus:
properties:
last_attempted:
type: integer
description: >-
The timestamp of last attempt. If the last attempt fails,
last_exception contains the exception in the last attempt.
format: int64
last_exception:
type: string
description: >-
The exception thrown in the last attempt; it is null (omitted from
the response) if there was no exception in the last attempt.
NodeType:
required:
- node_type_id
- memory_mb
- description
- instance_type_id
properties:
node_type_id:
type: string
description: Unique identifier for this node type. This field is required.
memory_mb:
type: integer
description: Memory (in MB) available for this node type. This field is required.
format: int32
num_cores:
type: number
description: >-
Number of CPU cores available for this node type. This can be
fractional if the number of cores on a machine instance is not
divisible by the number of Spark nodes on that machine. This field
is required.
format: float
description:
type: string
description: >-
A string description associated with this node type. This field is
required.
instance_type_id:
type: string
description: >-
An identifier for the type of hardware that this node runs on. This
field is required.
is_deprecated:
type: boolean
description: >-
Whether the node type is deprecated. Non-deprecated node types offer
greater performance.
node_info:
description: Node type info reported by the cloud provider.
$ref: '#/components/schemas/ClusterCloudProviderNodeInfo'
ClusterCloudProviderNodeInfo:
properties:
status:
description: Status as reported by the cloud provider.
$ref: '#/components/schemas/ClusterCloudProviderNodeStatus'
available_core_quota:
type: integer
description: Available CPU core quota.
format: int32
total_core_quota:
type: integer
description: Total CPU core quota.
format: int32
ClusterCloudProviderNodeStatus:
type: string
enum:
- NotEnabledOnSubscription
- NotAvailableInRegion
description: |
* NotEnabledOnSubscription: Node type not available for subscription.
* NotAvailableInRegion: Node type not available in region.
ParameterPair:
type: object
additionalProperties: true
description: >-
An object with additional information about why a cluster was
terminated. Each object key is one of the `TerminationParameter`
fields, and the value is the corresponding termination information.
SparkConfPair:
type: object
additionalProperties: true
description: >-
An arbitrary object where the object key is a configuration property name
and the value is a configuration property value.
SparkEnvPair:
type: object
additionalProperties: true
description: >-
An arbitrary object where the object key is an environment variable name
and the value is an environment variable value.
SparkNode:
properties:
private_ip:
type: string
description: >-
Private IP address (typically a 10.x.x.x address) of the Spark node.
This is different from the private IP address of the host instance.
public_dns:
type: string
description: >-
Public DNS address of this node. This address can be used to access
the Spark JDBC server on the driver node. To communicate with the
JDBC server, traffic must be manually authorized by adding security
group rules to the “worker-unmanaged” security group via the AWS
console.
node_id:
type: string
description: Globally unique identifier for this node.
instance_id:
type: string
description: >-
Globally unique identifier for the host instance from the cloud
provider.
start_timestamp:
type: integer
description: The timestamp (in milliseconds) when the Spark node is launched.
format: int64
node_aws_attributes:
description: Attributes specific to AWS for a Spark node.
$ref: '#/components/schemas/SparkNodeAwsAttributes'
host_private_ip:
type: string
description: The private IP address of the host instance.
SparkVersion:
properties:
key:
type: string
description: >-
[Databricks Runtime
version](https://docs.databricks.com/dev-tools/api/latest/index.html#programmatic-version)
key, for example `7.3.x-scala2.12`. This is the value that must be
provided as the `spark_version` when creating a new cluster. The exact
runtime version may change over time for a “wildcard” version (that
is, `7.3.x-scala2.12` is a “wildcard” version) with minor bug fixes.
name:
type: string
description: >-
A descriptive name for the runtime version, for example “Databricks
Runtime 7.3 LTS”.
TerminationReason:
properties:
code:
description: Status code indicating why a cluster was terminated.
$ref: '#/components/schemas/TerminationCode'
type:
description: Reason indicating why a cluster was terminated.
$ref: '#/components/schemas/TerminationType'
parameters:
description: >-
Object containing a set of parameters that provide information about
why a cluster was terminated.
$ref: '#/components/schemas/ParameterPair'
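    # Illustrative sketch (not part of the official spec): a termination_reason
    # object as it might appear on a terminated cluster, using codes and
    # parameters defined below. The message text is a placeholder.
    #   termination_reason:
    #     code: CLOUD_PROVIDER_LAUNCH_FAILURE
    #     type: CLOUD_FAILURE
    #     parameters:
    #       aws_api_error_code: InstanceLimitExceeded
    #       databricks_error_message: AWS limit of running instances exceeded.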
PoolClusterTerminationCode:
type: string
description: >-
* INSTANCE_POOL_MAX_CAPACITY_FAILURE: The pool max capacity has been
reached.
* INSTANCE_POOL_NOT_FOUND_FAILURE: The pool specified by the cluster is
no longer active or doesn’t exist.
enum:
- INSTANCE_POOL_MAX_CAPACITY_FAILURE
- INSTANCE_POOL_NOT_FOUND_FAILURE
ClusterSource:
type: string
enum:
- UI
- JOB
- API
description: |
* UI: Cluster created through the UI.
* JOB: Cluster created by the Databricks job scheduler.
* API: Cluster created through an API call.
ClusterState:
type: string
enum:
- PENDING
- RUNNING
- RESTARTING
- RESIZING
- TERMINATING
- TERMINATED
- ERROR
- UNKNOWN
description: >
* PENDING: Indicates that a cluster is in the process of being created.
* RUNNING: Indicates that a cluster has been started and is ready for
use.
* RESTARTING: Indicates that a cluster is in the process of restarting.
* RESIZING: Indicates that a cluster is in the process of adding or
removing nodes.
* TERMINATING: Indicates that a cluster is in the process of being
destroyed.
* TERMINATED: Indicates that a cluster has been successfully destroyed.
* ERROR: This state is no longer used. It was used to indicate a cluster
that failed to be created. `TERMINATING` and `TERMINATED` are used
instead.
* UNKNOWN: Indicates that a cluster is in an unknown state. A cluster
should never be in this state.
TerminationCode:
type: string
enum:
- USER_REQUEST
- JOB_FINISHED
- INACTIVITY
- CLOUD_PROVIDER_SHUTDOWN
- COMMUNICATION_LOST
- CLOUD_PROVIDER_LAUNCH_FAILURE
- SPARK_STARTUP_FAILURE
- INVALID_ARGUMENT
- UNEXPECTED_LAUNCH_FAILURE
- INTERNAL_ERROR
- SPARK_ERROR
- METASTORE_COMPONENT_UNHEALTHY
- DBFS_COMPONENT_UNHEALTHY
- DRIVER_UNREACHABLE
- DRIVER_UNRESPONSIVE
- INSTANCE_UNREACHABLE
- CONTAINER_LAUNCH_FAILURE
- INSTANCE_POOL_CLUSTER_FAILURE
- REQUEST_REJECTED
- INIT_SCRIPT_FAILURE
- TRIAL_EXPIRED
description: >-
* USER_REQUEST: A user terminated the cluster directly. Parameters
should include a `username` field that indicates the specific user who
terminated the cluster.
* JOB_FINISHED: The cluster was launched by a job, and terminated when
the job completed.
* INACTIVITY: The cluster was terminated since it was idle.
* CLOUD_PROVIDER_SHUTDOWN: The instance that hosted the Spark driver was
terminated by the cloud provider. In AWS, for example, AWS may retire
instances and directly shut them down. Parameters should include an
`aws_instance_state_reason` field indicating the AWS-provided reason why
the instance was terminated.
* COMMUNICATION_LOST: Databricks lost connection to services on the
driver instance. For example, this can happen when problems arise in
cloud networking infrastructure, or when the instance itself becomes
unhealthy.
* CLOUD_PROVIDER_LAUNCH_FAILURE: Databricks experienced a cloud provider
failure when requesting instances to launch clusters. For example, AWS
limits the number of running instances and EBS volumes. If you ask
Databricks to launch a cluster that requires instances or EBS volumes
that exceed your AWS limit, the cluster fails with this status code.
Parameters should include one of `aws_api_error_code`,
`aws_instance_state_reason`, or `aws_spot_request_status` to indicate
the AWS-provided reason why Databricks could not request the required
instances for the cluster.
* SPARK_STARTUP_FAILURE: The cluster failed to initialize. Possible
reasons may include failure to create the environment for Spark or
issues launching the Spark master and worker processes.
* INVALID_ARGUMENT: Cannot launch the cluster because the user specified
an invalid argument. For example, the user might specify an invalid
runtime version for the cluster.
* UNEXPECTED_LAUNCH_FAILURE: While launching this cluster, Databricks
failed to complete critical setup steps, terminating the cluster.
* INTERNAL_ERROR: Databricks encountered an unexpected error that forced
the running cluster to be terminated. Contact Databricks support for
additional details.
* SPARK_ERROR: The Spark driver failed to start. Possible reasons may
include incompatible libraries and initialization scripts that corrupted
the Spark container.
* METASTORE_COMPONENT_UNHEALTHY: The cluster failed to start because the
external metastore could not be reached. Refer to
[Troubleshooting](https://docs.databricks.com/data/metastores/external-hive-metastore.html#troubleshooting).
* DBFS_COMPONENT_UNHEALTHY: The cluster failed to start because
Databricks File System (DBFS) could not be reached.
* DRIVER_UNREACHABLE: Databricks was not able to access the Spark
driver, because it was not reachable.
* DRIVER_UNRESPONSIVE: Databricks was not able to access the Spark
driver, because it was unresponsive.
* INSTANCE_UNREACHABLE: Databricks was not able to access instances in
order to start the cluster. This can be a transient networking issue. If
the problem persists, this usually indicates a networking environment
misconfiguration.
* CONTAINER_LAUNCH_FAILURE: Databricks was unable to launch containers
on worker nodes for the cluster. Have your admin check your network
configuration.
* INSTANCE_POOL_CLUSTER_FAILURE: Pool backed cluster specific failure.
Refer to
[Pools](https://docs.databricks.com/clusters/instance-pools/index.html)
for details.
* REQUEST_REJECTED: Databricks cannot handle the request at this moment.
Try again later and contact Databricks if the problem persists.
* INIT_SCRIPT_FAILURE: Databricks cannot load and run a cluster-scoped
init script on one of the cluster’s nodes, or the init script terminates
with a non-zero exit code. Refer to [Init script
logs](https://docs.databricks.com/clusters/init-scripts.html#init-script-log).
* TRIAL_EXPIRED: The Databricks trial subscription expired.
TerminationType:
type: string
enum:
- SUCCESS
- CLIENT_ERROR
- SERVICE_FAULT
- CLOUD_FAILURE
description: >
* SUCCESS: Termination succeeded.
* CLIENT_ERROR: Non-retriable. Client must fix parameters before
reattempting the cluster creation.
* SERVICE_FAULT: Databricks service issue. Client can retry.
* CLOUD_FAILURE: Cloud provider infrastructure issue. Client can retry
after the underlying issue is resolved.
TerminationParameter:
properties:
username:
type: string
description: The username of the user who terminated the cluster.
aws_api_error_code:
type: string
description: >-
The AWS provided error code describing why cluster nodes could not
be provisioned. For example, `InstanceLimitExceeded` indicates that
the limit of EC2 instances for a specific instance type has been
exceeded. For reference, see:
<https://docs.aws.amazon.com/AWSEC2/latest/APIReference/query-api-troubleshooting.html>.
aws_instance_state_reason:
type: string
description: >-
The AWS provided state reason describing why the driver node was
terminated. For example, `Client.VolumeLimitExceeded` indicates that
the limit of EBS volumes or total EBS volume storage has been
exceeded. For reference, see
<https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_StateReason.html>.
aws_spot_request_status:
type: string
description: >-
Describes why a spot request could not be fulfilled. For example,
`price-too-low` indicates that the max price was lower than the
current spot price. For reference, see:
<https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-bid-status.html#spot-instance-bid-status-understand>.
aws_spot_request_fault_code:
type: string
description: >-
Provides additional details when a spot request fails. For example
`InsufficientFreeAddressesInSubnet` indicates the subnet does not
have free IP addresses to accommodate the new instance. For
reference, see
<https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-spot-instance-requests.html>.
aws_impaired_status_details:
type: string
description: >-
The AWS provided status check which failed and induced a node loss.
This status may correspond to a failed instance or system check. For
reference, see
<https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/monitoring-system-instance-status-check.html>.
aws_instance_status_event:
type: string
description: >-
The AWS provided scheduled event (for example reboot) which induced
a node loss. For reference, see
<https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/monitoring-instances-status-check_sched.html>.
aws_error_message:
type: string
description: >-
Human-readable context of various failures from AWS. This field is
unstructured, and its exact format is subject to change.
databricks_error_message:
type: string
description: >-
Additional context that may explain the reason for cluster
termination. This field is unstructured, and its exact format is
subject to change.
inactivity_duration_min:
type: string
description: >-
An idle cluster was shut down after being inactive for this
duration.
instance_id:
type: string
description: The ID of the instance that was hosting the Spark driver.
instance_pool_id:
type: string
description: The ID of the instance pool the cluster is using.
instance_pool_error_code:
type: string
description: >-
The [error
code](https://docs.databricks.com/dev-tools/api/latest/clusters.html#clusterterminationreasonpoolclusterterminationcode)
for cluster failures specific to a pool.
S3StorageInfo:
properties:
destination:
type: string
description: >-
S3 destination. For example: `s3://my-bucket/some-prefix`. You must
configure the cluster with an instance profile and the instance
profile must have write access to the destination. You _cannot_ use
AWS keys.
region:
type: string
description: >-
S3 region. For example: `us-west-2`. Either region or endpoint must
be set. If both are set, endpoint is used.
endpoint:
type: string
description: >-
S3 endpoint. For example: `https://s3-us-west-2.amazonaws.com`.
Either region or endpoint must be set. If both are set, endpoint is
used.
enable_encryption:
type: boolean
description: (Optional) Enable server-side encryption, `false` by default.
encryption_type:
type: string
description: >-
(Optional) The encryption type, either `sse-s3` or `sse-kms`. It is
used only when encryption is enabled, and the default type is
`sse-s3`.
kms_key:
type: string
description: >-
(Optional) KMS key used if encryption is enabled and encryption type
is set to `sse-kms`.
canned_acl:
type: string
description: >-
(Optional) Set canned access control list. For example:
`bucket-owner-full-control`. If canned_acl is set, the cluster
instance profile must have `s3:PutObjectAcl` permission on the
destination bucket and prefix. The full list of possible canned ACLs
can be found at
<https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl>.
By default only the object owner has full control. If you are using
a cross-account role for writing data, you may want to set
`bucket-owner-full-control` so that the bucket owner is able to read
the logs.
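    # Illustrative sketch (not part of the official spec): an S3 log
    # destination with server-side encryption and a canned ACL. Bucket, key,
    # and account values are placeholders.
    #   s3:
    #     destination: s3://my-bucket/cluster-logs
    #     region: us-west-2
    #     enable_encryption: true
    #     encryption_type: sse-kms
    #     kms_key: arn:aws:kms:us-west-2:123456789012:key/example-key-id
    #     canned_acl: bucket-owner-full-control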
SparkNodeAwsAttributes:
properties:
is_spot:
type: boolean
description: Whether this node is on an Amazon spot instance.
ClusterLibraryStatuses:
properties:
cluster_id:
type: string
description: Unique identifier for the cluster.
library_statuses:
type: array
description: Status of all libraries on the cluster.
items:
$ref: '#/components/schemas/LibraryFullStatus'
Library:
properties:
jar:
type: string
example: dbfs:/my-jar.jar
description: >-
If jar, URI of the JAR to be installed. DBFS and S3 URIs are
supported. For example: `{ "jar": "dbfs:/mnt/databricks/library.jar"
}` or `{ "jar": "s3://my-bucket/library.jar" }`. If S3 is used, make
sure the cluster has read access on the library. You may need to
launch the cluster with an instance profile to access the S3 URI.
egg:
type: string
example: dbfs:/my/egg
description: >-
If egg, URI of the egg to be installed. DBFS and S3 URIs are
supported. For example: `{ "egg": "dbfs:/my/egg" }` or `{ "egg":
"s3://my-bucket/egg" }`. If S3 is used, make sure the cluster has
read access on the library. You may need to launch the cluster with
an instance profile to access the S3 URI.
whl:
type: string
example: dbfs:/my/whl
description: >-
If whl, URI of the wheel or zipped wheels to be installed. DBFS and
S3 URIs are supported. For example: `{ "whl": "dbfs:/my/whl" }` or
`{ "whl": "s3://my-bucket/whl" }`. If S3 is used, make sure the
cluster has read access on the library. You may need to launch the
cluster with an instance profile to access the S3 URI. Also the
wheel file name needs to use the [correct
convention](https://www.python.org/dev/peps/pep-0427/#file-format).
If zipped wheels are to be installed, the file name suffix should be
`.wheelhouse.zip`.
pypi:
description: >-
If pypi, specification of a PyPI library to be installed. Specifying
the `repo` field is optional and if not specified, the default pip
index is used. For example: `{ "package": "simplejson", "repo":
"https://my-repo.com" }`
$ref: '#/components/schemas/PythonPyPiLibrary'
maven:
description: >-
If maven, specification of a Maven library to be installed. For
example: `{ "coordinates": "org.jsoup:jsoup:1.7.2" }`
$ref: '#/components/schemas/MavenLibrary'
cran:
description: If cran, specification of a CRAN library to be installed.
$ref: '#/components/schemas/RCranLibrary'
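    # Illustrative sketch (not part of the official spec): a `libraries` array
    # mixing the library types above. URIs and package names are placeholders
    # taken from the field examples.
    #   libraries:
    #     - jar: dbfs:/mnt/databricks/library.jar
    #     - whl: dbfs:/my/whl
    #     - pypi:
    #         package: simplejson==3.8.0
    #     - maven:
    #         coordinates: org.jsoup:jsoup:1.7.2
    #         exclusions:
    #           - slf4j:slf4j
    #     - cran:
    #         package: geojson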
LibraryFullStatus:
properties:
library:
description: Unique identifier for the library.
$ref: '#/components/schemas/Library'
status:
description: Status of installing the library on the cluster.
$ref: '#/components/schemas/LibraryInstallStatus'
messages:
type: array
description: >-
All the info and warning messages that have occurred so far for this
library.
items:
type: string
is_library_for_all_clusters:
type: boolean
description: >-
Whether the library was set to be installed on all clusters via the
libraries UI.
MavenLibrary:
required:
- coordinates
properties:
coordinates:
type: string
example: org.jsoup:jsoup:1.7.2
description: >-
Gradle-style Maven coordinates. For example:
`org.jsoup:jsoup:1.7.2`. This field is required.
repo:
type: string
example: https://my-repo.com
description: >-
Maven repo to install the Maven package from. If omitted, both Maven
Central Repository and Spark Packages are searched.
exclusions:
type: array
example:
- slf4j:slf4j
- '*:hadoop-client'
description: >-
List of dependences to exclude. For example: `["slf4j:slf4j",
"*:hadoop-client"]`.
Maven dependency exclusions:
<https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html>.
items:
type: string
PythonPyPiLibrary:
required:
- package
properties:
package:
type: string
example: simplejson==3.8.0
description: >-
The name of the PyPI package to install. An optional exact version
specification is also supported. Examples: `simplejson` and
`simplejson==3.8.0`. This field is required.
repo:
type: string
example: https://my-repo.com
description: >-
The repository where the package can be found. If not specified, the
default pip index is used.
RCranLibrary:
required:
- package
properties:
package:
type: string
example: geojson
description: The name of the CRAN package to install. This field is required.
repo:
type: string
example: https://my-repo.com
description: >-
The repository where the package can be found. If not specified, the
default CRAN repo is used.
LibraryInstallStatus:
type: string
enum:
- PENDING
- RESOLVING
- INSTALLING
- INSTALLED
- SKIPPED
- FAILED
- UNINSTALL_ON_RESTART
description: >-
* `PENDING`: No action has yet been taken to install the library. This
state should be very short lived.
* `RESOLVING`: Metadata necessary to install the library is being
retrieved from the provided repository. For Jar, Egg, and Whl libraries,
this step is a no-op.
* `INSTALLING`: The library is actively being installed, either by
adding resources to Spark or executing system commands inside the Spark
nodes.
* `INSTALLED`: The library has been successfully installed.
* `SKIPPED`: Installation on a Databricks Runtime 7.0 or above cluster
was skipped due to Scala version incompatibility.
* `FAILED`: Some step in installation failed. More information can be
found in the messages field.
* `UNINSTALL_ON_RESTART`: The library has been marked for removal.
Libraries can be removed only when clusters are restarted, so libraries
that enter this state remain in it until the cluster is restarted.
Error:
type: object
properties:
error_code:
type: string
description: Error code
example: INTERNAL_ERROR
message:
type: string
description: Human-readable error message that describes the cause of the error.
example: Unexpected error.
AccessControlList:
type: object
properties:
access_control_list:
type: array
description: List of permissions to set on the job.
items:
$ref: '#/components/schemas/AccessControlRequest'
AccessControlRequest:
oneOf:
- $ref: '#/components/schemas/AccessControlRequestForUser'
- $ref: '#/components/schemas/AccessControlRequestForGroup'
- $ref: '#/components/schemas/AccessControlRequestForServicePrincipal'
AccessControlRequestForUser:
type: object
properties:
user_name:
$ref: '#/components/schemas/UserName'
permission_level:
$ref: '#/components/schemas/PermissionLevel'
AccessControlRequestForGroup:
type: object
properties:
group_name:
$ref: '#/components/schemas/GroupName'
permission_level:
$ref: '#/components/schemas/PermissionLevelForGroup'
AccessControlRequestForServicePrincipal:
type: object
properties:
service_principal_name:
$ref: '#/components/schemas/ServicePrincipalName'
permission_level:
$ref: '#/components/schemas/PermissionLevel'
UserName:
description: Email address for the user.
type: string
example: jsmith@example.com
GroupName:
description: >-
Group name. There are two built-in groups: `users` for all users, and
`admins` for administrators.
type: string
example: users
ServicePrincipalName:
description: Name of an Azure service principal.
type: string
example: 9f0621ee-b52b-11ea-b3de-0242ac130004
PermissionLevel:
description: Permission level to grant.
oneOf:
- $ref: '#/components/schemas/CanManage'
- $ref: '#/components/schemas/CanManageRun'
- $ref: '#/components/schemas/CanView'
- $ref: '#/components/schemas/IsOwner'
PermissionLevelForGroup:
description: Permission level to grant.
oneOf:
- $ref: '#/components/schemas/CanManage'
- $ref: '#/components/schemas/CanManageRun'
- $ref: '#/components/schemas/CanView'
CanManage:
type: string
description: Permission to manage the job.
enum:
- CAN_MANAGE
CanManageRun:
type: string
description: Permission to run and/or manage runs for the job.
enum:
- CAN_MANAGE_RUN
CanView:
type: string
description: Permission to view the settings of the job.
enum:
- CAN_VIEW
IsOwner:
type: string
description: Permission that represents ownership of the job.
enum:
- IS_OWNER
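    # Illustrative sketch (not part of the official spec): an
    # access_control_list granting different permission levels to a user, a
    # group, and a service principal, using the shapes and example values above.
    #   access_control_list:
    #     - user_name: jsmith@example.com
    #       permission_level: IS_OWNER
    #     - group_name: users
    #       permission_level: CAN_VIEW
    #     - service_principal_name: 9f0621ee-b52b-11ea-b3de-0242ac130004
    #       permission_level: CAN_MANAGE_RUN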
RunSubmitTaskSettings:
required:
- task_key
properties:
task_key:
$ref: '#/components/schemas/TaskKey'
depends_on:
$ref: '#/components/schemas/TaskDependencies'
existing_cluster_id:
type: string
example: 0923-164208-meows279
description: >-
If existing_cluster_id, the ID of an existing cluster that is used
for all runs of this task. When running tasks on an existing
cluster, you may need to manually restart the cluster if it stops
responding. We suggest running jobs on new clusters for greater
reliability.
new_cluster:
description: >-
If new_cluster, a description of a cluster that is created for each
run.
example: null
$ref: '#/components/schemas/NewCluster'
notebook_task:
description: >-
If notebook_task, indicates that this task must run a notebook. This
field may not be specified in conjunction with spark_jar_task.
$ref: '#/components/schemas/NotebookTask'
spark_jar_task:
description: If spark_jar_task, indicates that this task must run a JAR.
$ref: '#/components/schemas/SparkJarTask'
spark_python_task:
description: >-
If spark_python_task, indicates that this task must run a Python
file.
$ref: '#/components/schemas/SparkPythonTask'
spark_submit_task:
description: >-
If spark_submit_task, indicates that this task must be launched by
the spark submit script. This task can run only on new clusters.
$ref: '#/components/schemas/TaskSparkSubmitTask'
pipeline_task:
description: If pipeline_task, indicates that this task must execute a Pipeline.
$ref: '#/components/schemas/PipelineTask'
python_wheel_task:
description: >-
If python_wheel_task, indicates that this job must execute a
PythonWheel.
$ref: '#/components/schemas/PythonWheelTask'
sql_task:
description: >-
If sql_task, indicates that this job must execute a SQL task. It
requires both Databricks SQL and a serverless or a pro SQL
warehouse.
$ref: '#/components/schemas/SqlTask'
dbt_task:
description: >-
If dbt_task, indicates that this must execute a dbt task. It
requires both Databricks SQL and the ability to use a serverless or
a pro SQL warehouse.
$ref: '#/components/schemas/DbtTask'
libraries:
type: array
description: >-
An optional list of libraries to be installed on the cluster that
executes the task. The default value is an empty list.
items:
$ref: '#/components/schemas/Library'
timeout_seconds:
type: integer
example: 86400
description: >-
An optional timeout applied to each run of this job task. The
default behavior is to have no timeout.
format: int32
RunSubmitSettings:
type: object
properties:
tasks:
type: array
maxItems: 100
items:
$ref: '#/components/schemas/RunSubmitTaskSettings'
example:
- task_key: Sessionize
description: Extracts session data from events
depends_on: []
existing_cluster_id: 0923-164208-meows279
spark_jar_task:
main_class_name: com.databricks.Sessionize
parameters:
- '--data'
- dbfs:/path/to/data.json
libraries:
- jar: dbfs:/mnt/databricks/Sessionize.jar
timeout_seconds: 86400
- task_key: Orders_Ingest
description: Ingests order data
depends_on: []
existing_cluster_id: 0923-164208-meows279
spark_jar_task:
main_class_name: com.databricks.OrdersIngest
parameters:
- '--data'
- dbfs:/path/to/order-data.json
libraries:
- jar: dbfs:/mnt/databricks/OrderIngest.jar
timeout_seconds: 86400
- task_key: Match
description: Matches orders with user sessions
depends_on:
- task_key: Orders_Ingest
- task_key: Sessionize
new_cluster:
spark_version: 7.3.x-scala2.12
node_type_id: i3.xlarge
spark_conf:
spark.speculation: true
aws_attributes:
availability: SPOT
zone_id: us-west-2a
autoscale:
min_workers: 2
max_workers: 16
notebook_task:
notebook_path: /Users/user.name@databricks.com/Match
source: WORKSPACE
base_parameters:
name: John Doe
age: '35'
timeout_seconds: 86400
run_name:
type: string
example: A multitask job run
description: An optional name for the run. The default value is `Untitled`.
webhook_notifications:
description: >-
A collection of system notification IDs to notify when runs of this
job begin or complete. The default behavior is to not send any
system notifications.
$ref: '#/components/schemas/WebhookNotifications'
git_source:
description: >-
This functionality is in Public Preview.
An optional specification for a remote repository containing the
notebooks used by this job's notebook tasks.
example:
git_url: https://github.com/databricks/databricks-cli
git_branch: main
git_provider: gitHub
$ref: '#/components/schemas/GitSource'
timeout_seconds:
type: integer
example: 86400
description: >-
An optional timeout applied to each run of this job. The default
behavior is to have no timeout.
format: int32
idempotency_token:
type: string
example: 8f018174-4792-40d5-bcbc-3e6a527352c8
description: >-
An optional token that can be used to guarantee the idempotency of
job run requests. If a run with the provided token already exists,
the request does not create a new run but returns the ID of the
existing run instead. If a run with the provided token is deleted,
an error is returned.
If you specify the idempotency token, upon failure you can retry
until the request succeeds. Databricks guarantees that exactly one
run is launched with that idempotency token.
This token must have at most 64 characters.
For more information, see [How to ensure idempotency for
jobs](https://kb.databricks.com/jobs/jobs-idempotency.html).
RunNowInput:
type: object
properties:
job_id:
type: integer
description: The ID of the job to be executed
example: 11223344
format: int64
idempotency_token:
type: string
example: 8f018174-4792-40d5-bcbc-3e6a527352c8
description: >-
An optional token to guarantee the idempotency of job run requests.
If a run with the provided token already exists, the request does
not create a new run but returns the ID of the existing run instead.
If a run with the provided token is deleted, an error is returned.
If you specify the idempotency token, upon failure you can retry
until the request succeeds. Databricks guarantees that exactly one
run is launched with that idempotency token.
This token must have at most 64 characters.
For more information, see [How to ensure idempotency for
jobs](https://kb.databricks.com/jobs/jobs-idempotency.html).
RepairRunInput:
type: object
properties:
run_id:
description: >-
The job run ID of the run to repair. The run must not be in
progress.
type: integer
format: int64
example: 455644833
rerun_tasks:
description: The task keys of the task runs to repair.
type: array
items:
type: string
example:
- task0
- task1
latest_repair_id:
description: >-
The ID of the latest repair. This parameter is not required when
repairing a run for the first time, but must be provided on
subsequent requests to repair the same run.
type: integer
format: int64
example: 734650698524280
rerun_all_failed_tasks:
description: >-
If true, repair all failed tasks. Only one of rerun_tasks or
rerun_all_failed_tasks can be used.
type: boolean
default: false
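    # Illustrative sketch (not part of the official spec): a repair request
    # that re-runs two failed tasks of an earlier run. On a second repair of
    # the same run, latest_repair_id must carry the ID returned by the
    # previous repair. IDs and task keys are the placeholder examples above.
    #   run_id: 455644833
    #   rerun_tasks:
    #     - task0
    #     - task1
    #   latest_repair_id: 734650698524280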
RepairHistory:
type: object
properties:
repair_history:
description: The repair history of the run.
type: array
items:
$ref: '#/components/schemas/RepairHistoryItem'
RepairHistoryItem:
type: object
properties:
type:
type: string
description: >-
The repair history item type. Indicates whether a run is the
original run or a repair run.
enum:
- ORIGINAL
- REPAIR
start_time:
description: The start time of the (repaired) run.
type: integer
format: int64
example: 1625060460483
end_time:
description: The end time of the (repaired) run.
type: integer
format: int64
example: 1625060863413
state:
$ref: '#/components/schemas/RunState'
id:
description: >-
The ID of the repair. Only returned for the items that represent a
repair in `repair_history`.
type: integer
format: int64
example: 734650698524280
task_run_ids:
description: >-
The run IDs of the task runs that ran as part of this repair history
item.
type: array
items:
type: integer
format: int64
example:
- 1106460542112844
- 988297789683452
workspace.Import:
required:
- path
properties:
format:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.ExportFormat'
path:
description:
The absolute path of the object or directory. Importing a directory
is only supported for the `DBC` format.
type: string
language:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.Language'
content:
format: string
description:
The base64-encoded content. This has a limit of 10 MB.
If the limit (10 MB) is exceeded, an exception with error code
**MAX_NOTEBOOK_SIZE_EXCEEDED** will be thrown.
This parameter might be absent, and instead a posted file will be
used.
x-databricks-base64: true
type: string
overwrite:
default: false
description:
The flag that specifies whether to overwrite an existing object. It
is `false` by default.
For the `DBC` format, `overwrite` is not supported since the import
may contain a directory.
type: boolean
type: object
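    # Illustrative sketch (not part of the official spec): an import request
    # placing a base64-encoded Python notebook at a placeholder path.
    #   path: /Users/user.name@databricks.com/ExampleNotebook
    #   format: SOURCE
    #   language: PYTHON
    #   content: cHJpbnQoImhlbGxvIikK   # base64 for: print("hello")
    #   overwrite: false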
workspace.ImportResponse:
properties: {}
type: object
workspace.ExportFormat:
default: SOURCE
description: >
This specifies the format of the file to be imported. By default, this
is `SOURCE`.
If using `AUTO`, the item is imported or exported as either a workspace
file or a notebook, depending on an analysis
of the item’s extension and the header content provided in the request.
The value is case sensitive.
type: string
enum:
- SOURCE
- HTML
- JUPYTER
- DBC
- R_MARKDOWN
- AUTO
workspace.Language:
description: >-
The language of the object. This value is set only if the object type is
`NOTEBOOK`.
type: string
enum:
- SCALA
- PYTHON
- SQL
- R
workspace.ExportResponse:
properties:
content:
format: string
description: >-
The base64-encoded content.
If the limit (10 MB) is exceeded, an exception with error code
**MAX_NOTEBOOK_SIZE_EXCEEDED** will be thrown.
x-databricks-base64: true
type: string
type: object
workspace.ListResponse:
properties:
objects:
description: List of objects.
type: array
items:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.ObjectInfo'
type: object
workspace.ObjectInfo:
properties:
path:
description: The absolute path of the object.
x-databricks-name: true
type: string
size:
format: int64
description: The size of the object, in bytes. This value is set only if the object is a file.
type: integer
object_type:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.ObjectType'
language:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.Language'
modified_at:
format: int64
description: The UNIX timestamp, in milliseconds, when the object was last modified.
type: integer
created_at:
format: int64
description: The UNIX timestamp, in milliseconds, when the object was created.
type: integer
object_id:
format: int64
description: Unique identifier for the object in the workspace.
x-databricks-id: true
type: integer
type: object
workspace.ObjectType:
description: The type of the object in workspace.
type: string
enum:
- NOTEBOOK
- DIRECTORY
- LIBRARY
- FILE
- REPO
workspace.Mkdirs:
required:
- path
properties:
path:
description: >
The absolute path of the directory. If the parent directories do not
exist, it will also create them.
If the directory already exists, this command will do nothing and
succeed.
type: string
type: object
workspace.MkdirsResponse:
properties: {}
type: object
workspace.ListReposResponse:
properties:
next_page_token:
description: >-
Token that can be specified as a query parameter to the GET /repos
endpoint to retrieve the next page of results.
example: eyJyZXBvX3RyZWVub2RlX2lkIjo1MjQ5NjA4ODE0NTA5Mjc5fQ==
type: string
repos:
type: array
items:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.RepoInfo'
type: object
workspace.RepoInfo:
properties:
path:
description: >-
Desired path for the repo in the workspace. Must be in the format
/Repos/{folder}/{repo-name}.
example: /Repos/Production/testrepo
x-databricks-name: true
type: string
provider:
description: >-
Git provider. This field is case-insensitive. The available Git
providers are gitHub, bitbucketCloud, gitLab, azureDevOpsServices,
gitHubEnterprise, bitbucketServer, gitLabEnterpriseEdition and
awsCodeCommit.
example: gitHub
type: string
url:
description: URL of the Git repository to be linked.
example: https://github.com/jsmith/test
type: string
branch:
description: Branch that the local version of the repo is checked out to.
example: main
type: string
head_commit_id:
description: >-
SHA-1 hash representing the commit ID of the current HEAD of the
repo.
example: 7e0847ede61f07adede22e2bcce6050216489171
type: string
id:
format: int64
description: ID of the repo object in the workspace.
example: 5249608814509279
x-databricks-id: true
type: integer
sparse_checkout:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.SparseCheckout'
type: object
workspace.SparseCheckout:
properties:
patterns:
description: List of patterns to include for sparse checkout.
type: array
items:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.sparse_checkout_pattern'
type: object
workspace.SparseCheckoutUpdate:
properties:
patterns:
description: List of patterns to include for sparse checkout.
type: array
items:
extRef: true
ref: true
$ref: '#/components/schemas/workspace.sparse_checkout_pattern'
type: object
workspace.UpdateRepo:
properties:
branch:
description: Branch that the local version of the repo is checked out to.
example: main
type: string
sparse_checkout:
description: >-
If specified, update the sparse checkout settings. The update will
fail if sparse checkout is not enabled for the repo.
extRef: true
ref: true
$ref: '#/components/schemas/workspace.SparseCheckoutUpdate'
tag:
description: >-
Tag that the local version of the repo is checked out to. Updating
the repo to a tag puts the repo in a detached HEAD state. Before
committing new changes, you must update the repo to a branch instead
of the detached HEAD.
example: v1.0
type: string
type: object
workspace.sparse_checkout_pattern:
description: >-
Sparse checkout cone pattern, see [cone mode
handling](https://git-scm.com/docs/git-sparse-checkout#_internalscone_mode_handling)
for details.
type: string
workspace.CreateRepo:
required:
- url
- provider
properties:
path:
description:
Desired path for the repo in the workspace. Must be in the format
/Repos/{folder}/{repo-name}.
example: /Repos/Production/testrepo
x-databricks-name: true
type: string
provider:
description:
Git provider. This field is case-insensitive. The available Git
providers are gitHub, bitbucketCloud, gitLab, azureDevOpsServices,
gitHubEnterprise, bitbucketServer, gitLabEnterpriseEdition and
awsCodeCommit.
example: gitHub
type: string
sparse_checkout:
description:
If specified, the repo will be created with sparse checkout enabled.
You cannot enable/disable sparse checkout after the repo is created.
extRef: true
ref: true
$ref: '#/components/schemas/workspace.SparseCheckout'
url:
description: URL of the Git repository to be linked.
example: https://github.com/jsmith/test
type: string
type: object
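    # Illustrative sketch (not part of the official spec): creating a repo
    # with sparse checkout enabled, reusing the example field values above.
    # The cone patterns are placeholder directory paths.
    #   url: https://github.com/jsmith/test
    #   provider: gitHub
    #   path: /Repos/Production/testrepo
    #   sparse_checkout:
    #     patterns:
    #       - src
    #       - docs/images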
responses:
StatementResponse:
content:
application/json:
examples:
statement_response_ext_links_succeeded:
summary: Large result sets with EXTERNAL_LINKS + ARROW_STREAM
value:
manifest:
chunks:
- chunk_index: 0
row_count: 100
row_offset: 0
format: ARROW_STREAM
schema:
column_count: 1
columns:
- name: id
position: 0
type_name: LONG
type_text: BIGINT
total_byte_count: 16160
total_chunk_count: 1
total_row_count: 100
result:
external_links:
- chunk_index: 0
expiration: '2023-01-30T22:23:23.140Z'
external_link: https://someplace.s3.us-west-2.amazonaws.com/very/long/path/...
row_count: 100
row_offset: 0
statement_id: 01eda0ea-9b4b-15ce-b8bb-a7d4114cb5ed
status:
state: SUCCEEDED
statement_response_inline_succeeded:
summary: JSON_ARRAY formatted data returned INLINE
value:
manifest:
format: JSON_ARRAY
schema:
column_count: 1
columns:
- name: id
position: 0
type_name: LONG
type_text: BIGINT
result:
chunk_index: 0
data_array:
- - '0'
- - '1'
- - '2'
row_count: 3
row_offset: 0
statement_id: 01eda0e7-e315-1846-84e2-79a963ffad44
status:
state: SUCCEEDED
statement_response_running:
summary: 'Call mode: asynchronous. Submission is accepted'
value:
statement_id: 01ed9db9-24c4-1cb6-a320-fb6ebbe7410d
status:
state: RUNNING
schema:
properties:
manifest:
"$ref": "#/components/schemas/ResultManifest"
result:
"$ref": "#/components/schemas/ResultData"
statement_id:
"$ref": "#/components/schemas/StatementId"
status:
"$ref": "#/components/schemas/StatementStatus"
type: object
sql.ListWarehousesResponse:
properties:
warehouses:
description: A list of warehouses and their configurations.
type: array
items:
extRef: true
ref: true
$ref: '#/components/schemas/sql.EndpointInfo'
type: object
Unauthorized:
description: The request was unauthorized.
content:
application/json:
schema:
$ref: '#/components/schemas/Error'
example:
error_code: PERMISSION_DENIED
message: Unauthorized access.
BadRequest:
description: The request was malformed. See JSON response for error details.
content:
application/json:
example:
error_code: INVALID_PARAMETER_VALUE
message: Invalid value for parameter job_id
schema:
$ref: '#/components/schemas/Error'
NotFound:
description: The requested resource does not exist.
content:
application/json:
schema:
$ref: '#/components/schemas/Error'
examples:
resource_does_not_exist:
value:
error_code: ENDPOINT_NOT_FOUND
message: No API endpoint found
InternalError:
description: The request was not handled correctly due to a server error.
content:
application/json:
schema:
$ref: '#/components/schemas/Error'
0 Comments
ad4mat
API to monetize outgoing traffic via tracking links.
0 Comments

Web5 assistant
Assistant for developers building a web5 application by providing examples, documentation and writing web5 code.
0 Comments
AI with Jira®
Unofficial plugin for Jira®. Create issues such as tasks, user stories and bugs directly on your Jira® project.
0 Comments

Text to SQL Plugin
Plugin that converts a natural language text into an SQL query.
0 Comments
Devhunt
Find your next favorite tool.
0 Comments

Talk With Docs
Ask any questions to your docs.
0 Comments

Recombinant AI™
Input a Github repo URL. Get a holistic,deep, relational understanding of the codebase.
0 Comments
CodeCast Wandbox
Instantly compile your code online with Wandbox. Improve your coding learning experience.
1 Comments
StackOverflow Plus
Expand your knowledge beyond 2021/09/01. Get recent questions and answers from Stack Overflow.
0 Comments

A/B JUDGE
Judge whether the A/B test results are superior or not.
0 Comments

ne.tools
Network diagnostic tool for DNS record lookup (A, AA, MX, NS and more) and WHOIS queries for domains and IP addresses.
0 Comments

Code Library Search
Answer any questions about Python libraries (Currently Langchain and Openai). Can answer version specific questions.
0 Comments

Service Check
Check if services are running and how fast they are responding. You can check Website, Email, Domain Lookup and Ping.
0 Comments

AI Extensions
Craft your extensions with ease! Step-by-step guidance at your fingertips.
0 Comments

Passive DNS Pivots
Global pDNS 800 Billion records searchable. Unearth nefarious domains within minutes of creation.
0 Comments

WPressGenie
Manages a WordPress site. You can get, view and analyze details of WordPress posts and content.
0 Comments
API Bot
This is a conversational bot that lets you ask questions about a variety of common APIs.
0 Comments

SPARQL Query
Returns Query Solution Page for Query Text.
0 Comments

Codeulator
Collaborate with AI directly inside your favorite code editor.
0 Comments

Calculator Tools
Create any app, calculator, tool, tracker, or game instantly with AI.
0 Comments

Site Analysis and Moderation
This plugin provides data analysis, content moderation and automatic report generation functionalities for your website.
0 Comments

Check Website Down
Insert your website URL to check the availability. Pulsetic tests the uptime, availability & performance of a website.
0 Comments
Take Code Captures
Capture, beautify, and share your code snippets easily.
0 Comments