docs: add model methods like start stop
This commit is contained in:
parent
126c2e0132
commit
9553c70d07
@ -2309,6 +2309,7 @@ paths:
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Models
|
||||||
/models:
|
/models:
|
||||||
get:
|
get:
|
||||||
operationId: listModels
|
operationId: listModels
|
||||||
@ -2374,6 +2375,61 @@ paths:
|
|||||||
],
|
],
|
||||||
"object": "list"
|
"object": "list"
|
||||||
}
|
}
|
||||||
|
post:
|
||||||
|
operationId: importModel
|
||||||
|
tags:
|
||||||
|
- Models
|
||||||
|
summary: Import Model
|
||||||
|
description: Imports a model instance. The model can come from a local folder, a remote source, or an API endpoint. The model importer examines the source_url to determine the format of the source.
|
||||||
|
parameters:
|
||||||
|
- in: path
|
||||||
|
name: source_url
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
# ideally this will be an actual ID, so this will always work from browser
|
||||||
|
example: https://huggingface.co/thebloke/example.gguf
|
||||||
|
description: The source of the model to import. It can be an external URL or a local filepath.
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: OK
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/Model"
|
||||||
|
x-oaiMeta:
|
||||||
|
name: Import model
|
||||||
|
returns: The imported [model](/docs/api-reference/models/object) object.
|
||||||
|
examples:
|
||||||
|
request:
|
||||||
|
curl: |
|
||||||
|
curl https://api.openai.com/v1/models/VAR_model_id \
|
||||||
|
-H "Authorization: Bearer $OPENAI_API_KEY"
|
||||||
|
python: |
|
||||||
|
from openai import OpenAI
|
||||||
|
client = OpenAI()
|
||||||
|
|
||||||
|
client.models.retrieve("VAR_model_id")
|
||||||
|
node.js: |-
|
||||||
|
import OpenAI from "openai";
|
||||||
|
|
||||||
|
const openai = new OpenAI();
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
const model = await openai.models.retrieve("gpt-3.5-turbo");
|
||||||
|
|
||||||
|
console.log(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
||||||
|
response: &retrieve_model_response |
|
||||||
|
{
|
||||||
|
"id": "VAR_model_id",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1686935002,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"state": "ready"
|
||||||
|
}
|
||||||
/models/{model}:
|
/models/{model}:
|
||||||
get:
|
get:
|
||||||
operationId: retrieveModel
|
operationId: retrieveModel
|
||||||
@ -2481,6 +2537,138 @@ paths:
|
|||||||
"object": "model",
|
"object": "model",
|
||||||
"deleted": true
|
"deleted": true
|
||||||
}
|
}
|
||||||
|
post:
|
||||||
|
operationId: startModel
|
||||||
|
tags:
|
||||||
|
- Models
|
||||||
|
summary: Start Model
|
||||||
|
description: Starts an imported model. Loads the model into V/RAM.
|
||||||
|
parameters:
|
||||||
|
- in: path
|
||||||
|
name: model
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
# ideally this will be an actual ID, so this will always work from browser
|
||||||
|
example: gpt-3.5-turbo
|
||||||
|
description: The ID of the model to start
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: OK
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/Model"
|
||||||
|
x-oaiMeta:
|
||||||
|
name: Start model
|
||||||
|
returns: The [model](/docs/api-reference/models/object) object matching the specified ID.
|
||||||
|
examples:
|
||||||
|
request:
|
||||||
|
curl: |
|
||||||
|
curl https://api.openai.com/v1/models/VAR_model_id \
|
||||||
|
-H "Authorization: Bearer $OPENAI_API_KEY"
|
||||||
|
python: |
|
||||||
|
from openai import OpenAI
|
||||||
|
client = OpenAI()
|
||||||
|
|
||||||
|
client.models.retrieve("VAR_model_id")
|
||||||
|
node.js: |-
|
||||||
|
import OpenAI from "openai";
|
||||||
|
|
||||||
|
const openai = new OpenAI();
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
const model = await openai.models.retrieve("gpt-3.5-turbo");
|
||||||
|
|
||||||
|
console.log(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
||||||
|
response: &retrieve_model_response |
|
||||||
|
{
|
||||||
|
"id": "VAR_model_id",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1686935002,
|
||||||
|
"owned_by": "openai"
|
||||||
|
}
|
||||||
|
/models/{model}/cancel:
|
||||||
|
post:
|
||||||
|
operationId: cancelModel
|
||||||
|
tags:
|
||||||
|
- Models
|
||||||
|
summary: Cancel Model
|
||||||
|
description: Stops a running model. Unloads the model from V/RAM.
|
||||||
|
parameters:
|
||||||
|
- in: path
|
||||||
|
name: model
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
description: The ID of the model that is running.
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: OK
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/RunObject"
|
||||||
|
x-oaiMeta:
|
||||||
|
name: Cancel a running model
|
||||||
|
beta: true
|
||||||
|
returns: The modified [run](/docs/api-reference/runs/object) object matching the specified ID.
|
||||||
|
examples:
|
||||||
|
request:
|
||||||
|
curl: |
|
||||||
|
curl https://api.openai.com/v1/threads/thread_1cjnJPXj8MFiqTx58jU9TivC/runs/run_BeRGmpGt2wb1VI22ZRniOkrR/cancel \
|
||||||
|
-H 'Authorization: Bearer $OPENAI_API_KEY' \
|
||||||
|
-H 'OpenAI-Beta: assistants=v1' \
|
||||||
|
-X POST
|
||||||
|
python: |
|
||||||
|
from openai import OpenAI
|
||||||
|
client = OpenAI()
|
||||||
|
|
||||||
|
run = client.beta.threads.runs.cancel(
|
||||||
|
thread_id="thread_1cjnJPXj8MFiqTx58jU9TivC",
|
||||||
|
run_id="run_BeRGmpGt2wb1VI22ZRniOkrR"
|
||||||
|
)
|
||||||
|
print(run)
|
||||||
|
node.js: |
|
||||||
|
import OpenAI from "openai";
|
||||||
|
|
||||||
|
const openai = new OpenAI();
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
const run = await openai.beta.threads.runs.cancel(
|
||||||
|
"thread_1cjnJPXj8MFiqTx58jU9TivC",
|
||||||
|
"run_BeRGmpGt2wb1VI22ZRniOkrR"
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(run);
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
||||||
|
response: |
|
||||||
|
{
|
||||||
|
"id": "run_BeRGmpGt2wb1VI22ZRniOkrR",
|
||||||
|
"object": "model",
|
||||||
|
"created_at": 1699076126,
|
||||||
|
"status": "cancelling",
|
||||||
|
"started_at": 1699076126,
|
||||||
|
"expires_at": 1699076726,
|
||||||
|
"cancelled_at": null,
|
||||||
|
"failed_at": null,
|
||||||
|
"completed_at": null,
|
||||||
|
"last_error": null,
|
||||||
|
"model": "gpt-4",
|
||||||
|
"instructions": "You summarize books.",
|
||||||
|
"tools": [
|
||||||
|
{
|
||||||
|
"type": "retrieval"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"file_ids": [],
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
|
||||||
/moderations:
|
/moderations:
|
||||||
post:
|
post:
|
||||||
@ -3062,6 +3250,7 @@ paths:
|
|||||||
"deleted": true
|
"deleted": true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Threads
|
||||||
/threads:
|
/threads:
|
||||||
post:
|
post:
|
||||||
operationId: createThread
|
operationId: createThread
|
||||||
@ -7324,7 +7513,7 @@ components:
|
|||||||
object:
|
object:
|
||||||
type: string
|
type: string
|
||||||
default: model
|
default: model
|
||||||
version:
|
version:
|
||||||
type: integer
|
type: integer
|
||||||
description: The version of the Model Object file
|
description: The version of the Model Object file
|
||||||
default: 1
|
default: 1
|
||||||
@ -7333,14 +7522,14 @@ components:
|
|||||||
format: uri
|
format: uri
|
||||||
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
|
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
|
||||||
description: The model download source. It can be an external url or a local filepath.
|
description: The model download source. It can be an external url or a local filepath.
|
||||||
id: # OpenAI-equivalent
|
id: # OpenAI-equivalent
|
||||||
type: string
|
type: string
|
||||||
description: The model identifier, which can be referenced in the API endpoints.
|
description: The model identifier, which can be referenced in the API endpoints.
|
||||||
example: zephyr-7b
|
example: zephyr-7b
|
||||||
name:
|
name:
|
||||||
type: string
|
type: string
|
||||||
description: Human-readable name that is used for UI
|
description: Human-readable name that is used for UI
|
||||||
owned_by: # OpenAI-equivalent
|
owned_by: # OpenAI-equivalent
|
||||||
type: string
|
type: string
|
||||||
description: The organization that owns the model (you!)
|
description: The organization that owns the model (you!)
|
||||||
default: you # TODO
|
default: you # TODO
|
||||||
@ -7356,16 +7545,16 @@ components:
|
|||||||
default: to_download
|
default: to_download
|
||||||
parameters:
|
parameters:
|
||||||
type: object
|
type: object
|
||||||
description:
|
description:
|
||||||
properties:
|
properties:
|
||||||
init:
|
init:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
ctx_len:
|
ctx_len:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: 2048
|
default: 2048
|
||||||
ngl:
|
ngl:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: 100
|
default: 100
|
||||||
@ -7373,92 +7562,90 @@ components:
|
|||||||
type: bool
|
type: bool
|
||||||
description: TODO
|
description: TODO
|
||||||
default: true
|
default: true
|
||||||
n_parallel:
|
n_parallel:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: 4
|
default: 4
|
||||||
pre_prompt:
|
pre_prompt:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: A chat between a curious user and an artificial intelligence
|
default: A chat between a curious user and an artificial intelligence
|
||||||
user_prompt:
|
user_prompt:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: "USER:"
|
default: "USER:"
|
||||||
ai_prompt:
|
ai_prompt:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: "ASSISTANT:"
|
default: "ASSISTANT:"
|
||||||
default: {
|
default:
|
||||||
ctx_len: 2048,
|
{
|
||||||
ngl: 100,
|
ctx_len: 2048,
|
||||||
embedding: true,
|
ngl: 100,
|
||||||
n_parallel: 4,
|
embedding: true,
|
||||||
pre_prompt: "A chat between a curious user and an artificial intelligence",
|
n_parallel: 4,
|
||||||
user_prompt: "USER:",
|
pre_prompt: "A chat between a curious user and an artificial intelligence",
|
||||||
ai_prompt: "ASSISTANT:"
|
user_prompt: "USER:",
|
||||||
}
|
ai_prompt: "ASSISTANT:",
|
||||||
|
}
|
||||||
runtime:
|
runtime:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
temperature:
|
temperature:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: 0.7
|
default: 0.7
|
||||||
token_limit:
|
token_limit:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: 2048
|
default: 2048
|
||||||
top_k:
|
top_k:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: 0
|
default: 0
|
||||||
top_p:
|
top_p:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: 1
|
default: 1
|
||||||
stream:
|
stream:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: true
|
default: true
|
||||||
default: {
|
default:
|
||||||
temperature: 0.7,
|
{
|
||||||
token_limit: 2048,
|
temperature: 0.7,
|
||||||
top_k: 0,
|
token_limit: 2048,
|
||||||
top_p: 1,
|
top_k: 0,
|
||||||
stream: true
|
top_p: 1,
|
||||||
}
|
stream: true,
|
||||||
|
}
|
||||||
metadata:
|
metadata:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
engine:
|
engine:
|
||||||
type: string
|
type: string
|
||||||
enum: [llamacpp, api,tensorrt]
|
enum: [llamacpp, api, tensorrt]
|
||||||
default: llamacpp
|
default: llamacpp
|
||||||
quantization:
|
quantization:
|
||||||
type: string
|
type: string
|
||||||
description: TODO
|
description: TODO
|
||||||
default: Q4_K_M
|
default: Q4_K_M
|
||||||
size:
|
size:
|
||||||
type: string
|
type: string
|
||||||
default: 7b
|
default: 7b
|
||||||
binaries:
|
binaries:
|
||||||
type: array
|
type: array
|
||||||
description: TODO
|
description: TODO
|
||||||
default: TODO
|
default: TODO
|
||||||
default: {
|
default:
|
||||||
engine: llamacpp,
|
{ engine: llamacpp, quantization: Q4_K_M, size: 7b, binaries: TODO }
|
||||||
quantization: Q4_K_M,
|
|
||||||
size: 7b,
|
|
||||||
binaries: TODO
|
|
||||||
}
|
|
||||||
required:
|
required:
|
||||||
- id # From OpenAI
|
- id # From OpenAI
|
||||||
- version
|
- version
|
||||||
- source_url
|
- source_url
|
||||||
- created # From OpenAI, autogenerated in Jan
|
- created # From OpenAI, autogenerated in Jan
|
||||||
- object # From OpenAI, autogenerated in Jan
|
- object # From OpenAI, autogenerated in Jan
|
||||||
- owned_by # From OpenAI, autogenerated in Jan
|
- owned_by # From OpenAI, autogenerated in Jan
|
||||||
x-oaiMeta:
|
x-oaiMeta:
|
||||||
name: The model object
|
name: The model object
|
||||||
example: *retrieve_model_response
|
example: *retrieve_model_response
|
||||||
@ -9616,4 +9803,4 @@ x-oaiMeta:
|
|||||||
path: object
|
path: object
|
||||||
- type: endpoint
|
- type: endpoint
|
||||||
key: createEdit
|
key: createEdit
|
||||||
path: create
|
path: create
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user