docs: add model methods like start stop

0xSage 2023-11-19 21:39:24 +08:00
parent 126c2e0132
commit 9553c70d07


@@ -2309,6 +2309,7 @@ paths:
]
}
# Models
/models:
get:
operationId: listModels
@@ -2374,6 +2375,61 @@ paths:
],
"object": "list"
}
post:
operationId: importModel
tags:
- Models
summary: Import Model
description: Imports a model. The model can come from a local folder, a remote source, or an API endpoint; the importer inspects source_url to determine the model's format.
parameters:
- in: query # assumed placement; /models has no {source_url} path segment, so this cannot be a path parameter
name: source_url
required: true
schema:
type: string
# ideally this will be an actual ID, so this will always work from browser
example: https://huggingface.com/thebloke/example.gguf
description: The URL or local path of the model source to import
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: "#/components/schemas/Model"
x-oaiMeta:
name: Import model
returns: The imported [model](/docs/api-reference/models/object) object.
examples:
request:
curl: |
curl -X POST "https://api.openai.com/v1/models?source_url=https://huggingface.com/thebloke/example.gguf" \
-H "Authorization: Bearer $OPENAI_API_KEY"
python: |
# Sketch using requests; the OpenAI Python SDK has no import-model helper.
import os
import requests
headers = {"Authorization": "Bearer " + os.environ["OPENAI_API_KEY"]}
params = {"source_url": "https://huggingface.com/thebloke/example.gguf"}
print(requests.post("https://api.openai.com/v1/models", params=params, headers=headers).json())
node.js: |-
// Sketch using fetch; the OpenAI Node SDK has no import-model helper.
async function main() {
const sourceUrl = encodeURIComponent("https://huggingface.com/thebloke/example.gguf");
const headers = { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` };
const response = await fetch(`https://api.openai.com/v1/models?source_url=${sourceUrl}`, { method: "POST", headers });
console.log(await response.json());
}
main();
response: |
{
"id": "VAR_model_id",
"object": "model",
"created": 1686935002,
"owned_by": "openai",
"state": "ready"
}
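# Illustrative source_url values for importModel, matching the three source kinds named in
# its description (the concrete paths and hosts below are illustrative, not values defined by this spec):
#   source_url: https://huggingface.com/thebloke/example.gguf    # remote source (GGUF file)
#   source_url: /home/user/models/example.gguf                   # local folder or file
#   source_url: https://my-model-server.example/v1/models/foo    # API endpoint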
/models/{model}:
get:
operationId: retrieveModel
@@ -2481,6 +2537,138 @@ paths:
"object": "model",
"deleted": true
}
post:
operationId: startModel
tags:
- Models
summary: Start Model
description: Starts an imported model by loading it into memory (VRAM or RAM).
parameters:
- in: path
name: model
required: true
schema:
type: string
# ideally this will be an actual ID, so this will always work from browser
example: gpt-3.5-turbo
description: The ID of the model to start
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: "#/components/schemas/Model"
x-oaiMeta:
name: Start model
returns: The started [model](/docs/api-reference/models/object) object matching the specified ID.
examples:
request:
curl: |
curl https://api.openai.com/v1/models/VAR_model_id \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-X POST
python: |
# Sketch using requests; the OpenAI Python SDK has no start-model helper.
import os
import requests
headers = {"Authorization": "Bearer " + os.environ["OPENAI_API_KEY"]}
print(requests.post("https://api.openai.com/v1/models/VAR_model_id", headers=headers).json())
node.js: |-
// Sketch using fetch; the OpenAI Node SDK has no start-model helper.
async function main() {
const headers = { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` };
const response = await fetch("https://api.openai.com/v1/models/VAR_model_id", { method: "POST", headers });
console.log(await response.json());
}
main();
response: |
{
"id": "VAR_model_id",
"object": "model",
"created": 1686935002,
"owned_by": "openai"
}
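# After startModel returns, a client can confirm the model is loaded by retrieving it and
# checking the `state` field. Sketch below; the "ready" value is an assumption carried over
# from the importModel example above, not something this spec defines:
#   curl https://api.openai.com/v1/models/VAR_model_id -H "Authorization: Bearer $OPENAI_API_KEY"
#   # => { "id": "VAR_model_id", "object": "model", "created": 1686935002, "owned_by": "openai", "state": "ready" }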
/models/{model}/cancel:
post:
operationId: cancelModel
tags:
- Models
summary: Cancel Model
description: Stops a running model and unloads it from memory (VRAM or RAM).
parameters:
- in: path
name: model
required: true
schema:
type: string
description: The ID of the model that is running.
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: "#/components/schemas/RunObject"
x-oaiMeta:
name: Cancel a running model
beta: true
returns: The stopped [model](/docs/api-reference/models/object) object matching the specified ID.
examples:
request:
curl: |
curl https://api.openai.com/v1/models/VAR_model_id/cancel \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-X POST
python: |
# Sketch using requests; the OpenAI Python SDK has no stop-model helper.
import os
import requests
headers = {"Authorization": "Bearer " + os.environ["OPENAI_API_KEY"]}
print(requests.post("https://api.openai.com/v1/models/VAR_model_id/cancel", headers=headers).json())
node.js: |
// Sketch using fetch; the OpenAI Node SDK has no stop-model helper.
async function main() {
const headers = { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` };
const response = await fetch("https://api.openai.com/v1/models/VAR_model_id/cancel", { method: "POST", headers });
console.log(await response.json());
}
main();
response: |
{
"id": "VAR_model_id",
"object": "model",
"created": 1686935002,
"owned_by": "openai"
}
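# End-to-end lifecycle sketch tying the three model operations above together (the
# query-parameter form of source_url is the assumption noted at importModel):
#   1. Import: POST /models?source_url=https://huggingface.com/thebloke/example.gguf
#   2. Start:  POST /models/VAR_model_id          (loads the model into VRAM/RAM)
#   3. Use the model from other endpoints by passing its ID as `model`
#   4. Stop:   POST /models/VAR_model_id/cancel   (unloads the model from VRAM/RAM)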
/moderations:
post:
@@ -3062,6 +3250,7 @@ paths:
"deleted": true
}
# Threads
/threads:
post:
operationId: createThread
@@ -7389,14 +7578,15 @@ components:
type: string
description: TODO
default: "ASSISTANT:"
default:
{
ctx_len: 2048,
ngl: 100,
embedding: true,
n_parallel: 4,
pre_prompt: "A chat between a curious user and an artificial intelligence",
user_prompt: "USER:",
ai_prompt: "ASSISTANT:"
ai_prompt: "ASSISTANT:",
}
runtime:
type: object
@@ -7421,19 +7611,20 @@ components:
type: string
description: TODO
default: true
default:
{
temperature: 0.7,
token_limit: 2048,
top_k: 0,
top_p: 1,
stream: true,
}
metadata:
type: object
properties:
engine:
type: string
enum: [llamacpp, api, tensorrt]
default: llamacpp
quantization:
type: string
@@ -7446,12 +7637,8 @@ components:
type: array
description: TODO
default: TODO
default:
{ engine: llamacpp, quantization: Q4_K_M, size: 7b, binaries: TODO }
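# Putting the defaults above together, an imported model entry might look like the sketch
# below; the grouping keys ("settings", "parameters") are placeholders for the parent
# properties of these default blocks, whose actual names fall outside this diff:
#   settings:   { ctx_len: 2048, ngl: 100, embedding: true, n_parallel: 4, ai_prompt: "ASSISTANT:", ... }
#   parameters: { temperature: 0.7, token_limit: 2048, top_k: 0, top_p: 1, stream: true }
#   metadata:   { engine: llamacpp, quantization: Q4_K_M, size: 7b }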
required:
- id # From OpenAI
- version