"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.inputs import PromptType, TextPrompt, TokensPrompt
from vllm.model_executor.models import ModelRegistry
from vllm.outputs import (CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, RequestOutput)
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams

from .version import __version__, __version_tuple__

# Explicit public API of the ``vllm`` package: version metadata first, then
# the user-facing entry points (LLM, prompt/output types), then the lower
# level engine classes and their argument containers.
__all__ = [
    "__version__",
    "__version_tuple__",
    "LLM",
    "ModelRegistry",
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
    "PoolingParams",
]