from typing import Dict, List, Optional, Union

try:
    from ollama import Client
except ImportError:
    raise ImportError("The 'ollama' library is required. Please install it using 'pip install ollama'.")

from neomem.configs.llms.base import BaseLlmConfig
from neomem.configs.llms.ollama import OllamaConfig
from neomem.llms.base import LLMBase


class OllamaLLM(LLMBase):
    def __init__(self, config: Optional[Union[BaseLlmConfig, OllamaConfig, Dict]] = None):
        # Convert to OllamaConfig if needed
        if config is None:
            config = OllamaConfig()
        elif isinstance(config, dict):
            config = OllamaConfig(**config)
        elif isinstance(config, BaseLlmConfig) and not isinstance(config, OllamaConfig):
            # Convert a generic BaseLlmConfig to an OllamaConfig
            config = OllamaConfig(
                model=config.model,
                temperature=config.temperature,
                api_key=config.api_key,
                max_tokens=config.max_tokens,
                top_p=config.top_p,
                top_k=config.top_k,
                enable_vision=config.enable_vision,
                vision_details=config.vision_details,
                http_client_proxies=config.http_client,
            )

        super().__init__(config)

        if not self.config.model:
            self.config.model = "llama3.1:70b"

        self.client = Client(host=self.config.ollama_base_url)
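
    # Usage sketch (illustrative, not part of the class API): all three config
    # forms below normalize to the same OllamaConfig. Assumes an Ollama server
    # is reachable at the configured ollama_base_url; the model tag is an
    # example, not a project default.
    #
    #     llm = OllamaLLM()                                   # defaults
    #     llm = OllamaLLM({"model": "llama3.1:8b"})           # plain dict
    #     llm = OllamaLLM(OllamaConfig(temperature=0.2))      # typed config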

    def _parse_response(self, response, tools):
        """
        Process the response based on whether tools are used or not.

        Args:
            response: The raw response from the API.
            tools: The list of tools provided in the request.

        Returns:
            str or dict: The processed response.
        """
        if tools:
            processed_response = {
                "content": response["message"]["content"] if isinstance(response, dict) else response.message.content,
                "tool_calls": [],
            }

            # Ollama doesn't support tool calls in the same way, so we return
            # the content alongside an empty tool_calls list.
            return processed_response
        else:
            # Handle both dict and object responses
            if isinstance(response, dict):
                return response["message"]["content"]
            else:
                return response.message.content
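
    # Shape note (an assumption inferred from the two branches above, not a
    # documented guarantee): older ollama clients return a plain dict, newer
    # ones a ChatResponse-like object, e.g.
    #
    #     {"message": {"role": "assistant", "content": "..."}}   # dict form
    #     response.message.content                               # object form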

    def generate_response(
        self,
        messages: List[Dict[str, str]],
        response_format=None,
        tools: Optional[List[Dict]] = None,
        tool_choice: str = "auto",
        **kwargs,
    ):
        """
        Generate a response based on the given messages using Ollama.

        Args:
            messages (list): List of message dicts containing 'role' and 'content'.
            response_format (dict, optional): Response format spec, e.g.
                {"type": "json_object"} for JSON output. Defaults to None.
            tools (list, optional): List of tools that the model can call. Defaults to None.
            tool_choice (str, optional): Tool choice method. Defaults to "auto".
                Accepted for interface compatibility; not used by Ollama.
            **kwargs: Additional Ollama-specific parameters.

        Returns:
            str or dict: The generated response.
        """
        # Build parameters for Ollama
        params = {
            "model": self.config.model,
            "messages": messages,
        }

        # Handle JSON response format by using Ollama's native format parameter
        if response_format and response_format.get("type") == "json_object":
            params["format"] = "json"
            if messages and messages[-1]["role"] == "user":
                messages[-1]["content"] += "\n\nPlease respond with valid JSON only."
            else:
                messages.append({"role": "user", "content": "Please respond with valid JSON only."})

        # Map config values onto Ollama's options; note that Ollama uses
        # "num_predict" where OpenAI-style APIs use "max_tokens".
        options = {
            "temperature": self.config.temperature,
            "num_predict": self.config.max_tokens,
            "top_p": self.config.top_p,
        }
        params["options"] = options

        response = self.client.chat(**params)
        return self._parse_response(response, tools)
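

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the module): assumes an Ollama
    # server is running locally and that the model tag below has been pulled
    # with `ollama pull llama3.1:8b`. The model name is an example only.
    llm = OllamaLLM({"model": "llama3.1:8b", "temperature": 0.0})

    # Plain text generation.
    print(llm.generate_response([{"role": "user", "content": "Say hi in one word."}]))

    # JSON mode: routed through Ollama's native format="json" parameter, with
    # a reinforcing instruction appended to the last user message.
    print(
        llm.generate_response(
            [{"role": "user", "content": "Return a JSON object with a 'greeting' key."}],
            response_format={"type": "json_object"},
        )
    )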