Initial clean commit - unified Lyra stack

2025-11-16 03:17:32 -05:00
commit 94fb091e59
270 changed files with 74200 additions and 0 deletions
--- a/neomem/neomem/memory/utils.py
+++ b/neomem/neomem/memory/utils.py
@@ -0,0 +1,187 @@
+import hashlib
+import re
+
+from neomem.configs.prompts import FACT_RETRIEVAL_PROMPT
+
+
+def get_fact_retrieval_messages(message):
+    """Return the fact-retrieval prompt paired with the formatted input text.
+
+    The first element is ``FACT_RETRIEVAL_PROMPT`` unchanged; the second is
+    ``message`` rendered after an ``Input:`` header line.
+    """
+    formatted_input = f"Input:\n{message}"
+    return FACT_RETRIEVAL_PROMPT, formatted_input
+
+
+def parse_messages(messages):
+    """Flatten chat messages into a ``role: content`` transcript string.
+
+    Only messages whose role equals ``system``, ``user`` or ``assistant`` are
+    emitted, each on its own newline-terminated line; messages with any other
+    role are skipped. An empty input yields an empty string.
+    """
+    known_roles = ("system", "user", "assistant")
+    return "".join(
+        f"{label}: {entry['content']}\n"
+        for entry in messages
+        for label in known_roles
+        if entry["role"] == label
+    )
+
+
+def format_entities(entities):
+    """Render relationship records as ``source -- relationship -- destination`` lines.
+
+    Each entity is indexed by the keys ``source``, ``relationship`` and
+    ``destination``. Lines are joined with newlines (no trailing newline);
+    a falsy ``entities`` value produces an empty string.
+    """
+    if not entities:
+        return ""
+
+    return "\n".join(
+        f"{edge['source']} -- {edge['relationship']} -- {edge['destination']}"
+        for edge in entities
+    )
+
+
+def remove_code_blocks(content: str) -> str:
+    """
+    Removes ``<think>...</think>`` sections and enclosing code block markers
+    (```[language] and ```) from a given string.
+
+    Remarks:
+    - Every ``<think>...</think>`` section (which may span multiple lines) is deleted first, so a
+      reasoning section placed before or after a fenced block does not stop the fence from being
+      recognised.
+    - The remaining text is stripped and matched against a pattern for a single code block that
+      starts with ``` followed by an optional language tag (letters or numbers) and ends with ```.
+    - If such a block spans the whole text, only its inner content is returned, stripped.
+    - Otherwise the stripped text (without think sections) is returned as-is.
+    """
+    # Remove reasoning sections before looking for the fence: the fence pattern
+    # is anchored at both ends, so any surrounding think section would
+    # otherwise prevent the match and leave the ``` markers in the result.
+    without_think = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
+
+    fenced = re.match(r"^```[a-zA-Z0-9]*\n([\s\S]*?)\n```$", without_think)
+    if fenced is None:
+        return without_think
+    return fenced.group(1).strip()
+
+
+
+def extract_json(text):
+    """
+    Return the JSON payload found in ``text``.
+
+    The input is stripped, then searched for the first triple-backtick block
+    (with an optional ``json`` tag); when one is found its inner content is
+    returned without surrounding whitespace. When no block is present the
+    stripped text itself is returned.
+    """
+    stripped = text.strip()
+    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", stripped, re.DOTALL)
+    # No fenced block: assume the stripped text is already raw JSON.
+    return stripped if fenced is None else fenced.group(1)
+
+
+def get_image_description(image_obj, llm, vision_details):
+    """
+    Ask ``llm`` for a description of an image and return its response.
+
+    When ``image_obj`` is a string it is used as the image URL of a single
+    user message that also carries a fixed text instruction, with
+    ``vision_details`` passed as the image ``detail`` value. Any other
+    ``image_obj`` is forwarded unchanged as the only message.
+    """
+    if not isinstance(image_obj, str):
+        # Already message-shaped input: hand it to the model as-is.
+        return llm.generate_response(messages=[image_obj])
+
+    instruction_part = {
+        "type": "text",
+        "text": "A user is providing an image. Provide a high level description of the image and do not include any additional text.",
+    }
+    image_part = {"type": "image_url", "image_url": {"url": image_obj, "detail": vision_details}}
+    prompt = {"role": "user", "content": [instruction_part, image_part]}
+    return llm.generate_response(messages=[prompt])
+
+
+def parse_vision_messages(messages, llm=None, vision_details="auto"):
+    """
+    Replace image-bearing messages with text descriptions.
+
+    Each message is handled according to its role and content:
+    - ``system`` messages are passed through unchanged.
+    - A message whose content is a list is sent whole to
+      ``get_image_description`` and replaced by ``{"role", "content": description}``.
+    - A message whose content is a dict of type ``image_url`` has its URL
+      described and is replaced the same way.
+    - Any other message is passed through unchanged.
+
+    Returns:
+        A new list of messages in the original order.
+
+    Raises:
+        Exception: if describing a single ``image_url`` message fails; the
+            underlying error is attached as ``__cause__``.
+    """
+    returned_messages = []
+    for msg in messages:
+        if msg["role"] == "system":
+            returned_messages.append(msg)
+            continue
+
+        content = msg["content"]
+        if isinstance(content, list):
+            # List content: the whole message is handed to the model.
+            description = get_image_description(msg, llm, vision_details)
+            returned_messages.append({"role": msg["role"], "content": description})
+        elif isinstance(content, dict) and content.get("type") == "image_url":
+            # Single image content
+            image_url = content["image_url"]["url"]
+            try:
+                description = get_image_description(image_url, llm, vision_details)
+            except Exception as err:
+                # Chain explicitly so the original failure stays visible as the cause.
+                raise Exception(f"Error while downloading {image_url}.") from err
+            returned_messages.append({"role": msg["role"], "content": description})
+        else:
+            # Regular text content
+            returned_messages.append(msg)
+
+    return returned_messages
+
+
+def process_telemetry_filters(filters):
+    """
+    Process the telemetry filters.
+
+    Returns a ``(keys, encoded_ids)`` tuple where ``keys`` is the list of all
+    filter names and ``encoded_ids`` maps each of ``user_id``, ``agent_id`` and
+    ``run_id`` that is present to the MD5 hex digest of its value, so the raw
+    identifier itself is not included in the result. When ``filters`` is
+    ``None`` the result is ``([], {})``.
+    """
+    if filters is None:
+        # Keep the same two-element shape as the normal path so callers can
+        # always unpack ``keys, encoded_ids``.
+        return [], {}
+
+    encoded_ids = {
+        key: hashlib.md5(filters[key].encode()).hexdigest()
+        for key in ("user_id", "agent_id", "run_id")
+        if key in filters
+    }
+
+    return list(filters.keys()), encoded_ids
+
+
+def sanitize_relationship_for_cypher(relationship) -> str:
+    """Replace punctuation in relationship text with underscore-delimited names.
+
+    Each listed character (or the three-dot run) is substituted by a token
+    such as ``_slash_``; afterwards consecutive underscores are collapsed to
+    one and leading/trailing underscores are removed.
+    """
+    replacements = (
+        # Ellipses: the three-dot run and the single-character form.
+        ("...", "_ellipsis_"),
+        ("…", "_ellipsis_"),
+        # Full-width punctuation.
+        ("。", "_period_"), ("，", "_comma_"), ("；", "_semicolon_"), ("：", "_colon_"),
+        ("！", "_exclamation_"), ("？", "_question_"),
+        ("（", "_lparen_"), ("）", "_rparen_"),
+        ("【", "_lbracket_"), ("】", "_rbracket_"),
+        ("《", "_langle_"), ("》", "_rangle_"),
+        # ASCII quotes, slashes and operator-like symbols.
+        ("'", "_apostrophe_"), ('"', "_quote_"), ("\\", "_backslash_"), ("/", "_slash_"),
+        ("|", "_pipe_"), ("&", "_ampersand_"), ("=", "_equals_"), ("+", "_plus_"),
+        ("*", "_asterisk_"), ("^", "_caret_"), ("%", "_percent_"), ("$", "_dollar_"),
+        ("#", "_hash_"), ("@", "_at_"), ("!", "_bang_"), ("?", "_question_"),
+        # ASCII brackets.
+        ("(", "_lparen_"), (")", "_rparen_"), ("[", "_lbracket_"), ("]", "_rbracket_"),
+        ("{", "_lbrace_"), ("}", "_rbrace_"), ("<", "_langle_"), (">", "_rangle_"),
+    )
+
+    text = relationship
+    for needle, token in replacements:
+        text = text.replace(needle, token)
+
+    # Adjacent tokens leave doubled underscores behind; squeeze them and trim the ends.
+    collapsed = re.sub(r"_+", "_", text)
+    return collapsed.strip("_")
+