feat: init ai activity chat session

swve 2023-12-30 16:48:57 +00:00
parent ddab6d6483
commit f7d76eea1e
10 changed files with 305 additions and 8 deletions

View file

@@ -1,6 +1,7 @@
from typing import Literal, Optional
from pydantic import BaseModel
import os
+from dotenv import load_dotenv
import yaml
@@ -23,6 +24,11 @@ class SecurityConfig(BaseModel):
    auth_jwt_secret_key: str
+
+class AIConfig(BaseModel):
+    openai_api_key: str | None
+    is_ai_enabled: bool | None
+
class S3ApiConfig(BaseModel):
    bucket_name: str | None
    endpoint_url: str | None
@@ -58,9 +64,13 @@ class LearnHouseConfig(BaseModel):
    hosting_config: HostingConfig
    database_config: DatabaseConfig
    security_config: SecurityConfig
+    ai_config: AIConfig

def get_learnhouse_config() -> LearnHouseConfig:
+    load_dotenv()
+
    # Get the YAML file
    yaml_path = os.path.join(os.path.dirname(__file__), "config.yaml")
@@ -173,6 +183,16 @@ def get_learnhouse_config() -> LearnHouseConfig:
        "mongo_connection_string"
    )

+    # AI Config
+    env_openai_api_key = os.environ.get("LEARNHOUSE_OPENAI_API_KEY")
+    env_is_ai_enabled = os.environ.get("LEARNHOUSE_IS_AI_ENABLED")
+    openai_api_key = env_openai_api_key or yaml_config.get("ai_config", {}).get(
+        "openai_api_key"
+    )
+    is_ai_enabled = env_is_ai_enabled or yaml_config.get("ai_config", {}).get(
+        "is_ai_enabled"
+    )
+
    # Sentry config
    # check if the sentry config is provided in the YAML file
    sentry_config_verif = (
@@ -217,6 +237,12 @@ def get_learnhouse_config() -> LearnHouseConfig:
        mongo_connection_string=mongo_connection_string,
    )

+    # AI Config
+    ai_config = AIConfig(
+        openai_api_key=openai_api_key,
+        # note: bool() on a non-empty env string (even "false") is True
+        is_ai_enabled=bool(is_ai_enabled),
+    )

    # Create LearnHouseConfig object
    config = LearnHouseConfig(
        site_name=site_name,
@@ -228,6 +254,7 @@ def get_learnhouse_config() -> LearnHouseConfig:
        hosting_config=hosting_config,
        database_config=database_config,
        security_config=SecurityConfig(auth_jwt_secret_key=auth_jwt_secret_key),
+        ai_config=ai_config,
    )

    return config
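
For reference, a minimal sketch of how the new AI settings resolve, with hypothetical values; note that bool() treats any non-empty string as truthy:

# Hypothetical usage sketch: environment variables take precedence over config.yaml
import os
from config.config import get_learnhouse_config

os.environ["LEARNHOUSE_OPENAI_API_KEY"] = "sk-example"  # placeholder, not a real key
os.environ["LEARNHOUSE_IS_AI_ENABLED"] = "false"        # still a non-empty string

config = get_learnhouse_config()
print(config.ai_config.openai_api_key)  # "sk-example"
print(config.ai_config.is_ai_enabled)   # True, because bool("false") is True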

View file

@@ -18,3 +18,9 @@ requests
pyyaml
sentry-sdk[fastapi]
pydantic[email]>=1.8.0,<2.0.0
+langchain
+tiktoken
+openai
+chromadb
+sentence-transformers
+python-dotenv

View file

@@ -40,7 +40,7 @@ class CourseUpdate(CourseBase):
class CourseRead(CourseBase):
    id: int
    org_id: int = Field(default=None, foreign_key="organization.id")
-    authors: List[UserRead]
+    authors: Optional[List[UserRead]]
    course_uuid: str
    creation_date: str
    update_date: str

View file

@@ -1,5 +1,6 @@
from fastapi import APIRouter, Depends
from src.routers import blocks, dev, trail, users, auth, orgs, roles
+from src.routers.ai import ai
from src.routers.courses import chapters, collections, courses, activities
from src.routers.install import install
from src.services.dev.dev import isDevModeEnabledOrRaise
@ -18,14 +19,16 @@ v1_router.include_router(blocks.router, prefix="/blocks", tags=["blocks"])
v1_router.include_router(courses.router, prefix="/courses", tags=["courses"])
v1_router.include_router(chapters.router, prefix="/chapters", tags=["chapters"])
v1_router.include_router(activities.router, prefix="/activities", tags=["activities"])
v1_router.include_router(
collections.router, prefix="/collections", tags=["collections"]
)
v1_router.include_router(collections.router, prefix="/collections", tags=["collections"])
v1_router.include_router(trail.router, prefix="/trail", tags=["trail"])
v1_router.include_router(ai.router, prefix="/ai", tags=["ai"])
# Dev Routes
v1_router.include_router(
dev.router, prefix="/dev", tags=["dev"], dependencies=[Depends(isDevModeEnabledOrRaise)]
dev.router,
prefix="/dev",
tags=["dev"],
dependencies=[Depends(isDevModeEnabledOrRaise)],
)
# Install Routes
@@ -35,4 +38,3 @@ v1_router.include_router(
    tags=["install"],
    dependencies=[Depends(isInstallModeEnabled)],
)

View file

@@ -0,0 +1,25 @@
from fastapi import APIRouter, Depends, Request
from sqlmodel import Session
from src.services.ai.ai import ai_start_activity_chat_session
from src.services.ai.schemas.ai import StartActivityAIChatSession
from src.core.events.database import get_db_session
from src.db.users import PublicUser
from src.security.auth import get_current_user

router = APIRouter()


@router.post("/start/activity_chat_session")
async def api_ai_start_activity_chat_session(
    request: Request,
    chat_session_object: StartActivityAIChatSession,
    current_user: PublicUser = Depends(get_current_user),
    db_session: Session = Depends(get_db_session),
):
    """
    Start a new AI Chat session with a Course Activity
    """
    return ai_start_activity_chat_session(
        request, chat_session_object, current_user, db_session
    )
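
A hypothetical client call for the new route; the /api/v1 mount point and Bearer auth are assumptions, not shown in this diff:

import requests

resp = requests.post(
    "http://localhost:8000/api/v1/ai/start/activity_chat_session",
    headers={"Authorization": "Bearer <token>"},  # assumption: JWT bearer auth
    json={"activity_uuid": "activity_xyz", "message": "Summarize this lecture"},
)
print(resp.json())  # the agent's final answer, i.e. ask_ai(...)["output"]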

View file

@@ -0,0 +1,65 @@
from fastapi import Depends, HTTPException, Request
from sqlmodel import Session, select
from src.db.courses import Course, CourseRead
from src.core.events.database import get_db_session
from src.db.users import PublicUser
from src.db.activities import Activity, ActivityRead
from src.security.auth import get_current_user
from src.services.ai.base import ask_ai
from src.services.ai.schemas.ai import StartActivityAIChatSession
from src.services.courses.activities.utils import (
    serialize_activity_text_to_ai_comprehensible_text,
    structure_activity_content_by_type,
)


def ai_start_activity_chat_session(
    request: Request,
    chat_session_object: StartActivityAIChatSession,
    current_user: PublicUser = Depends(get_current_user),
    db_session: Session = Depends(get_db_session),
):
    """
    Start a new AI Chat session with a Course Activity
    """
    # Get the Activity
    statement = select(Activity).where(
        Activity.activity_uuid == chat_session_object.activity_uuid
    )
    activity = db_session.exec(statement).first()

    # Check existence before validating, so a missing activity yields a 404
    # instead of a validation error from ActivityRead.from_orm(None)
    if not activity:
        raise HTTPException(
            status_code=404,
            detail="Activity not found",
        )
    activity = ActivityRead.from_orm(activity)

    # Get the Course
    statement = select(Course).join(Activity).where(
        Activity.activity_uuid == chat_session_object.activity_uuid
    )
    course = db_session.exec(statement).first()
    course = CourseRead.from_orm(course)

    # Get Activity Content Blocks
    content = activity.content

    # Serialize Activity Content Blocks to a text comprehensible by the AI
    structured = structure_activity_content_by_type(content)
    ai_friendly_text = serialize_activity_text_to_ai_comprehensible_text(
        structured, course, activity
    )

    response = ask_ai(
        chat_session_object.message,
        [],
        ai_friendly_text,
        "You are a helpful Education Assistant, and you are helping a student with the associated Course. "
        "Use the available tools to get context about this question, even if the question is not specific enough. "
        "For context, this is the Course name: " + course.name + " and this is the Lecture name: " + activity.name + ". "
        "Use your knowledge to help the student.",
    )
    return response["output"]

View file

@@ -0,0 +1,82 @@
from langchain.agents import AgentExecutor
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.messages import BaseMessage, SystemMessage
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.prompts import MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
    AgentTokenBufferMemory,
)
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_toolkits import (
    create_retriever_tool,
)
import chromadb

from config.config import get_learnhouse_config

client = chromadb.Client()
chat_history = []


def ask_ai(
    question: str,
    chat_history: list[BaseMessage],
    text_reference: str,
    message_for_the_prompt: str,
):
    # Get API Keys
    LH_CONFIG = get_learnhouse_config()
    openai_api_key = LH_CONFIG.ai_config.openai_api_key

    # split it into chunks
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.create_documents([text_reference])
    texts = text_splitter.split_documents(documents)

    # create the open-source embedding function
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # load it into Chroma and use it as a retriever
    db = Chroma.from_documents(texts, embedding_function)
    tool = create_retriever_tool(
        db.as_retriever(),
        "find_context_text",
        "Find associated text to get context about a course or a lecture",
    )
    tools = [tool]

    llm = ChatOpenAI(temperature=0, api_key=openai_api_key)

    memory_key = "history"
    memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm)

    system_message = SystemMessage(content=message_for_the_prompt)
    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    )

    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        memory=memory,
        verbose=True,
        return_intermediate_steps=True,
    )
    return agent_executor({"input": question})

View file

@@ -0,0 +1,12 @@
from pydantic import BaseModel


class StartActivityAIChatSession(BaseModel):
    activity_uuid: str
    message: str


class SendActivityAIChatMessage(BaseModel):
    aichat_uuid: str
    activity_uuid: str
    message: str

View file

@@ -0,0 +1,78 @@
from src.db.activities import ActivityRead
from src.db.courses import CourseRead


def structure_activity_content_by_type(activity):
    ### Get Headings, Texts, Callouts, Answers and Paragraphs from the activity as a big list of strings (text only) and return it
    # note: `activity` here is the rich-text document dict (the caller passes activity.content)

    # Get Headings
    headings = []
    for item in activity["content"]:
        if item["type"] == "heading":
            headings.append(item["content"][0]["text"])

    # Get Callouts
    callouts = []
    for item in activity["content"]:
        if item["type"] == "calloutInfo":
            # Get every type of text in the callout
            text = ""
            for text_item in item["content"]:
                text += text_item["text"]
            callouts.append(text)

    # Get Paragraphs
    paragraphs = []
    for item in activity["content"]:
        if item["type"] == "paragraph":
            paragraphs.append(item["content"][0]["text"])

    # TODO: Get Questions and Answers (if any)

    data_array = []

    # Add Headings
    data_array.append({"Headings": headings})

    # Add Callouts
    data_array.append({"Callouts": callouts})

    # Add Paragraphs
    data_array.append({"Paragraphs": paragraphs})

    return data_array

def serialize_activity_text_to_ai_comprehensible_text(
    data_array, course: CourseRead, activity: ActivityRead
):
    ### Serialize the text to a format that is comprehensible by the AI

    # Serialize Headings
    serialized_headings = ""
    for heading in data_array[0]["Headings"]:
        serialized_headings += heading + " "

    # Serialize Callouts
    serialized_callouts = ""
    for callout in data_array[1]["Callouts"]:
        serialized_callouts += callout + " "

    # Serialize Paragraphs
    serialized_paragraphs = ""
    for paragraph in data_array[2]["Paragraphs"]:
        serialized_paragraphs += paragraph + " "

    # Build a text that is comprehensible by the AI
    text = (
        'Use this as context. '
        + 'This is a course about "' + course.name + '". '
        + 'This is a lecture about "' + activity.name + '". '
        + 'These are the headings: "' + serialized_headings
        + '" These are the callouts: "' + serialized_callouts
        + '" These are the paragraphs: "' + serialized_paragraphs + '"'
    )
    return text
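
For illustration, a hedged sketch of the two helpers chained together, using made-up editor content in the shape the code reads (heading/calloutInfo/paragraph items with nested text nodes); course and activity stand in for the CourseRead and ActivityRead objects from the chat session:

doc = {
    "content": [
        {"type": "heading", "content": [{"text": "Photosynthesis"}]},
        {"type": "calloutInfo", "content": [{"text": "Remember: "}, {"text": "light is required."}]},
        {"type": "paragraph", "content": [{"text": "Plants convert light into energy."}]},
    ]
}

structured = structure_activity_content_by_type(doc)
# [{'Headings': ['Photosynthesis']},
#  {'Callouts': ['Remember: light is required.']},
#  {'Paragraphs': ['Plants convert light into energy.']}]

text = serialize_activity_text_to_ai_comprehensible_text(structured, course, activity)
# 'Use this as context. This is a course about "..." ...', passed to ask_ai as text_reference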