Add inconsistency check pipeline (#197)

ls1intum · Jan 28, 2025 · d37f5c4 · d37f5c4
1 parent 3eeab7f
commit d37f5c4
Show file tree

Hide file tree

Showing 8 changed files with 222 additions and 28 deletions.
diff --git a/app/common/PipelineEnum.py b/app/common/PipelineEnum.py
@@ -14,5 +14,6 @@ class PipelineEnum(str, Enum):
     IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE"
     IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE"
     IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION"
+    IRIS_INCONSISTENCY_CHECK = "IRIS_INCONSISTENCY_CHECK"
     IRIS_REWRITING_PIPELINE = "IRIS_REWRITING_PIPELINE"
     NOT_SET = "NOT_SET"
diff --git a/app/domain/__init__.py b/app/domain/__init__.py
@@ -6,6 +6,9 @@
 from .competency_extraction_pipeline_execution_dto import (
     CompetencyExtractionPipelineExecutionDTO,
 )
+from .inconsistency_check_pipeline_execution_dto import (
+    InconsistencyCheckPipelineExecutionDTO,
+)
 from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import (
     ExerciseChatPipelineExecutionDTO,
 )

diff --git a/app/domain/inconsistency_check_pipeline_execution_dto.py b/app/domain/inconsistency_check_pipeline_execution_dto.py
@@ -0,0 +1,9 @@
+from pydantic import BaseModel
+
+from . import PipelineExecutionDTO
+from .data.programming_exercise_dto import ProgrammingExerciseDTO
+
+
+class InconsistencyCheckPipelineExecutionDTO(BaseModel):
+    # Request body of the inconsistency check pipeline.
+    # Generic execution data (the worker reads `settings` and `initial_stages` from it).
+    execution: PipelineExecutionDTO
+    # Programming exercise to check; the pipeline reads its `problem_statement`
+    # and `template_repository`.
+    exercise: ProgrammingExerciseDTO
diff --git a/app/domain/status/inconsistency_check_status_update_dto.py b/app/domain/status/inconsistency_check_status_update_dto.py
@@ -0,0 +1,5 @@
+from app.domain.status.status_update_dto import StatusUpdateDTO
+
+
+class InconsistencyCheckStatusUpdateDTO(StatusUpdateDTO):
+    # Status update payload used by the inconsistency check callback.
+    # Textual result field; defaults to an empty string.
+    result: str = ""
diff --git a/app/pipeline/inconsistency_check_pipeline.py b/app/pipeline/inconsistency_check_pipeline.py
@@ -0,0 +1,71 @@
+import logging
+from typing import Optional
+
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import Runnable
+from langsmith import traceable
+
+from app.common.PipelineEnum import PipelineEnum
+from app.domain import InconsistencyCheckPipelineExecutionDTO
+from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
+from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel
+from app.pipeline import Pipeline
+from app.web.status.status_update import InconsistencyCheckCallback
+from app.pipeline.prompts.inconsistency_check_prompts import basic_prompt
+
+logger = logging.getLogger(__name__)
+
+
+class InconsistencyCheckPipeline(Pipeline):
+    """
+    Pipeline that asks an LLM to find inconsistencies between the problem statement
+    and the template repository of a programming exercise.
+    """
+
+    pipeline: Runnable
+    llm: IrisLangchainChatModel
+    # None when the pipeline was constructed without a callback.
+    callback: Optional[InconsistencyCheckCallback]
+
+    def __init__(self, callback: Optional[InconsistencyCheckCallback] = None):
+        """
+        :param callback: Optional status callback used to report progress and the result
+        """
+        super().__init__(implementation_id="inconsistency_check_pipeline")
+        completion_args = CompletionArguments(temperature=0, max_tokens=2000)
+        self.llm = IrisLangchainChatModel(
+            request_handler=CapabilityRequestHandler(
+                requirements=RequirementList(
+                    gpt_version_equivalent=4.5,
+                    context_length=16385,
+                )
+            ),
+            completion_args=completion_args,
+        )
+        self.prompt = PromptTemplate.from_template(basic_prompt)
+        # prompt -> model -> plain string output
+        self.pipeline = self.prompt | self.llm | StrOutputParser()
+        self.callback = callback
+        self.tokens = []
+
+    @traceable(name="Inconsistency Check Pipeline")
+    def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs):
+        """
+        Runs the pipeline to check for inconsistencies in the exercise
+        :param dto: execution data transfer object
+        :param kwargs: The keyword arguments
+        :return: The model's response text
+        :raises ValueError: If the DTO carries no exercise
+        """
+
+        if not dto.exercise:
+            logger.error("Inconsistency check pipeline requires an exercise")
+            raise ValueError("Exercise is required")
+
+        logger.info("Running inconsistency check pipeline...")
+        # The callback is optional in the constructor, so never dereference it blindly.
+        if self.callback is not None:
+            self.callback.in_progress()
+
+        # Render every template file as a <File> element for the prompt.
+        template_repository = "\n".join(
+            f"<File path='{file_path}'>\n{file_content}</File>"
+            for file_path, file_content in dto.exercise.template_repository.items()
+        )
+
+        response: str = self.pipeline.invoke(
+            {
+                "problem_statement": dto.exercise.problem_statement,
+                "template_repository": template_repository,
+            }
+        )
+
+        self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK)
+
+        if self.callback is not None:
+            self.callback.done(final_result=response, tokens=self.tokens)
+        # Returned so that a run without a callback still yields its result.
+        return response
diff --git a/app/pipeline/prompts/inconsistency_check_prompts.py b/app/pipeline/prompts/inconsistency_check_prompts.py
@@ -0,0 +1,34 @@
+# Prompt template for the inconsistency check.
+# Placeholders: {problem_statement} and {template_repository}.
+basic_prompt = """\
+<Instruction>
+As a detail-oriented expert, find inconsistencies between the provided problem statement and the template repository \
+of a programming exercise.
+The student will use the template repository to write code that solves the problem statement.
+
+Checks:
+- Given the problem statement, identify any missing or incorrect information in the template repository.
+- Given the template repository, identify any missing or incorrect information in the problem statement.
+- Ensure that the theme of the problem statement is consistent with the template repository.
+- Ensure that the problem statement is clear and concise and it covers everything that the student needs to know in \
+order to solve the exercise.
+
+It is not an inconsistency, if the problem statement clearly states that the student is responsible for writing a \
+specific part of the code.
+</Instruction>
+
+<Problem Statement>
+{problem_statement}
+</Problem Statement>
+
+<TemplateRepository>
+{template_repository}
+</TemplateRepository>
+
+<Response>
+Be smart about it, give a structured and actionable response that an instructor can use to significantly improve the \
+exercise. Clearly state where the inconsistency lies. Do not make up inconsistencies just to have something to say.
+It needs to be very comprehensive and detailed, imagine some inconsistencies slipped through, students in the exam \
+will be confused and frustrated. This is a high stakes exam, so we need to be very thorough.
+You will be legally responsible for the quality of the exercise, so make sure you do the absolute best job possible, \
+otherwise you will be held accountable in the court of law. Do not quote whole files! 🔫
+</Response>
+"""
diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py
@@ -10,6 +10,7 @@
     ExerciseChatPipelineExecutionDTO,
     CourseChatPipelineExecutionDTO,
     CompetencyExtractionPipelineExecutionDTO,
+    InconsistencyCheckPipelineExecutionDTO,
 )
 from app.domain.rewriting_pipeline_execution_dto import RewritingPipelineExecutionDTO
 from app.pipeline.chat.exercise_chat_agent_pipeline import ExerciseChatAgentPipeline
@@ -23,13 +24,15 @@
     ChatGPTWrapperStatusCallback,
     CourseChatStatusCallback,
     CompetencyExtractionCallback,
+    InconsistencyCheckCallback,
     LectureChatCallback,
     RewritingCallback,
 )
 from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline
 from app.dependencies import TokenValidator
 from app.domain import FeatureDTO
 from app.pipeline.competency_extraction_pipeline import CompetencyExtractionPipeline
+from app.pipeline.inconsistency_check_pipeline import InconsistencyCheckPipeline
 from app.domain.text_exercise_chat_pipeline_execution_dto import (
     TextExerciseChatPipelineExecutionDTO,
 )
@@ -67,6 +70,31 @@ def run_exercise_chat_pipeline_worker(
         callback.error("Fatal error.", exception=e)
 
 
+def run_chatgpt_wrapper_pipeline_worker(
+    dto: ExerciseChatPipelineExecutionDTO, _variant: str
+):
+    """
+    Builds and runs the ChatGPT wrapper pipeline, reporting failures via the callback.
+    :param dto: execution data transfer object providing settings and initial stages
+    :param _variant: requested variant; not used by this worker
+    """
+    # Pre-bind so the handler below can tell whether the callback was ever created.
+    callback = None
+    try:
+        callback = ChatGPTWrapperStatusCallback(
+            run_id=dto.settings.authentication_token,
+            base_url=dto.settings.artemis_base_url,
+            initial_stages=dto.initial_stages,
+        )
+        pipeline = ChatGPTWrapperPipeline(callback=callback)
+    except Exception as e:
+        logger.error(f"Error preparing ChatGPT wrapper pipeline: {e}")
+        logger.error(traceback.format_exc())
+        # Only report through the callback if its construction succeeded.
+        if callback is not None:
+            callback.error("Fatal error.", exception=e)
+        capture_exception(e)
+        return
+
+    try:
+        pipeline(dto=dto)
+    except Exception as e:
+        logger.error(f"Error running ChatGPT wrapper pipeline: {e}")
+        logger.error(traceback.format_exc())
+        callback.error("Fatal error.", exception=e)
+
+
 @router.post(
     "/tutor-chat/{variant}/run",
     status_code=status.HTTP_202_ACCEPTED,
@@ -226,6 +254,20 @@ def run_competency_extraction_pipeline_worker(
         callback.error("Fatal error.", exception=e)
 
 
+@router.post(
+    "/competency-extraction/{variant}/run",
+    status_code=status.HTTP_202_ACCEPTED,
+    dependencies=[Depends(TokenValidator())],
+)
+def run_competency_extraction_pipeline(
+    variant: str, dto: CompetencyExtractionPipelineExecutionDTO
+):
+    # Start the worker on its own thread; the handler does not wait for it to finish.
+    worker_args = (dto, variant)
+    Thread(target=run_competency_extraction_pipeline_worker, args=worker_args).start()
+
+
 def run_rewriting_pipeline_worker(dto: RewritingPipelineExecutionDTO, _variant: str):
     try:
         callback = RewritingCallback(
@@ -248,20 +290,6 @@ def run_rewriting_pipeline_worker(dto: RewritingPipelineExecutionDTO, _variant:
         callback.error("Fatal error.", exception=e)
 
 
-@router.post(
-    "/competency-extraction/{variant}/run",
-    status_code=status.HTTP_202_ACCEPTED,
-    dependencies=[Depends(TokenValidator())],
-)
-def run_competency_extraction_pipeline(
-    variant: str, dto: CompetencyExtractionPipelineExecutionDTO
-):
-    thread = Thread(
-        target=run_competency_extraction_pipeline_worker, args=(dto, variant)
-    )
-    thread.start()
-
-
 @router.post(
     "/rewriting/{variant}/run",
     status_code=status.HTTP_202_ACCEPTED,
@@ -273,30 +301,39 @@ def run_rewriting_pipeline(variant: str, dto: RewritingPipelineExecutionDTO):
     thread.start()
 
 
-def run_chatgpt_wrapper_pipeline_worker(
-    dto: ExerciseChatPipelineExecutionDTO, _variant: str
+def run_inconsistency_check_pipeline_worker(
+    dto: InconsistencyCheckPipelineExecutionDTO, _variant: str
 ):
+    """
+    Builds and runs the inconsistency check pipeline for one request.
+    :param dto: execution data transfer object with execution settings and the exercise
+    :param _variant: requested variant; not used by this worker
+    """
     try:
-        callback = ChatGPTWrapperStatusCallback(
-            run_id=dto.settings.authentication_token,
-            base_url=dto.settings.artemis_base_url,
-            initial_stages=dto.initial_stages,
+        callback = InconsistencyCheckCallback(
+            run_id=dto.execution.settings.authentication_token,
+            base_url=dto.execution.settings.artemis_base_url,
+            initial_stages=dto.execution.initial_stages,
         )
-        pipeline = ChatGPTWrapperPipeline(callback=callback)
+        pipeline = InconsistencyCheckPipeline(callback=callback)
     except Exception as e:
-        logger.error(f"Error preparing ChatGPT wrapper pipeline: {e}")
+        logger.error(f"Error preparing inconsistency check pipeline: {e}")
         logger.error(traceback.format_exc())
         capture_exception(e)
+        # Stop here: `pipeline` (and possibly `callback`) is unbound after a failed setup.
         return
 
     try:
         pipeline(dto=dto)
     except Exception as e:
-        logger.error(f"Error running ChatGPT wrapper pipeline: {e}")
+        logger.error(f"Error running inconsistency check pipeline: {e}")
         logger.error(traceback.format_exc())
         callback.error("Fatal error.", exception=e)
 
 
+@router.post(
+    "/inconsistency-check/{variant}/run",
+    status_code=status.HTTP_202_ACCEPTED,
+    dependencies=[Depends(TokenValidator())],
+)
+def run_inconsistency_check_pipeline(
+    variant: str, dto: InconsistencyCheckPipelineExecutionDTO
+):
+    # Start the worker on its own thread; the handler does not wait for it to finish.
+    worker_args = (dto, variant)
+    Thread(target=run_inconsistency_check_pipeline_worker, args=worker_args).start()
+
+
 @router.get("/{feature}/variants")
 def get_pipeline(feature: str):
     """
@@ -359,6 +396,14 @@ def get_pipeline(feature: str):
                     description="Default lecture chat variant.",
                 )
             ]
+        case "INCONSISTENCY_CHECK":
+            return [
+                FeatureDTO(
+                    id="default",
+                    name="Default Variant",
+                    description="Default inconsistency check variant.",
+                )
+            ]
         case "REWRITING":
             return [
                 FeatureDTO(

diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py
@@ -13,6 +13,9 @@
 from app.domain.chat.course_chat.course_chat_status_update_dto import (
     CourseChatStatusUpdateDTO,
 )
+from app.domain.status.inconsistency_check_status_update_dto import (
+    InconsistencyCheckStatusUpdateDTO,
+)
 from app.domain.status.lecture_chat_status_update_dto import (
     LectureChatStatusUpdateDTO,
 )
@@ -141,7 +144,8 @@ def error(
         self.stage.state = StageStateEnum.ERROR
         self.stage.message = message
         self.status.result = None
-        self.status.suggestions = None
+        if hasattr(self.status, "suggestions"):
+            self.status.suggestions = None
         self.status.tokens = tokens or self.status.tokens
         # Set all subsequent stages to SKIPPED if an error occurs
         rest_of_index = (
@@ -172,7 +176,8 @@ def skip(self, message: Optional[str] = None, start_next_stage: bool = True):
         self.stage.state = StageStateEnum.SKIPPED
         self.stage.message = message
         self.status.result = None
-        self.status.suggestions = None
+        if hasattr(self.status, "suggestions"):
+            self.status.suggestions = None
         next_stage = self.get_next_stage()
         if next_stage is not None:
             self.stage = next_stage
@@ -317,6 +322,27 @@ def __init__(
         super().__init__(url, run_id, status, stage, len(stages) - 1)
 
 
+class InconsistencyCheckCallback(StatusCallback):
+    """Status callback for inconsistency check pipeline runs."""
+
+    def __init__(self, run_id: str, base_url: str, initial_stages: List[StageDTO]):
+        """
+        :param run_id: Identifier of the run, embedded in the status URL
+        :param base_url: Base URL the status URL is built from
+        :param initial_stages: Stages to start from; the check stage is appended to them
+        """
+        url = f"{base_url}/api/public/pyris/pipelines/inconsistency-check/runs/{run_id}/status"
+        # The pipeline's own stage always comes last, after any initial stages.
+        check_stage = StageDTO(
+            weight=10,
+            state=StageStateEnum.NOT_STARTED,
+            name="Checking for inconsistencies",
+        )
+        stages = initial_stages or []
+        stages.append(check_stage)
+        status = InconsistencyCheckStatusUpdateDTO(stages=stages)
+        last_index = len(stages) - 1
+        super().__init__(url, run_id, status, check_stage, last_index)
+
+
 class LectureChatCallback(StatusCallback):
     def __init__(
         self,