From 9c899e8104eb9abf1babcce03e2ad2a770f6faa3 Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Thu, 16 Jan 2025 11:06:51 +0100 Subject: [PATCH 01/23] Setup rephrasing on new branch without FAQ --- app/common/PipelineEnum.py | 1 + .../rephrasing_pipeline_execution_dto.py | 11 +++ app/pipeline/prompts/faq_rephrasal.py | 15 ++++ app/pipeline/rephrasing_pipeline.py | 68 +++++++++++++++++++ app/web/routers/pipelines.py | 55 ++++++++++++++- app/web/status/status_update.py | 21 ++++++ 6 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 app/domain/rephrasing_pipeline_execution_dto.py create mode 100644 app/pipeline/prompts/faq_rephrasal.py create mode 100644 app/pipeline/rephrasing_pipeline.py diff --git a/app/common/PipelineEnum.py b/app/common/PipelineEnum.py index fc439a65..d64802a0 100644 --- a/app/common/PipelineEnum.py +++ b/app/common/PipelineEnum.py @@ -14,4 +14,5 @@ class PipelineEnum(str, Enum): IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE" IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE" IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION" + IRIS_REPHRASING_PIPELINE = "IRIS_REPHRASING_PIPELINE" NOT_SET = "NOT_SET" diff --git a/app/domain/rephrasing_pipeline_execution_dto.py b/app/domain/rephrasing_pipeline_execution_dto.py new file mode 100644 index 00000000..778ee42b --- /dev/null +++ b/app/domain/rephrasing_pipeline_execution_dto.py @@ -0,0 +1,11 @@ +from typing import List + +from pydantic import Field, BaseModel + +from . 
import PipelineExecutionDTO +from .data.competency_dto import CompetencyTaxonomy, Competency + + +class RephrasingPipelineExecutionDTO(BaseModel): + execution: PipelineExecutionDTO + to_be_rephrased : str = Field(alias="toBeRephrased") diff --git a/app/pipeline/prompts/faq_rephrasal.py b/app/pipeline/prompts/faq_rephrasal.py new file mode 100644 index 00000000..4654bf0e --- /dev/null +++ b/app/pipeline/prompts/faq_rephrasal.py @@ -0,0 +1,15 @@ +system_prompt_faq = """ +You are a skilled tutor with expertise in computer science and practical applications. Your task is to proofread and refine the given text of an FAQ. Specifically, you should: + +1. Correct all spelling and grammatical errors. +2. Ensure the text is written in simple and clear language, making it easy to understand for students. +3. Preserve the original meaning and intent of the text. +4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, convert them into complete sentences. +5. Make sure to use the original language of the input text +6. Avoid repeating any information that is already present in the text. +7. Make sure to keep the markdown formatting intact and add formatting for the most important information + +{rephrased_text} + +Respond with a single string containing only the improved version of the text. Your output will be used as a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. 
+""" \ No newline at end of file diff --git a/app/pipeline/rephrasing_pipeline.py b/app/pipeline/rephrasing_pipeline.py new file mode 100644 index 00000000..9c315412 --- /dev/null +++ b/app/pipeline/rephrasing_pipeline.py @@ -0,0 +1,68 @@ +import logging +from typing import Optional + +from langchain.output_parsers import PydanticOutputParser +from langchain_core.prompts import ( + ChatPromptTemplate, +) + +from app.common.PipelineEnum import PipelineEnum +from app.common.pyris_message import PyrisMessage, IrisMessageRole +from app.domain.data.text_message_content_dto import TextMessageContentDTO +from app.domain.data.competency_dto import Competency +from app.domain.rephrasing_pipeline_execution_dto import RephrasingPipelineExecutionDTO +from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments +from app.pipeline import Pipeline +from app.pipeline.prompts.faq_rephrasal import system_prompt_faq +from app.web.status.status_update import RephrasingCallback + +logger = logging.getLogger(__name__) + + +class RephrasingPipeline(Pipeline): + callback: RephrasingCallback + request_handler: CapabilityRequestHandler + output_parser: PydanticOutputParser + + def __init__(self, callback: Optional[RephrasingCallback] = None): + super().__init__( + implementation_id="rephrasing_pipeline_reference_impl" + ) + self.callback = callback + self.request_handler = CapabilityRequestHandler( + requirements=RequirementList( + gpt_version_equivalent=4.5, + context_length=16385, + ) + ) + self.output_parser = PydanticOutputParser(pydantic_object=Competency) + self.tokens = [] + + def __call__( + self, + dto: RephrasingPipelineExecutionDTO, + prompt: Optional[ChatPromptTemplate] = None, + **kwargs, + ): + if not dto.to_be_rephrased: + raise ValueError("You need to provide a text to rephrase") + + # + prompt = system_prompt_faq.format( + rephrased_text=dto.to_be_rephrased, + ) + prompt = PyrisMessage( + sender=IrisMessageRole.SYSTEM, + 
contents=[TextMessageContentDTO(text_content=prompt)], + ) + + response = self.request_handler.chat( + [prompt], CompletionArguments(temperature=0.4) + ) + self._append_tokens( + response.token_usage, PipelineEnum.IRIS_REPHRASING_PIPELINE + ) + response = response.contents[0].text_content + final_result = response + logging.info(f"Final rephrased text: {final_result}") + self.callback.done(final_result=final_result, tokens=self.tokens) diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index fbc3c9f3..2fd1a168 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -11,16 +11,18 @@ CourseChatPipelineExecutionDTO, CompetencyExtractionPipelineExecutionDTO, ) +from app.domain.rephrasing_pipeline_execution_dto import RephrasingPipelineExecutionDTO from app.pipeline.chat.exercise_chat_agent_pipeline import ExerciseChatAgentPipeline from app.domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import ( LectureChatPipelineExecutionDTO, ) from app.pipeline.chat.lecture_chat_pipeline import LectureChatPipeline +from app.pipeline.rephrasing_pipeline import RephrasingPipeline from app.web.status.status_update import ( ExerciseChatStatusCallback, CourseChatStatusCallback, CompetencyExtractionCallback, - LectureChatCallback, + LectureChatCallback, RephrasingCallback, ) from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline from app.dependencies import TokenValidator @@ -218,6 +220,31 @@ def run_competency_extraction_pipeline_worker( callback.error("Fatal error.", exception=e) +def run_rephrasing_pipeline_worker( + dto: RephrasingPipelineExecutionDTO, _variant: str +): + try: + # Replace with actual Callback class + callback = RephrasingCallback( + run_id=dto.execution.settings.authentication_token, + base_url=dto.execution.settings.artemis_base_url, + initial_stages=dto.execution.initial_stages, + ) + #Replace with actual pipeline RefrasingPipeline + pipeline = RephrasingPipeline(callback=callback) + except 
Exception as e: + logger.error(f"Error preparing rephrasing pipeline: {e}") + logger.error(traceback.format_exc()) + capture_exception(e) + return + + try: + pipeline(dto=dto) + except Exception as e: + logger.error(f"Error running competency extraction pipeline: {e}") + logger.error(traceback.format_exc()) + callback.error("Fatal error.", exception=e) + @router.post( "/competency-extraction/{variant}/run", status_code=status.HTTP_202_ACCEPTED, @@ -232,6 +259,22 @@ def run_competency_extraction_pipeline( thread.start() + +@router.post( + "/rephrasing/{variant}/run", + status_code=status.HTTP_202_ACCEPTED, + dependencies=[Depends(TokenValidator())], +) +def run_rephrasing_pipeline( + variant: str, dto: RephrasingPipelineExecutionDTO +): + logger.info(f"Rephrasing pipeline started with variant: {variant} and dto: {dto}") + thread = Thread( + target=run_rephrasing_pipeline_worker, args=(dto, variant) + ) + thread.start() + + @router.get("/{feature}/variants") def get_pipeline(feature: str): """ @@ -294,5 +337,15 @@ def get_pipeline(feature: str): description="Default lecture chat variant.", ) ] + + case "REPHRASING": + return [ + FeatureDTO( + id="default", + name="Default Variant", + description="Default rephrasing variant.", + ) + ] + case _: return Response(status_code=status.HTTP_400_BAD_REQUEST) diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 39ea3504..6371f848 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -15,6 +15,7 @@ from app.domain.status.lecture_chat_status_update_dto import ( LectureChatStatusUpdateDTO, ) +from app.domain.status.rephrasing_status_update_dto import RephrasingStatusUpdateDTO from app.domain.status.stage_state_dto import StageStateEnum from app.domain.status.stage_dto import StageDTO from app.domain.status.text_exercise_chat_status_update_dto import ( @@ -274,6 +275,26 @@ def __init__( stage = stages[-1] super().__init__(url, run_id, status, stage, len(stages) - 1) 
+class RephrasingCallback(StatusCallback): + def __init__( + self, + run_id: str, + base_url: str, + initial_stages: List[StageDTO], + ): + url = f"{base_url}/api/public/pyris/pipelines/rephrasing/runs/{run_id}/status" + stages = initial_stages or [] + stages.append( + StageDTO( + weight=10, + state=StageStateEnum.NOT_STARTED, + name="Generating Rephrasing", + ) + ) + status = RephrasingStatusUpdateDTO(stages=stages) + stage = stages[-1] + super().__init__(url, run_id, status, stage, len(stages) - 1) + class LectureChatCallback(StatusCallback): def __init__( From cf272ef1982577771bc50d809f36d2ae58536522 Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Thu, 16 Jan 2025 13:07:13 +0100 Subject: [PATCH 02/23] add missing DTO --- app/domain/status/rephrasing_status_update_dto.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 app/domain/status/rephrasing_status_update_dto.py diff --git a/app/domain/status/rephrasing_status_update_dto.py b/app/domain/status/rephrasing_status_update_dto.py new file mode 100644 index 00000000..bc098518 --- /dev/null +++ b/app/domain/status/rephrasing_status_update_dto.py @@ -0,0 +1,6 @@ +from app.domain.data.competency_dto import Competency +from app.domain.status.status_update_dto import StatusUpdateDTO + + +class RephrasingStatusUpdateDTO(StatusUpdateDTO): + result: str = "" From 3a7dbd97d16aaf10067f90550a4ac4336c9a0b3a Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Thu, 16 Jan 2025 13:21:02 +0100 Subject: [PATCH 03/23] Minor rephrasement of Prompt, fixed docker setup for windows --- app/pipeline/prompts/faq_rephrasal.py | 2 +- docker/pyris-dev.yml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/app/pipeline/prompts/faq_rephrasal.py b/app/pipeline/prompts/faq_rephrasal.py index 4654bf0e..6a800b72 100644 --- a/app/pipeline/prompts/faq_rephrasal.py +++ b/app/pipeline/prompts/faq_rephrasal.py @@ -1,5 +1,5 @@ system_prompt_faq = """ -You are a skilled tutor with expertise in computer science and 
practical applications. Your task is to proofread and refine the given text of an FAQ. Specifically, you should: +You are a excellent tutor with expertise in computer science and practical applications teaching an university course. Your task is to proofread and refine the given text of an FAQ. Specifically, you should: 1. Correct all spelling and grammatical errors. 2. Ensure the text is written in simple and clear language, making it easy to understand for students. diff --git a/docker/pyris-dev.yml b/docker/pyris-dev.yml index 7d1a956d..cfb995ea 100644 --- a/docker/pyris-dev.yml +++ b/docker/pyris-dev.yml @@ -14,6 +14,8 @@ services: - ../llm_config.local.yml:/config/llm_config.yml:ro networks: - pyris + ports: + - 8000:8000 weaviate: extends: From 8b7a7c94f6aba90f55782445f10d77fbe98e35bf Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Thu, 16 Jan 2025 20:15:18 +0100 Subject: [PATCH 04/23] Renamed everything to Rewrite --- app/common/PipelineEnum.py | 2 +- ...py => rewriting_pipeline_execution_dto.py} | 4 +-- ..._dto.py => rewriting_status_update_dto.py} | 2 +- .../{faq_rephrasal.py => faq_rewriting.py} | 2 +- ...sing_pipeline.py => rewriting_pipeline.py} | 26 +++++++-------- app/web/routers/pipelines.py | 32 +++++++++---------- app/web/status/status_update.py | 10 +++--- 7 files changed, 39 insertions(+), 39 deletions(-) rename app/domain/{rephrasing_pipeline_execution_dto.py => rewriting_pipeline_execution_dto.py} (65%) rename app/domain/status/{rephrasing_status_update_dto.py => rewriting_status_update_dto.py} (73%) rename app/pipeline/prompts/{faq_rephrasal.py => faq_rewriting.py} (98%) rename app/pipeline/{rephrasing_pipeline.py => rewriting_pipeline.py} (68%) diff --git a/app/common/PipelineEnum.py b/app/common/PipelineEnum.py index d64802a0..b6f84a80 100644 --- a/app/common/PipelineEnum.py +++ b/app/common/PipelineEnum.py @@ -14,5 +14,5 @@ class PipelineEnum(str, Enum): IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE" IRIS_LECTURE_RETRIEVAL_PIPELINE = 
"IRIS_LECTURE_RETRIEVAL_PIPELINE" IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION" - IRIS_REPHRASING_PIPELINE = "IRIS_REPHRASING_PIPELINE" + IRIS_REWRITING_PIPELINE = "IRIS_REWRITING_PIPELINE" NOT_SET = "NOT_SET" diff --git a/app/domain/rephrasing_pipeline_execution_dto.py b/app/domain/rewriting_pipeline_execution_dto.py similarity index 65% rename from app/domain/rephrasing_pipeline_execution_dto.py rename to app/domain/rewriting_pipeline_execution_dto.py index 778ee42b..9af08b5a 100644 --- a/app/domain/rephrasing_pipeline_execution_dto.py +++ b/app/domain/rewriting_pipeline_execution_dto.py @@ -6,6 +6,6 @@ from .data.competency_dto import CompetencyTaxonomy, Competency -class RephrasingPipelineExecutionDTO(BaseModel): +class RewritingPipelineExecutionDTO(BaseModel): execution: PipelineExecutionDTO - to_be_rephrased : str = Field(alias="toBeRephrased") + to_be_rewritten : str = Field(alias="toBeRewritten") diff --git a/app/domain/status/rephrasing_status_update_dto.py b/app/domain/status/rewriting_status_update_dto.py similarity index 73% rename from app/domain/status/rephrasing_status_update_dto.py rename to app/domain/status/rewriting_status_update_dto.py index bc098518..cdab8bb1 100644 --- a/app/domain/status/rephrasing_status_update_dto.py +++ b/app/domain/status/rewriting_status_update_dto.py @@ -2,5 +2,5 @@ from app.domain.status.status_update_dto import StatusUpdateDTO -class RephrasingStatusUpdateDTO(StatusUpdateDTO): +class RewritingStatusUpdateDTO(StatusUpdateDTO): result: str = "" diff --git a/app/pipeline/prompts/faq_rephrasal.py b/app/pipeline/prompts/faq_rewriting.py similarity index 98% rename from app/pipeline/prompts/faq_rephrasal.py rename to app/pipeline/prompts/faq_rewriting.py index 6a800b72..af74085c 100644 --- a/app/pipeline/prompts/faq_rephrasal.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -9,7 +9,7 @@ 6. Avoid repeating any information that is already present in the text. 7. 
Make sure to keep the markdown formatting intact and add formatting for the most important information -{rephrased_text} +{rewritten_text} Respond with a single string containing only the improved version of the text. Your output will be used as a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. """ \ No newline at end of file diff --git a/app/pipeline/rephrasing_pipeline.py b/app/pipeline/rewriting_pipeline.py similarity index 68% rename from app/pipeline/rephrasing_pipeline.py rename to app/pipeline/rewriting_pipeline.py index 9c315412..95c1b503 100644 --- a/app/pipeline/rephrasing_pipeline.py +++ b/app/pipeline/rewriting_pipeline.py @@ -10,23 +10,23 @@ from app.common.pyris_message import PyrisMessage, IrisMessageRole from app.domain.data.text_message_content_dto import TextMessageContentDTO from app.domain.data.competency_dto import Competency -from app.domain.rephrasing_pipeline_execution_dto import RephrasingPipelineExecutionDTO +from app.domain.rewriting_pipeline_execution_dto import RewritingPipelineExecutionDTO from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments from app.pipeline import Pipeline -from app.pipeline.prompts.faq_rephrasal import system_prompt_faq -from app.web.status.status_update import RephrasingCallback +from app.pipeline.prompts.faq_rewriting import system_prompt_faq +from app.web.status.status_update import RewritingCallback logger = logging.getLogger(__name__) -class RephrasingPipeline(Pipeline): - callback: RephrasingCallback +class RewritingPipeline(Pipeline): + callback: RewritingCallback request_handler: CapabilityRequestHandler output_parser: PydanticOutputParser - def __init__(self, callback: Optional[RephrasingCallback] = None): + def __init__(self, callback: Optional[RewritingCallback] = None): super().__init__( - implementation_id="rephrasing_pipeline_reference_impl" + implementation_id="rewriting_pipeline_reference_impl" ) self.callback = callback 
self.request_handler = CapabilityRequestHandler( @@ -40,16 +40,16 @@ def __init__(self, callback: Optional[RephrasingCallback] = None): def __call__( self, - dto: RephrasingPipelineExecutionDTO, + dto: RewritingPipelineExecutionDTO, prompt: Optional[ChatPromptTemplate] = None, **kwargs, ): - if not dto.to_be_rephrased: - raise ValueError("You need to provide a text to rephrase") + if not dto.to_be_rewritten: + raise ValueError("You need to provide a text to rewrite") # prompt = system_prompt_faq.format( - rephrased_text=dto.to_be_rephrased, + rewritten_text=dto.to_be_rewritten, ) prompt = PyrisMessage( sender=IrisMessageRole.SYSTEM, @@ -60,9 +60,9 @@ def __call__( [prompt], CompletionArguments(temperature=0.4) ) self._append_tokens( - response.token_usage, PipelineEnum.IRIS_REPHRASING_PIPELINE + response.token_usage, PipelineEnum.IRIS_REWRITING_PIPELINE ) response = response.contents[0].text_content final_result = response - logging.info(f"Final rephrased text: {final_result}") + logging.info(f"Final rewritten text: {final_result}") self.callback.done(final_result=final_result, tokens=self.tokens) diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 2fd1a168..2167dd94 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -11,18 +11,18 @@ CourseChatPipelineExecutionDTO, CompetencyExtractionPipelineExecutionDTO, ) -from app.domain.rephrasing_pipeline_execution_dto import RephrasingPipelineExecutionDTO +from app.domain.rewriting_pipeline_execution_dto import RewritingPipelineExecutionDTO from app.pipeline.chat.exercise_chat_agent_pipeline import ExerciseChatAgentPipeline from app.domain.chat.lecture_chat.lecture_chat_pipeline_execution_dto import ( LectureChatPipelineExecutionDTO, ) from app.pipeline.chat.lecture_chat_pipeline import LectureChatPipeline -from app.pipeline.rephrasing_pipeline import RephrasingPipeline +from app.pipeline.rewriting_pipeline import RewritingPipeline from app.web.status.status_update 
import ( ExerciseChatStatusCallback, CourseChatStatusCallback, CompetencyExtractionCallback, - LectureChatCallback, RephrasingCallback, + LectureChatCallback, RewritingCallback, ) from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline from app.dependencies import TokenValidator @@ -220,20 +220,20 @@ def run_competency_extraction_pipeline_worker( callback.error("Fatal error.", exception=e) -def run_rephrasing_pipeline_worker( - dto: RephrasingPipelineExecutionDTO, _variant: str +def run_rewriting_pipeline_worker( + dto: RewritingPipelineExecutionDTO, _variant: str ): try: # Replace with actual Callback class - callback = RephrasingCallback( + callback = RewritingCallback( run_id=dto.execution.settings.authentication_token, base_url=dto.execution.settings.artemis_base_url, initial_stages=dto.execution.initial_stages, ) #Replace with actual pipeline RefrasingPipeline - pipeline = RephrasingPipeline(callback=callback) + pipeline = RewritingPipeline(callback=callback) except Exception as e: - logger.error(f"Error preparing rephrasing pipeline: {e}") + logger.error(f"Error preparing rewriting pipeline: {e}") logger.error(traceback.format_exc()) capture_exception(e) return @@ -241,7 +241,7 @@ def run_rephrasing_pipeline_worker( try: pipeline(dto=dto) except Exception as e: - logger.error(f"Error running competency extraction pipeline: {e}") + logger.error(f"Error running rewriting extraction pipeline: {e}") logger.error(traceback.format_exc()) callback.error("Fatal error.", exception=e) @@ -261,16 +261,16 @@ def run_competency_extraction_pipeline( @router.post( - "/rephrasing/{variant}/run", + "/rewriting/{variant}/run", status_code=status.HTTP_202_ACCEPTED, dependencies=[Depends(TokenValidator())], ) -def run_rephrasing_pipeline( - variant: str, dto: RephrasingPipelineExecutionDTO +def run_rewriting_pipeline( + variant: str, dto: RewritingPipelineExecutionDTO ): - logger.info(f"Rephrasing pipeline started with variant: {variant} and dto: {dto}") + 
logger.info(f"Rewriting pipeline started with variant: {variant} and dto: {dto}") thread = Thread( - target=run_rephrasing_pipeline_worker, args=(dto, variant) + target=run_rewriting_pipeline_worker, args=(dto, variant) ) thread.start() @@ -338,12 +338,12 @@ def get_pipeline(feature: str): ) ] - case "REPHRASING": + case "REWRITING": return [ FeatureDTO( id="default", name="Default Variant", - description="Default rephrasing variant.", + description="Default rewriting variant.", ) ] diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 6371f848..28b8338f 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -15,7 +15,7 @@ from app.domain.status.lecture_chat_status_update_dto import ( LectureChatStatusUpdateDTO, ) -from app.domain.status.rephrasing_status_update_dto import RephrasingStatusUpdateDTO +from app.domain.status.rewriting_status_update_dto import RewritingStatusUpdateDTO from app.domain.status.stage_state_dto import StageStateEnum from app.domain.status.stage_dto import StageDTO from app.domain.status.text_exercise_chat_status_update_dto import ( @@ -275,23 +275,23 @@ def __init__( stage = stages[-1] super().__init__(url, run_id, status, stage, len(stages) - 1) -class RephrasingCallback(StatusCallback): +class RewritingCallback(StatusCallback): def __init__( self, run_id: str, base_url: str, initial_stages: List[StageDTO], ): - url = f"{base_url}/api/public/pyris/pipelines/rephrasing/runs/{run_id}/status" + url = f"{base_url}/api/public/pyris/pipelines/rewriting/runs/{run_id}/status" stages = initial_stages or [] stages.append( StageDTO( weight=10, state=StageStateEnum.NOT_STARTED, - name="Generating Rephrasing", + name="Generating Rewritting", ) ) - status = RephrasingStatusUpdateDTO(stages=stages) + status = RewritingStatusUpdateDTO(stages=stages) stage = stages[-1] super().__init__(url, run_id, status, stage, len(stages) - 1) From 2ff6d8213db8fdc0efaf9f03969e42e41020a0eb Mon Sep 17 00:00:00 
2001 From: Tim Cremer Date: Fri, 17 Jan 2025 12:14:58 +0100 Subject: [PATCH 05/23] Removed unnecessary comments --- app/web/routers/pipelines.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 2167dd94..9fd2b465 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -223,14 +223,12 @@ def run_competency_extraction_pipeline_worker( def run_rewriting_pipeline_worker( dto: RewritingPipelineExecutionDTO, _variant: str ): - try: - # Replace with actual Callback class + try: callback = RewritingCallback( run_id=dto.execution.settings.authentication_token, base_url=dto.execution.settings.artemis_base_url, initial_stages=dto.execution.initial_stages, ) - #Replace with actual pipeline RefrasingPipeline pipeline = RewritingPipeline(callback=callback) except Exception as e: logger.error(f"Error preparing rewriting pipeline: {e}") From 79e2dc287d950f7279e7d10508da558576574fd3 Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Sat, 18 Jan 2025 21:33:39 +0100 Subject: [PATCH 06/23] Reformat using --- .../rewriting_pipeline_execution_dto.py | 2 +- app/pipeline/chat/course_chat_pipeline.py | 6 ++++-- .../chat/exercise_chat_agent_pipeline.py | 4 +++- app/pipeline/prompts/faq_rewriting.py | 2 +- app/pipeline/rewriting_pipeline.py | 10 +++------- app/pipeline/shared/citation_pipeline.py | 3 ++- app/web/routers/pipelines.py | 19 +++++++------------ app/web/status/status_update.py | 1 + 8 files changed, 22 insertions(+), 25 deletions(-) diff --git a/app/domain/rewriting_pipeline_execution_dto.py b/app/domain/rewriting_pipeline_execution_dto.py index 9af08b5a..b8560899 100644 --- a/app/domain/rewriting_pipeline_execution_dto.py +++ b/app/domain/rewriting_pipeline_execution_dto.py @@ -8,4 +8,4 @@ class RewritingPipelineExecutionDTO(BaseModel): execution: PipelineExecutionDTO - to_be_rewritten : str = Field(alias="toBeRewritten") + to_be_rewritten: str = 
Field(alias="toBeRewritten") diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index 9e3306f9..f0904116 100644 --- a/app/pipeline/chat/course_chat_pipeline.py +++ b/app/pipeline/chat/course_chat_pipeline.py @@ -100,14 +100,16 @@ def __init__( requirements=RequirementList( gpt_version_equivalent=4.5, ) - ), completion_args=completion_args + ), + completion_args=completion_args, ) self.llm_small = IrisLangchainChatModel( request_handler=CapabilityRequestHandler( requirements=RequirementList( gpt_version_equivalent=4.25, ) - ), completion_args=completion_args + ), + completion_args=completion_args, ) self.callback = callback diff --git a/app/pipeline/chat/exercise_chat_agent_pipeline.py b/app/pipeline/chat/exercise_chat_agent_pipeline.py index ff9e86da..676f96c6 100644 --- a/app/pipeline/chat/exercise_chat_agent_pipeline.py +++ b/app/pipeline/chat/exercise_chat_agent_pipeline.py @@ -533,7 +533,9 @@ def lecture_content_retrieval() -> str: ] ) - guide_response = (self.prompt | self.llm_small | StrOutputParser()).invoke( + guide_response = ( + self.prompt | self.llm_small | StrOutputParser() + ).invoke( { "response": out, } diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index af74085c..553036b7 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -12,4 +12,4 @@ {rewritten_text} Respond with a single string containing only the improved version of the text. Your output will be used as a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. 
-""" \ No newline at end of file +""" diff --git a/app/pipeline/rewriting_pipeline.py b/app/pipeline/rewriting_pipeline.py index 95c1b503..10af525a 100644 --- a/app/pipeline/rewriting_pipeline.py +++ b/app/pipeline/rewriting_pipeline.py @@ -14,7 +14,7 @@ from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments from app.pipeline import Pipeline from app.pipeline.prompts.faq_rewriting import system_prompt_faq -from app.web.status.status_update import RewritingCallback +from app.web.status.status_update import RewritingCallback logger = logging.getLogger(__name__) @@ -25,9 +25,7 @@ class RewritingPipeline(Pipeline): output_parser: PydanticOutputParser def __init__(self, callback: Optional[RewritingCallback] = None): - super().__init__( - implementation_id="rewriting_pipeline_reference_impl" - ) + super().__init__(implementation_id="rewriting_pipeline_reference_impl") self.callback = callback self.request_handler = CapabilityRequestHandler( requirements=RequirementList( @@ -59,9 +57,7 @@ def __call__( response = self.request_handler.chat( [prompt], CompletionArguments(temperature=0.4) ) - self._append_tokens( - response.token_usage, PipelineEnum.IRIS_REWRITING_PIPELINE - ) + self._append_tokens(response.token_usage, PipelineEnum.IRIS_REWRITING_PIPELINE) response = response.contents[0].text_content final_result = response logging.info(f"Final rewritten text: {final_result}") diff --git a/app/pipeline/shared/citation_pipeline.py b/app/pipeline/shared/citation_pipeline.py index 22e13360..fc71016b 100644 --- a/app/pipeline/shared/citation_pipeline.py +++ b/app/pipeline/shared/citation_pipeline.py @@ -57,7 +57,8 @@ def create_formatted_string(self, paragraphs): paragraph.get(LectureSchema.LECTURE_NAME.value), paragraph.get(LectureSchema.LECTURE_UNIT_NAME.value), paragraph.get(LectureSchema.PAGE_NUMBER.value), - paragraph.get(LectureSchema.LECTURE_UNIT_LINK.value) or "No link available", + paragraph.get(LectureSchema.LECTURE_UNIT_LINK.value) + or 
"No link available", paragraph.get(LectureSchema.PAGE_TEXT_CONTENT.value), ) formatted_string += lct diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 9fd2b465..8c9dd6c0 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -22,7 +22,8 @@ ExerciseChatStatusCallback, CourseChatStatusCallback, CompetencyExtractionCallback, - LectureChatCallback, RewritingCallback, + LectureChatCallback, + RewritingCallback, ) from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline from app.dependencies import TokenValidator @@ -220,10 +221,8 @@ def run_competency_extraction_pipeline_worker( callback.error("Fatal error.", exception=e) -def run_rewriting_pipeline_worker( - dto: RewritingPipelineExecutionDTO, _variant: str -): - try: +def run_rewriting_pipeline_worker(dto: RewritingPipelineExecutionDTO, _variant: str): + try: callback = RewritingCallback( run_id=dto.execution.settings.authentication_token, base_url=dto.execution.settings.artemis_base_url, @@ -243,6 +242,7 @@ def run_rewriting_pipeline_worker( logger.error(traceback.format_exc()) callback.error("Fatal error.", exception=e) + @router.post( "/competency-extraction/{variant}/run", status_code=status.HTTP_202_ACCEPTED, @@ -257,19 +257,14 @@ def run_competency_extraction_pipeline( thread.start() - @router.post( "/rewriting/{variant}/run", status_code=status.HTTP_202_ACCEPTED, dependencies=[Depends(TokenValidator())], ) -def run_rewriting_pipeline( - variant: str, dto: RewritingPipelineExecutionDTO -): +def run_rewriting_pipeline(variant: str, dto: RewritingPipelineExecutionDTO): logger.info(f"Rewriting pipeline started with variant: {variant} and dto: {dto}") - thread = Thread( - target=run_rewriting_pipeline_worker, args=(dto, variant) - ) + thread = Thread(target=run_rewriting_pipeline_worker, args=(dto, variant)) thread.start() diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 28b8338f..56b744e4 100644 --- 
a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -275,6 +275,7 @@ def __init__( stage = stages[-1] super().__init__(url, run_id, status, stage, len(stages) - 1) + class RewritingCallback(StatusCallback): def __init__( self, From 07153670ab8bbed14b05d2d067c782f7b364cdfd Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Sat, 18 Jan 2025 22:30:02 +0100 Subject: [PATCH 07/23] Reformat using --- app/domain/rewriting_pipeline_execution_dto.py | 4 ---- app/pipeline/prompts/faq_rewriting.py | 5 +++-- app/pipeline/rewriting_pipeline.py | 1 - 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/app/domain/rewriting_pipeline_execution_dto.py b/app/domain/rewriting_pipeline_execution_dto.py index b8560899..2a25690e 100644 --- a/app/domain/rewriting_pipeline_execution_dto.py +++ b/app/domain/rewriting_pipeline_execution_dto.py @@ -1,9 +1,5 @@ -from typing import List - from pydantic import Field, BaseModel - from . import PipelineExecutionDTO -from .data.competency_dto import CompetencyTaxonomy, Competency class RewritingPipelineExecutionDTO(BaseModel): diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 553036b7..87d09161 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -1,9 +1,10 @@ system_prompt_faq = """ -You are a excellent tutor with expertise in computer science and practical applications teaching an university course. Your task is to proofread and refine the given text of an FAQ. Specifically, you should: +You are an excellent tutor with expertise in computer science and practical applications teaching an university course. +Your task is to proofread and refine the given text of an FAQ. Specifically, you should: 1. Correct all spelling and grammatical errors. 2. Ensure the text is written in simple and clear language, making it easy to understand for students. -3. Preserve the original meaning and intent of the text. +3. 
Preserve the original meaning and intent of the text while maintaining clarity. 4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, convert them into complete sentences. 5. Make sure to use the original language of the input text 6. Avoid repeating any information that is already present in the text. diff --git a/app/pipeline/rewriting_pipeline.py b/app/pipeline/rewriting_pipeline.py index 10af525a..87f05166 100644 --- a/app/pipeline/rewriting_pipeline.py +++ b/app/pipeline/rewriting_pipeline.py @@ -33,7 +33,6 @@ def __init__(self, callback: Optional[RewritingCallback] = None): context_length=16385, ) ) - self.output_parser = PydanticOutputParser(pydantic_object=Competency) self.tokens = [] def __call__( From 271187e7b739347b66fd0185bb489619d1d0171d Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 11:27:59 +0100 Subject: [PATCH 08/23] Added further prompt to avoid small text issue --- app/pipeline/prompts/faq_rewriting.py | 1 + app/pipeline/rewriting_pipeline.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 87d09161..a8e6160b 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -9,6 +9,7 @@ 5. Make sure to use the original language of the input text 6. Avoid repeating any information that is already present in the text. 7. Make sure to keep the markdown formatting intact and add formatting for the most important information +8. 
If someone does input a very short text, that does not resemble to be an answer to a potential question please make sure to respond accordingly {rewritten_text} diff --git a/app/pipeline/rewriting_pipeline.py b/app/pipeline/rewriting_pipeline.py index 87f05166..2b227ab7 100644 --- a/app/pipeline/rewriting_pipeline.py +++ b/app/pipeline/rewriting_pipeline.py @@ -59,5 +59,4 @@ def __call__( self._append_tokens(response.token_usage, PipelineEnum.IRIS_REWRITING_PIPELINE) response = response.contents[0].text_content final_result = response - logging.info(f"Final rewritten text: {final_result}") self.callback.done(final_result=final_result, tokens=self.tokens) From 8131455c039f0e7824d2272baef1e298e581fd3f Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 12:05:07 +0100 Subject: [PATCH 09/23] Try to fix mini issue --- app/pipeline/prompts/faq_rewriting.py | 12 +++++++++--- app/pipeline/rewriting_pipeline.py | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index a8e6160b..9e72209b 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -5,13 +5,19 @@ 1. Correct all spelling and grammatical errors. 2. Ensure the text is written in simple and clear language, making it easy to understand for students. 3. Preserve the original meaning and intent of the text while maintaining clarity. -4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, convert them into complete sentences. +4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, +convert them into complete sentences. 5. Make sure to use the original language of the input text 6. Avoid repeating any information that is already present in the text. 7. Make sure to keep the markdown formatting intact and add formatting for the most important information -8. 
If someone does input a very short text, that does not resemble to be an answer to a potential question please make sure to respond accordingly +8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make +sure to respond accordingly + +Respond with a single string containing only the improved version of the text. Your output will be used as an answer to +a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. + +The text to be rephrased starts now and last until the of the prompt: {rewritten_text} -Respond with a single string containing only the improved version of the text. Your output will be used as a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. """ diff --git a/app/pipeline/rewriting_pipeline.py b/app/pipeline/rewriting_pipeline.py index 2b227ab7..07b0d575 100644 --- a/app/pipeline/rewriting_pipeline.py +++ b/app/pipeline/rewriting_pipeline.py @@ -9,7 +9,6 @@ from app.common.PipelineEnum import PipelineEnum from app.common.pyris_message import PyrisMessage, IrisMessageRole from app.domain.data.text_message_content_dto import TextMessageContentDTO -from app.domain.data.competency_dto import Competency from app.domain.rewriting_pipeline_execution_dto import RewritingPipelineExecutionDTO from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments from app.pipeline import Pipeline From 89decd1c92669151ceb9a8e60730a2d9ccdc9e8c Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 12:06:31 +0100 Subject: [PATCH 10/23] Try to fix mini issue --- app/pipeline/prompts/faq_rewriting.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 9e72209b..16dc0f0f 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -13,11 +13,13 @@ 8. 
If someone does input a very short text, that does not resemble to be an answer to a potential question please make sure to respond accordingly -Respond with a single string containing only the improved version of the text. Your output will be used as an answer to -a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. - -The text to be rephrased starts now and last until the of the prompt: +The text to be rephrased starts now: {rewritten_text} +The rephrased text ends prior to this. + +Respond with a single string containing only the improved version of the text. Your output will be used as an answer to +a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. + """ From 2714fa0b0849e3c37cfd313ec5358010503422ca Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 12:06:46 +0100 Subject: [PATCH 11/23] Try to fix mini issue --- app/pipeline/prompts/faq_rewriting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 16dc0f0f..a89a409a 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -17,7 +17,7 @@ {rewritten_text} -The rephrased text ends prior to this. +The rephrased text ends prior to line. Respond with a single string containing only the improved version of the text. Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. 
From b848eb0bfba4f72639062007573c6d6c721e355d Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 12:13:29 +0100 Subject: [PATCH 12/23] fixed prompt once more --- app/pipeline/prompts/faq_rewriting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index a89a409a..9001b887 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -17,7 +17,7 @@ {rewritten_text} -The rephrased text ends prior to line. +The rephrased text ends prior to this line. Respond with a single string containing only the improved version of the text. Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. From 0d9fb04e5f6e402dbe515687fd04a2278599d749 Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 12:22:51 +0100 Subject: [PATCH 13/23] fixed prompt once more --- app/pipeline/prompts/faq_rewriting.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 9001b887..e861a93c 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -11,13 +11,12 @@ 6. Avoid repeating any information that is already present in the text. 7. Make sure to keep the markdown formatting intact and add formatting for the most important information 8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make -sure to respond accordingly +sure to respond accordingly. Also, if the input The text to be rephrased starts now: - +###START### {rewritten_text} - -The rephrased text ends prior to this line. +###END### Respond with a single string containing only the improved version of the text. 
Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. From 7d9bffa9e82cd75a50cf2e977f94b154eb511c92 Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 12:32:26 +0100 Subject: [PATCH 14/23] fixed prompt once more --- app/pipeline/prompts/faq_rewriting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index e861a93c..704dc35d 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -13,7 +13,7 @@ 8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make sure to respond accordingly. Also, if the input -The text to be rephrased starts now: +The text to be rephrased starts after the start tag and ends with an end tag: ###START### {rewritten_text} ###END### From 8685800364d3651936eccf8f8a28e5cad8025b93 Mon Sep 17 00:00:00 2001 From: TIm Cremer Date: Tue, 21 Jan 2025 12:32:59 +0100 Subject: [PATCH 15/23] fixed prompt once more --- app/pipeline/prompts/faq_rewriting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 704dc35d..55e3959b 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -13,7 +13,7 @@ 8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make sure to respond accordingly. 
Also, if the input -The text to be rephrased starts after the start tag and ends with an end tag: +The text to be rephrased starts after the start tag (###START###) and ends before the an end tag (###END###): ###START### {rewritten_text} ###END### From f35f2b51bdaabdec719f46142b1658d9f94ee1c6 Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Tue, 21 Jan 2025 17:49:29 +0100 Subject: [PATCH 16/23] Test --- app/common/PipelineEnum.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/common/PipelineEnum.py b/app/common/PipelineEnum.py index b6f84a80..fc439a65 100644 --- a/app/common/PipelineEnum.py +++ b/app/common/PipelineEnum.py @@ -14,5 +14,4 @@ class PipelineEnum(str, Enum): IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE" IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE" IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION" - IRIS_REWRITING_PIPELINE = "IRIS_REWRITING_PIPELINE" NOT_SET = "NOT_SET" From 76e5c9b3afd169c7f6b0a1e20c02b91fb5a0ebf1 Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Tue, 21 Jan 2025 17:50:13 +0100 Subject: [PATCH 17/23] Test --- app/common/PipelineEnum.py | 1 + app/pipeline/prompts/faq_rewriting.py | 42 +++++++++++++++------------ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/app/common/PipelineEnum.py b/app/common/PipelineEnum.py index fc439a65..b6f84a80 100644 --- a/app/common/PipelineEnum.py +++ b/app/common/PipelineEnum.py @@ -14,4 +14,5 @@ class PipelineEnum(str, Enum): IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE" IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE" IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION" + IRIS_REWRITING_PIPELINE = "IRIS_REWRITING_PIPELINE" NOT_SET = "NOT_SET" diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 55e3959b..cac71de7 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -1,24 +1,30 @@ -system_prompt_faq = """ -You are an excellent tutor with expertise in 
computer science and practical applications teaching an university course. -Your task is to proofread and refine the given text of an FAQ. Specifically, you should: +system_prompt_faq = """: +You are an excellent tutor with expertise in computer science and its practical applications, teaching at a university +level. Your task is to proofread and enhance the given FAQ text. Please follow these guidelines: -1. Correct all spelling and grammatical errors. -2. Ensure the text is written in simple and clear language, making it easy to understand for students. -3. Preserve the original meaning and intent of the text while maintaining clarity. -4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, -convert them into complete sentences. -5. Make sure to use the original language of the input text -6. Avoid repeating any information that is already present in the text. -7. Make sure to keep the markdown formatting intact and add formatting for the most important information -8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make -sure to respond accordingly. Also, if the input +1. Accuracy: Correct all spelling, grammatical, and punctuation errors. +2. Clarity: Rewrite the text in simple and clear language so that it is easy for students to understand. +3. Content Fidelity: Preserve the original meaning and intent of the text. +4. Complete Sentences: Always write in complete sentences. If the input is presented as a list, convert it into coherent paragraphs. +5. Original Language: Use the same language as the input text. +6. Avoid Repetition: Do not repeat information already provided in the text. +7. Markdown Formatting: Retain any Markdown formatting and emphasize key information appropriately. 
+ +Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, +respond appropriately and point this out. +Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform. +Ensure it is clear, concise, and well-structured. + +Exclude the start and end markers from your response and provide only the improved content. + +The markers are defined as following: +Start of the text: ###START### +End of the text: ###END### + +The text that has to be rewritten starts now: -The text to be rephrased starts after the start tag (###START###) and ends before the an end tag (###END###): ###START### {rewritten_text} ###END### -Respond with a single string containing only the improved version of the text. Your output will be used as an answer to -a frequently asked question (FAQ) on the Artemis platform, so make sure it is clear and concise. - -""" +""" \ No newline at end of file From 402c65c04d8cf3d1d873266275315b29c39f8009 Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Wed, 22 Jan 2025 07:02:33 +0100 Subject: [PATCH 18/23] Test --- app/pipeline/prompts/faq_rewriting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index cac71de7..8c4ba16c 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -6,7 +6,7 @@ 2. Clarity: Rewrite the text in simple and clear language so that it is easy for students to understand. 3. Content Fidelity: Preserve the original meaning and intent of the text. 4. Complete Sentences: Always write in complete sentences. If the input is presented as a list, convert it into coherent paragraphs. -5. Original Language: Use the same language as the input text. +5. Original Language: Use the same language as the input text. The input text will be either german or english. 6. 
Avoid Repetition: Do not repeat information already provided in the text. 7. Markdown Formatting: Retain any Markdown formatting and emphasize key information appropriately. From 8776ea2929978e385c898735d00cba485263c181 Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Wed, 22 Jan 2025 07:14:40 +0100 Subject: [PATCH 19/23] Pushed another prompt improvement --- app/pipeline/prompts/faq_rewriting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index 8c4ba16c..b7e117ab 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -5,7 +5,8 @@ 1. Accuracy: Correct all spelling, grammatical, and punctuation errors. 2. Clarity: Rewrite the text in simple and clear language so that it is easy for students to understand. 3. Content Fidelity: Preserve the original meaning and intent of the text. -4. Complete Sentences: Always write in complete sentences. If the input is presented as a list, convert it into coherent paragraphs. +4. Complete Sentences: Always write in complete sentences. If the input is presented as a list, convert it into +coherent paragraphs, but try to keep the structure of the input. 5. Original Language: Use the same language as the input text. The input text will be either german or english. 6. Avoid Repetition: Do not repeat information already provided in the text. 7. Markdown Formatting: Retain any Markdown formatting and emphasize key information appropriately. 
@@ -27,4 +28,4 @@ {rewritten_text} ###END### -""" \ No newline at end of file +""" From 9ddf634f4985b0a42c282d239e1e4fbf1c13e3a9 Mon Sep 17 00:00:00 2001 From: Tim Cremer Date: Wed, 22 Jan 2025 07:17:36 +0100 Subject: [PATCH 20/23] Redo task part of the prompt --- app/pipeline/prompts/faq_rewriting.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index b7e117ab..dc4d4700 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -2,14 +2,16 @@ You are an excellent tutor with expertise in computer science and its practical applications, teaching at a university level. Your task is to proofread and enhance the given FAQ text. Please follow these guidelines: -1. Accuracy: Correct all spelling, grammatical, and punctuation errors. -2. Clarity: Rewrite the text in simple and clear language so that it is easy for students to understand. -3. Content Fidelity: Preserve the original meaning and intent of the text. -4. Complete Sentences: Always write in complete sentences. If the input is presented as a list, convert it into -coherent paragraphs, but try to keep the structure of the input. -5. Original Language: Use the same language as the input text. The input text will be either german or english. -6. Avoid Repetition: Do not repeat information already provided in the text. -7. Markdown Formatting: Retain any Markdown formatting and emphasize key information appropriately. +1. Correct all spelling and grammatical errors. +2. Ensure the text is written in simple and clear language, making it easy to understand for students. +3. Preserve the original meaning and intent of the text while maintaining clarity. +4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, +convert them into complete sentences. +5. Make sure to use the original language of the input text +6. 
Avoid repeating any information that is already present in the text. +7. Make sure to keep the markdown formatting intact and add formatting for the most important information +8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make +sure to respond accordingly. Also, if the input text is too short, please point this out. Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, respond appropriately and point this out. From 8760e60d1dd3e1fa11f86ab27dd174eb4fc3ff8d Mon Sep 17 00:00:00 2001 From: "Felix T.J. Dietrich" Date: Mon, 27 Jan 2025 09:52:04 +0100 Subject: [PATCH 21/23] fix lint issues and prompt formatting --- .../status/rewriting_status_update_dto.py | 1 - app/pipeline/prompts/faq_rewriting.py | 19 +++++++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/app/domain/status/rewriting_status_update_dto.py b/app/domain/status/rewriting_status_update_dto.py index cdab8bb1..f4351342 100644 --- a/app/domain/status/rewriting_status_update_dto.py +++ b/app/domain/status/rewriting_status_update_dto.py @@ -1,4 +1,3 @@ -from app.domain.data.competency_dto import Competency from app.domain.status.status_update_dto import StatusUpdateDTO diff --git a/app/pipeline/prompts/faq_rewriting.py b/app/pipeline/prompts/faq_rewriting.py index dc4d4700..da88b427 100644 --- a/app/pipeline/prompts/faq_rewriting.py +++ b/app/pipeline/prompts/faq_rewriting.py @@ -1,22 +1,22 @@ -system_prompt_faq = """: -You are an excellent tutor with expertise in computer science and its practical applications, teaching at a university +system_prompt_faq = """\ +You are an excellent tutor with expertise in computer science and its practical applications, teaching at a university level. Your task is to proofread and enhance the given FAQ text. Please follow these guidelines: 1. Correct all spelling and grammatical errors. 2.
Ensure the text is written in simple and clear language, making it easy to understand for students. 3. Preserve the original meaning and intent of the text while maintaining clarity. -4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, +4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, \ convert them into complete sentences. -5. Make sure to use the original language of the input text +5. Make sure to use the original language of the input text. 6. Avoid repeating any information that is already present in the text. -7. Make sure to keep the markdown formatting intact and add formatting for the most important information -8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make +7. Make sure to keep the markdown formatting intact and add formatting for the most important information. +8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make sure to respond accordingly. Also, if the input text is too short, please point this out. -Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, +Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, \ respond appropriately and point this out. Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform. -Ensure it is clear, concise, and well-structured. +Ensure it is clear, concise, and well-structured. Exclude the start and end markers from your response and provide only the improved content.
@@ -28,6 +28,5 @@ ###START### {rewritten_text} -###END### - +###END###\ """ From a62d15b035e3e05d11655fe919f897e6d8ff83b4 Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Tue, 28 Jan 2025 21:08:35 +0100 Subject: [PATCH 22/23] black --- app/web/routers/pipelines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index bffe1185..ffd34c85 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -356,7 +356,7 @@ def get_pipeline(feature: str): FeatureDTO( id="default", name="Default Variant", - description="Default lecture chat variant and rewriting variant." + description="Default lecture chat variant and rewriting variant.", ) ] case "REWRITING": From e81d157398ba1c794e22e60c89891303c8519eca Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Tue, 28 Jan 2025 21:12:32 +0100 Subject: [PATCH 23/23] wrong description --- app/web/routers/pipelines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index ffd34c85..3431c196 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -356,7 +356,7 @@ def get_pipeline(feature: str): FeatureDTO( id="default", name="Default Variant", - description="Default lecture chat variant and rewriting variant.", + description="Default lecture chat variant.", ) ] case "REWRITING":