feat: Implement openai automatic staff grading #1

Open · wants to merge 1 commit into main
@@ -162,4 +162,24 @@
{% trans "Specify whether learners can see the rubric while they are working on their response." %}
</p>
</li>
<li id="openassessment_ai_completion_wrapper" class="field comp-setting-entry">
<div class="wrapper-comp-setting">
<label for="openassessment_ai_completion" class="setting-label">{% trans "Prompt for AI completion"%}</label>
<textarea style="min-height:200px;width:45%;" id="openassessment_ai_completion" class="input setting-input">{{ ai_completion }}</textarea>
</div>
<p class="setting-help">{% trans "Available template variables are: question, student_answer, prompts" %}</p>
</li>
<li id="openassessment_ai_model_wrapper" class="field comp-setting-entry">
<div id="openassessment_ai_model_selection_wrapper" class="wrapper-comp-setting">
<div class="wrapper-comp-setting">
<label for="openassessment_ai_model_selector" class="setting-label">{% trans "Select AI model"%}</label>
<select id="openassessment_ai_model_selector" class="input setting-input" name="ai_model selection">
{% for ai_model, ai_model_name in ai_models.items %}
<option value='{{ ai_model }}' {% if selected_ai_model == ai_model %} selected="true" {% endif %}>{{ ai_model_name }}</option>
{% endfor %}

</select>
</div>
</div>
</li>
</ul>
41 changes: 41 additions & 0 deletions openassessment/xblock/ai_processors.py
@@ -0,0 +1,41 @@
from django.conf import settings
import openai

def get_openai_key():
    """Return the OpenAI API key from Django settings, or None if it is not configured."""
    return getattr(settings, 'OPENAI_API_KEY', None)

def davinci_model_processor(question: str, messages: list = None) -> str:
    """
    Grade the response with the text-davinci-003 completion model.
    """
    openai.api_key = get_openai_key()
    if openai.api_key:
        return openai.Completion.create(
            engine="text-davinci-003",
            prompt=question,
            max_tokens=500,  # TODO: Move to settings. Use an automatic token counter - https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
            temperature=1,
        ).choices[0].text
    return "OpenAI API key not set."


def gpt_model_processor(question: str, messages: list = None) -> str:
    """
    Grade the response with the gpt-3.5-turbo chat model.
    """
    openai.api_key = get_openai_key()
    if openai.api_key:
        if not messages:
            messages = [{"role": "user", "content": question}]

        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=1000,
            temperature=0.7,
        )
        return completion.choices[0].message["content"]
    return "OpenAI API key not set."
188 changes: 188 additions & 0 deletions openassessment/xblock/automatic_assessment.py
@@ -0,0 +1,188 @@
"""
TODO: Add module description.
"""
import re
import logging


from django.conf import settings

from openassessment.assessment.api import staff as staff_api
from openassessment.assessment.errors import StaffAssessmentInternalError, StaffAssessmentRequestError
from openassessment.workflow import api as workflow_api
from openassessment.xblock import ai_processors

from .data_conversion import clean_criterion_feedback

logger = logging.getLogger(__name__)

AI_MODELS_LIST = {
    'gpt_model_processor': "Chat GPT",
    'davinci_model_processor': "Text-Davinci"
}


# TODO: Move methods like below to utils.
def format_prompts(criterion):
    """Render a criterion's options as numbered "index: explanation" lines."""
    prompts_text = ""

    for index, elem in enumerate(criterion['options']):
        prompts_text += f"{index}: {elem['explanation']}\n"

    return prompts_text


def get_student_response_from_ai(question, answer, prompts, ai_prompt, ai_model):
    """Build the prompt for the selected model and return the raw AI reply, or None if the model is unknown."""
    if ai_model in AI_MODELS_LIST:
        if ai_processor := getattr(ai_processors, ai_model, None):
            messages = []
            prompt = ai_prompt.format(question=question, student_answer=answer, prompts=prompts)

            if ai_model == 'gpt_model_processor':
                messages = [
                    {"role": "system", "content": f"Instructor enters the following prompt for the ChatGPT: {ai_prompt}"},
                    {"role": "system", "content": f"The question to the student is: {question}."},
                    {"role": "system", "content": f"The rubric for grading the student's response: {prompts}."},
                    {"role": "system", "content": f"Please select the most suitable rubric number and include the phrase RUBRIC_OPTION_IS: <rubric number>; also follow the prompt {ai_prompt}"},
                    {"role": "user", "content": f"My answer is: {answer}."},
                ]

            logger.info("%s prompt", AI_MODELS_LIST.get(ai_model))
            logger.info(prompt)
            return ai_processor(prompt, messages)
    return None


def process_ai_response(text):
    """Extract the selected rubric option index and the trailing feedback from the AI reply."""
    regexp_expr = r"RUBRIC_OPTION_IS: (?:\d)\.?"  # TODO: Move to settings or a config model.
    resp = re.findall(regexp_expr, text)

    # TODO: Refactor for better error handling and optimization.
    try:
        response_option = int(''.join(filter(str.isdigit, resp[0])))
        response_explanation = text.split(resp[0])[-1].strip()
    except Exception:
        return None, None

    return response_option, response_explanation
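
For illustration, a reply in the expected format splits into the option index and the trailing feedback; the input string here is hypothetical:

option, feedback = process_ai_response(
    "Some preamble. RUBRIC_OPTION_IS: 2. Subnetting is covered correctly, but VLSM is missed."
)
# option == 2
# feedback == "Subnetting is covered correctly, but VLSM is missed."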


# TODO: Move to config or a Django config model.
QUESTION_TEMPLATE = """
You are a Cisco instructor; critique the answer based on Wendell Odom's Cisco Press CCNA book.
Question prompt is:
{question}\n
Student's answer is: {student_answer}.
Provide the response in the format "RIGHT ANSWER IS": <most suitable choice here>.
Choose the most suitable choice from the following:
{prompts}
Write short (200 characters) feedback on the student's answer (what was correct and what was wrong).
The maximum response length should be 200 characters.
"""

QUESTION_TEMPLATE1 = """
You are a Cisco instructor; critique the answer based on Wendell Odom's Cisco Press CCNA book.
Question prompt is:
"{question}"\n
Responder's answer is: "{student_answer}".

Write the answer in the format "GRADE FOR STUDENT ANSWER IS": <choice as a number from the rubric>
Rubric:
{prompts}\n
In addition, write short feedback (200 words) on the student's answer: what was correct and what was wrong.
"""

QUESTION_TEMPLATE2 = """
You are a NASA instructor; please grade the reflection based on the rubric.
Question prompt is: "{question}"\n
Responder's answer is: "{student_answer}".

Write short feedback (200 words) about the responder's answer in the first person; do not critique the answer, but respond empathetically.
"""


def generate_automatic_assessment(question, criterias, student_answer_data, student_id, rubric_dict, ai_completion,
                                  ai_model):
    """
    Grade a submission with the configured AI model and save the result as a staff assessment.
    """
    result_dict_data = {
        'options_selected': {},
        'criterion_feedback': {},
        'overall_feedback': '',
        'submission_uuid': student_answer_data['uuid'],
        'assess_type': 'full-grade'
    }

    # TODO: Validate the incoming data to avoid errors.
    student_answer_text = student_answer_data['answer']['parts'][0]['text']

    for criteria in criterias:
        prompts = format_prompts(criteria)
        response = get_student_response_from_ai(question, student_answer_text, prompts, ai_completion, ai_model)

        logger.info("Response from AI is: %s", response)
        if response:
            choice, explanation = process_ai_response(response)

            if choice is None:
                # Parsing failed: fall back to the first option and keep the raw reply as feedback.
                choice = 0
                explanation = response

            if choice or explanation:
                # TODO: Refactor?
                result_dict_data['options_selected'][criteria['name']] = criteria['options'][choice]['name']
                result_dict_data['criterion_feedback'][criteria['name']] = explanation

    if not result_dict_data['options_selected']:
        # Return early to avoid saving an empty staff assessment.
        return

    # Save as a staff assessment.
    # TODO: Move the code below to a separate method (separation of concerns).
    try:
        assessment = staff_api.create_assessment(
            result_dict_data['submission_uuid'],
            student_id,
            result_dict_data['options_selected'],
            clean_criterion_feedback(criterias, result_dict_data['criterion_feedback']),
            result_dict_data['overall_feedback'],
            rubric_dict,
        )
        assess_type = result_dict_data.get('assess_type', 'regrade')
        workflow_api.update_from_assessments(
            assessment["submission_uuid"],
            None,
            override_submitter_requirements=(assess_type == 'regrade')
        )
    except StaffAssessmentRequestError:
        logger.warning(
            "An error occurred while submitting a staff assessment "
            "for the submission %s",
            result_dict_data['submission_uuid'],
            exc_info=True
        )
        return False
    except StaffAssessmentInternalError:
        logger.exception(
            "An error occurred while submitting a staff assessment "
            "for the submission %s",
            result_dict_data['submission_uuid']
        )
        return False
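
A sketch of how the XBlock side might invoke the grader, assuming the caller already has the rubric and the learner's submission dict; the variable names here are illustrative, not part of this PR:

generate_automatic_assessment(
    question=question_text,               # the prompt shown to the learner
    criterias=rubric_dict['criteria'],    # each criterion carries 'name' and 'options'
    student_answer_data=submission,       # must contain 'uuid' and answer 'parts'
    student_id=staff_user_id,             # recorded as the assessment's scorer
    rubric_dict=rubric_dict,
    ai_completion=block.ai_completion,    # the editable prompt template
    ai_model=block.ai_model,              # key into AI_MODELS_LIST
)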
15 changes: 15 additions & 0 deletions openassessment/xblock/openassessmentblock.py
@@ -51,6 +51,7 @@
from openassessment.xblock.xml import parse_from_xml, serialize_content_to_xml
from openassessment.xblock.editor_config import AVAILABLE_EDITORS
from openassessment.xblock.load_static import LoadStatic
from openassessment.xblock.automatic_assessment import QUESTION_TEMPLATE1

logger = logging.getLogger(__name__) # pylint: disable=invalid-name

@@ -306,6 +307,18 @@ class OpenAssessmentBlock(MessageMixin,
help="Should the rubric be visible to learners in the response section?"
)

ai_completion = String(
scope=Scope.settings,
default=QUESTION_TEMPLATE1,
help="AI question template.",
)

ai_model = String(
scope=Scope.settings,
default="",
help="AI model.",
)

    @property
    def course_id(self):
        return str(self.xmodule_runtime.course_id)  # pylint: disable=no-member
@@ -901,6 +914,7 @@ def parse_xml(cls, node, runtime, keys, id_generator):
        block.teams_enabled = config['teams_enabled']
        block.selected_teamset_id = config['selected_teamset_id']
        block.show_rubric_during_response = config['show_rubric_during_response']
        block.ai_model = config['ai_model']
        return block

    @property
@@ -1031,6 +1045,7 @@ def render_assessment(self, path, context_dict=None):
        context_dict = {}

        context_dict['text_response_editor'] = self.text_response_editor
        context_dict['ai_model'] = self.ai_model

        template = get_template(path)
        return Response(template.render(context_dict), content_type='application/html', charset='UTF-8')
2 changes: 2 additions & 0 deletions openassessment/xblock/schema.py
@@ -171,4 +171,6 @@ def datetime_validator(value):
        })
    ],
    Required('show_rubric_during_response', default=False): bool,
    Required('ai_completion'): utf8_validator,
    Required('ai_model'): utf8_validator,
})
8 changes: 4 additions & 4 deletions openassessment/xblock/static/dist/manifest.json
@@ -4,8 +4,8 @@
"openassessment-editor-textarea.js.map": "/openassessment-editor-textarea.0babe8f5dfb2d462894a.js.map",
"openassessment-editor-tinymce.js": "/openassessment-editor-tinymce.990d808ee1930fbf3200.js",
"openassessment-editor-tinymce.js.map": "/openassessment-editor-tinymce.990d808ee1930fbf3200.js.map",
"openassessment-lms.js": "/openassessment-lms.f22003cefb5051270caa.js",
"openassessment-lms.js.map": "/openassessment-lms.f22003cefb5051270caa.js.map",
"openassessment-lms.js": "/openassessment-lms.86fbdc00dc4d8f98fe89.js",
"openassessment-lms.js.map": "/openassessment-lms.86fbdc00dc4d8f98fe89.js.map",
"openassessment-ltr.css": "/openassessment-ltr.d387a062a81f7931a1c9.css",
"openassessment-ltr.js": "/openassessment-ltr.d387a062a81f7931a1c9.js",
"openassessment-ltr.css.map": "/openassessment-ltr.d387a062a81f7931a1c9.css.map",
@@ -14,7 +14,7 @@
"openassessment-rtl.js": "/openassessment-rtl.fdcf1f14feb03ce4e582.js",
"openassessment-rtl.css.map": "/openassessment-rtl.fdcf1f14feb03ce4e582.css.map",
"openassessment-rtl.js.map": "/openassessment-rtl.fdcf1f14feb03ce4e582.js.map",
"openassessment-studio.js": "/openassessment-studio.fd7c3468f116d1200432.js",
"openassessment-studio.js.map": "/openassessment-studio.fd7c3468f116d1200432.js.map",
"openassessment-studio.js": "/openassessment-studio.9038bc371da4ada4afa1.js",
"openassessment-studio.js.map": "/openassessment-studio.9038bc371da4ada4afa1.js.map",
"default-avatar.svg": "/95ec738c0b7faac5b5c9126794446bbd.svg"
}
@@ -63,7 +63,7 @@
/******/
/******/ var hotApplyOnUpdate = true;
/******/ // eslint-disable-next-line no-unused-vars
/******/ var hotCurrentHash = "7a38f0bcd21a11990cc9";
/******/ var hotCurrentHash = "1d0a2638b44fd39c78aa";
/******/ var hotRequestTimeout = 10000;
/******/ var hotCurrentModuleData = {};
/******/ var hotCurrentChildModule;
@@ -63,7 +63,7 @@
/******/
/******/ var hotApplyOnUpdate = true;
/******/ // eslint-disable-next-line no-unused-vars
/******/ var hotCurrentHash = "7a38f0bcd21a11990cc9";
/******/ var hotCurrentHash = "1d0a2638b44fd39c78aa";
/******/ var hotRequestTimeout = 10000;
/******/ var hotCurrentModuleData = {};
/******/ var hotCurrentChildModule;
6 changes: 3 additions & 3 deletions openassessment/xblock/static/dist/openassessment-lms.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion openassessment/xblock/static/dist/openassessment-ltr.js
@@ -63,7 +63,7 @@
/******/
/******/ var hotApplyOnUpdate = true;
/******/ // eslint-disable-next-line no-unused-vars
/******/ var hotCurrentHash = "7a38f0bcd21a11990cc9";
/******/ var hotCurrentHash = "1d0a2638b44fd39c78aa";
/******/ var hotRequestTimeout = 10000;
/******/ var hotCurrentModuleData = {};
/******/ var hotCurrentChildModule;
2 changes: 1 addition & 1 deletion openassessment/xblock/static/dist/openassessment-rtl.js
@@ -63,7 +63,7 @@
/******/
/******/ var hotApplyOnUpdate = true;
/******/ // eslint-disable-next-line no-unused-vars
/******/ var hotCurrentHash = "7a38f0bcd21a11990cc9";
/******/ var hotCurrentHash = "1d0a2638b44fd39c78aa";
/******/ var hotRequestTimeout = 10000;
/******/ var hotCurrentModuleData = {};
/******/ var hotCurrentChildModule;