import pandas as pd
from math_verify import parse, verify
from openreward.environments import Environment, JSONObject, Server, TextBlock, ToolOutput, tool
from pydantic import BaseModel


class AIME2024TaskSpec(BaseModel):
    """A single AIME 2024 task: the problem statement and its gold answer."""

    id: str
    problem: str
    answer: str


class AnswerParams(BaseModel):
    """Arguments accepted by the `answer` tool."""

    answer: str


# Load the test split from disk and normalize each raw record: drop unused
# columns, assign a stable string id, coerce the answer to a string, and
# lowercase the remaining column names to match AIME2024TaskSpec's fields.
test_tasks = pd.read_parquet("aime_2024_problems.parquet").to_dict(orient="records")
for i, task in enumerate(test_tasks):
    task.pop("ID")
    task.pop("Solution")
    task["id"] = str(i)
    task["Answer"] = str(task["Answer"])
    keys_to_change = [key for key in task.keys() if key != "id"]
    for key in keys_to_change:
        if key.lower() != key:
            task[key.lower()] = task.pop(key)
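
# After normalization, each record is expected to have the shape below
# (the values shown are illustrative, not taken from the dataset):
#   {"id": "0", "problem": "<problem statement>", "answer": "204"}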


class AIME2024(Environment):
    """
    An environment for the AIME 2024 dataset.
    """

    def __init__(self, task_spec: JSONObject = {}):
        super().__init__(task_spec)
        # Validate the incoming task spec against the expected schema.
        self.config = AIME2024TaskSpec.model_validate(task_spec)

    @classmethod
    def list_tasks(cls, split: str) -> list[JSONObject]:
        # AIME 2024 is used for evaluation only, so the train split is empty.
        if split == "train":
            return []
        elif split == "test":
            return test_tasks
        raise ValueError(f"Unknown split: {split}")

    @classmethod
    def list_splits(cls) -> list[str]:
        return ["train", "test"]

    def get_prompt(self) -> list[TextBlock]:
        # Present the raw problem statement to the agent.
        return [TextBlock(type="text", text=self.config.problem)]

    @tool
    async def answer(self, params: AnswerParams) -> ToolOutput:
        """
        Submit your final answer. Note that this finishes the episode.
        """
        # Parse both the gold answer and the submission into mathematical
        # expressions, then check equivalence; verify tolerates formatting
        # differences between equivalent answers.
        gold = parse(self.config.answer)
        answer = parse(params.answer)
        is_correct = verify(gold, answer)
        if is_correct:
            agent_message = "Correct!"
            reward = 1.0
        else:
            agent_message = "Wrong!"
            reward = 0.0
        # finished=True ends the episode once the answer has been graded.
        return ToolOutput(
            blocks=[TextBlock(type="text", text=agent_message)],
            reward=reward,
            finished=True,
        )


if __name__ == "__main__":
    # Run a server that exposes the AIME2024 environment.
    Server([AIME2024]).run()
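
# A minimal sketch of the grading step used by the `answer` tool, assuming
# math_verify's default extraction behavior (values are illustrative):
#
#   from math_verify import parse, verify
#   verify(parse("204"), parse("\\boxed{204}"))  # expected: True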