from pydantic import BaseModel
from openreward.environments import Environment, JSONObject, Server, Split, TextBlock, ToolOutput, tool
class AnswerParams(BaseModel):
    # Argument payload for the `answer` tool defined on the environments in this file.
    # Documented with `#` comments (not a docstring) so the class's __doc__ stays unset.

    # Submitted answer as raw text; the environments convert it with int().
    answer: str
# --- Basic Arithmetic ---
class BasicArithmeticTaskSpec(BaseModel):
    # Validated shape of one entry of `basic_tasks`.
    # Documented with `#` comments (not a docstring) so the class's __doc__ stays unset.

    # Task identifier (the entries in this file use "0", "1", ...).
    id: str

    # Question text; BasicArithmetic.get_prompt returns it as the prompt.
    problem: str

    # Expected integer result that a submission is compared against.
    answer: int
# Train-split tasks for BasicArithmetic, expanded from (id, problem, answer) rows
# into dicts with the keys BasicArithmeticTaskSpec validates.
basic_tasks = [
    {"id": task_id, "problem": problem, "answer": expected}
    for task_id, problem, expected in (
        ("0", "What is 7 + 3?", 10),
        ("1", "What is 15 - 8?", 7),
    )
]
class BasicArithmetic(Environment):
    """Addition and subtraction problems."""

    def __init__(self, task_spec: JSONObject = {}, secrets: dict[str, str] = {}):
        # Only `task_spec` is forwarded to the base class; `secrets` is accepted
        # but not used in this class. The `{}` defaults are never mutated here.
        super().__init__(task_spec)
        # Validate the raw task dict into a typed spec (id, problem, answer).
        self.config = BasicArithmeticTaskSpec.model_validate(task_spec)

    @classmethod
    def list_splits(cls):
        """Return the splits this environment offers: a single "train" split."""
        return [Split("train", type="train")]

    @classmethod
    def list_tasks(cls, split: str) -> list[JSONObject]:
        """Return the task dicts for `split`.

        Raises:
            ValueError: if `split` is anything other than "train".
        """
        if split != "train":
            raise ValueError(f"Unknown split: {split}")
        # The module-level list itself is returned, not a copy.
        return basic_tasks

    def get_prompt(self):
        """Return the task's problem statement as a one-element list of text blocks."""
        question = TextBlock(type="text", text=self.config.problem)
        return [question]

    @tool
    async def answer(self, params: AnswerParams) -> ToolOutput:
        """Submit your final answer."""
        # Docstring above is kept verbatim; presumably the framework surfaces it
        # as the tool description -- confirm before rewording.
        try:
            submitted = int(params.answer)
        except ValueError:
            # Text that int() cannot parse is scored as a wrong answer.
            is_correct = False
        else:
            is_correct = submitted == self.config.answer

        if is_correct:
            verdict, score = "Correct!", 1.0
        else:
            verdict, score = "Wrong!", 0.0

        # Every submission ends the episode, right or wrong.
        return ToolOutput(
            blocks=[TextBlock(type="text", text=verdict)],
            reward=score,
            finished=True,
        )
# --- Bitwise Arithmetic ---
class BitwiseArithmeticTaskSpec(BaseModel):
    # Validated shape of one entry of `bitwise_tasks`.
    # Documented with `#` comments (not a docstring) so the class's __doc__ stays unset.

    # Task identifier (the entries in this file use "0", "1", ...).
    id: str

    # Question text; BitwiseArithmetic.get_prompt returns it as the prompt.
    problem: str

    # Expected integer result that a submission is compared against.
    answer: int
# Train-split tasks for BitwiseArithmetic, expanded from (id, problem, answer) rows
# into dicts with the keys BitwiseArithmeticTaskSpec validates.
bitwise_tasks = [
    {"id": task_id, "problem": problem, "answer": expected}
    for task_id, problem, expected in (
        ("0", "What is 5 AND 3? (bitwise)", 1),
        ("1", "What is 5 OR 3? (bitwise)", 7),
        ("2", "What is 5 XOR 3? (bitwise)", 6),
    )
]
class BitwiseArithmetic(Environment):
    """Bitwise operation problems."""

    def __init__(self, task_spec: JSONObject = {}, secrets: dict[str, str] = {}):
        # Only `task_spec` is forwarded to the base class; `secrets` is accepted
        # but not used in this class. The `{}` defaults are never mutated here.
        super().__init__(task_spec)
        # Validate the raw task dict into a typed spec (id, problem, answer).
        self.config = BitwiseArithmeticTaskSpec.model_validate(task_spec)

    @classmethod
    def list_splits(cls):
        """Return the splits this environment offers: a single "train" split."""
        return [Split("train", type="train")]

    @classmethod
    def list_tasks(cls, split: str) -> list[JSONObject]:
        """Return the task dicts for `split`.

        Raises:
            ValueError: if `split` is anything other than "train".
        """
        if split != "train":
            raise ValueError(f"Unknown split: {split}")
        # The module-level list itself is returned, not a copy.
        return bitwise_tasks

    def get_prompt(self):
        """Return the task's problem statement as a one-element list of text blocks."""
        question = TextBlock(type="text", text=self.config.problem)
        return [question]

    @tool
    async def answer(self, params: AnswerParams) -> ToolOutput:
        """Submit your final answer."""
        # Docstring above is kept verbatim; presumably the framework surfaces it
        # as the tool description -- confirm before rewording.
        try:
            submitted = int(params.answer)
        except ValueError:
            # Text that int() cannot parse (e.g. a "0b..." literal) is scored as wrong.
            is_correct = False
        else:
            is_correct = submitted == self.config.answer

        if is_correct:
            verdict, score = "Correct!", 1.0
        else:
            verdict, score = "Wrong!", 0.0

        # Every submission ends the episode, right or wrong.
        return ToolOutput(
            blocks=[TextBlock(type="text", text=verdict)],
            reward=score,
            finished=True,
        )