# server.py
from openreward.environments import Environment, Server, TextBlock, ToolOutput, tool
from pydantic import BaseModel
# Parameter schema for the `answer` tool defined on MyEnvironment.
# Documented with comments rather than a docstring so the model's generated
# schema is left exactly as it was.
class AnswerParams(BaseModel):
    # The submitted answer, as text; MyEnvironment.answer compares it with the
    # string form of the task's expected answer.
    answer: str
class MyEnvironment(Environment):
    """A simple math environment"""

    # Each task spec is a dict with an "id", a "problem" (used as the prompt
    # text) and the expected "answer". The methods below read the active task
    # from `self.task_spec`, which is presumably populated by the Environment
    # base class -- confirm against the framework.

    @classmethod
    def list_tasks(cls, split: str):
        """Return the task specs for ``split``.

        ``split`` is not consulted: the same two tasks are returned for
        every split name.
        """
        return [
            {"id": "task-1", "problem": "What is 2+2?", "answer": 4},
            {"id": "task-2", "problem": "What is 5*3?", "answer": 15},
        ]

    @classmethod
    def list_splits(cls):
        """Return the names of the available splits."""
        return ["train", "test"]

    def get_prompt(self):
        """Return the current task's problem statement as a single text block."""
        return [TextBlock(type="text", text=self.task_spec["problem"])]

    @tool
    def answer(self, params: AnswerParams) -> ToolOutput:
        """Submit your answer"""
        # NOTE(review): the docstring above is kept verbatim because the
        # framework may surface it as the tool description -- confirm.
        #
        # Expected answers are stored as ints, so compare on their string
        # form. Surrounding whitespace is ignored on both sides so that a
        # correct submission such as "4\n" or " 4" is not marked wrong.
        expected = str(self.task_spec["answer"]).strip()
        submitted = params.answer.strip()
        correct = submitted == expected

        # A single submission ends the episode whether or not it was correct.
        return ToolOutput(
            blocks=[TextBlock(type="text", text="Correct!" if correct else "Wrong!")],
            reward=1.0 if correct else 0.0,
            finished=True,
        )
if __name__ == "__main__":
    # Executed as a script: build a server hosting MyEnvironment and run it.
    server = Server([MyEnvironment])
    server.run()