Source code for nemo_eval.adapters.interceptors.reasoning_interceptor

# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import json
import logging
import re
from typing import final

import requests

from .types import AdapterResponse, ResponseInterceptor


def _strip_reasoning_prefix(content: str, end_reasoning_token: str) -> str:
    """Drop everything up to and including the final end-of-reasoning token.

    Occurrences of the token are consumed left to right without overlapping,
    which yields the same result as substituting the non-greedy pattern
    ``.*?<token>`` (with DOTALL) by the empty string, but in a single linear
    pass. The regex form retries a scan to the end of the string from every
    start offset once no token remains ahead, which is quadratic on long
    content that lacks the token (e.g. a truncated generation).

    Args:
        content: Text that may start with a reasoning section.
        end_reasoning_token: Token that marks the end of the reasoning section.

    Returns:
        The text following the last consumed token, or ``content`` unchanged
        when the token does not occur or is empty.
    """
    if not end_reasoning_token:
        # An empty token delimits nothing; the content is left untouched.
        return content

    token_length = len(end_reasoning_token)
    tail_start = 0
    while True:
        token_index = content.find(end_reasoning_token, tail_start)
        if token_index == -1:
            break
        tail_start = token_index + token_length
    return content[tail_start:]


def _clean_reasoning_tokens(response: requests.Response, end_reasoning_token: str) -> requests.Response:
    """
    Clean up reasoning tokens from the response.

    Only responses whose ``Content-Type`` header contains ``application/json``
    are processed. For every choice whose message has the ``assistant`` role
    and string content, the text up to and including the end-of-reasoning
    token is removed and surrounding newlines are stripped.

    Args:
        response: The API response object from requests
        end_reasoning_token: Token that marks the end of reasoning section

    Returns:
        A new response carrying the original status code and headers with the
        cleaned JSON body, or the original response when it is not JSON or
        when any error occurs while processing it.
    """
    try:
        if "application/json" not in response.headers.get("Content-Type", ""):
            return response

        status_code = response.status_code
        headers = response.headers
        response_data = response.json()

        # Iterate over the choices and their messages
        for choice in response_data.get("choices", []):
            message = choice.get("message", {})
            if message.get("role") != "assistant":
                continue

            content = message.get("content", "")
            if not isinstance(content, str):
                # particularly, content can be None with function calling
                continue

            # Remove the reasoning section; this also handles the case where
            # only the end token is present.
            cleaned_content = _strip_reasoning_prefix(content, end_reasoning_token).strip("\n")

            # Update the content of the message with cleaned text
            message["content"] = cleaned_content

            # Log token information
            reasoning_tokens_info = {
                "total_words": len(content.split()),
                "cleaned_words": len(cleaned_content.split()),
            }
            print(f"reasoning_tokens_info {json.dumps(reasoning_tokens_info)}")

        # NOTE(review): the original headers object is reused as-is, so any
        # Content-Length header still describes the original body - confirm
        # that downstream consumers do not rely on it.
        modified_response = requests.Response()
        modified_response.status_code = status_code
        modified_response.headers = headers
        modified_response._content = json.dumps(response_data).encode("utf-8")
        return modified_response
    except (ValueError, json.JSONDecodeError) as e:
        # If not JSON or parsing fails, return original response
        print(f"Error parsing JSON response: {e}")
        return response
    except Exception as e:
        # Best effort: any unexpected payload shape falls back to the
        # untouched response rather than failing the request.
        print(f"Error cleaning reasoning tokens: {e}")
        logging.exception(e)
        return response
@final
class ResponseReasoningInterceptor(ResponseInterceptor):
    """Response interceptor that removes reasoning text from response content.

    The end-of-reasoning token supplied at construction time is passed to
    ``_clean_reasoning_tokens`` for every intercepted response.
    """

    # Token that marks the end of the reasoning section.
    _end_reasoning_token: str

    def __init__(self, end_reasoning_token: str):
        """Remember the end-of-reasoning token and log the configuration.

        Args:
            end_reasoning_token: Token that marks the end of the reasoning
                section in the response content.
        """
        self._end_reasoning_token = end_reasoning_token
        notice = f"Evaluation adapter will clean reasoning before `{self._end_reasoning_token}` token`"
        logging.info(notice)

    @final
    def intercept_response(self, ar: AdapterResponse) -> AdapterResponse:
        """Return a new adapter response whose payload has reasoning removed.

        Args:
            ar: Adapter response wrapping the raw response (``r``) and its
                metadata (``meta``).

        Returns:
            An ``AdapterResponse`` holding the cleaned response and the
            metadata of ``ar``.
        """
        cleaned = _clean_reasoning_tokens(ar.r, self._end_reasoning_token)
        return AdapterResponse(r=cleaned, meta=ar.meta)