Source code for nemo_eval.adapters.interceptors.reasoning_interceptor

# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import json
import logging
import re
from typing import final

import requests

from .types import AdapterResponse, ResponseInterceptor



[docs]
def _clean_reasoning_tokens(response: requests.Response, end_reasoning_token: str) -> requests.Response:
    """
    Strip reasoning text from the assistant messages of a JSON response.

    For every entry in the payload's ``choices`` list whose ``message`` has
    ``role == "assistant"`` and a string ``content``, everything up to and
    including the last occurrence of ``end_reasoning_token`` is removed and
    leading/trailing newlines are stripped. No start token is required: the
    text before the end token is dropped whether or not it is preceded by an
    opening marker.

    Args:
        response: The API response object from requests.
        end_reasoning_token: Token that marks the end of the reasoning section.

    Returns:
        A new ``requests.Response`` carrying the cleaned JSON body, or the
        original ``response`` unchanged when the token is empty, the content
        type is not JSON, the payload is not a JSON object, or any error
        occurs while processing (this function is best-effort and never
        raises).
    """
    if not end_reasoning_token:
        # With an empty token the pattern below degenerates to ``.*?``, which
        # can match at every position; there is no reasoning section to
        # delimit, so leave the response alone.
        return response

    try:
        if "application/json" not in response.headers.get("Content-Type", ""):
            return response

        response_data = response.json()
        if not isinstance(response_data, dict):
            # Only object payloads can carry a "choices" list.
            return response

        # Non-greedy match up to each end token; substituting every match
        # leaves only the text that follows the final end token.
        reasoning_pattern = re.compile(r".*?" + re.escape(end_reasoning_token), flags=re.DOTALL)

        # "choices" (and "message" below) may be present but null, so fall
        # back explicitly instead of relying on the .get() default.
        for choice in response_data.get("choices") or []:
            message = choice.get("message") if isinstance(choice, dict) else None
            if not isinstance(message, dict) or message.get("role") != "assistant":
                continue

            content = message.get("content", "")
            if not isinstance(content, str):
                # In particular, content can be None with function calling.
                continue

            cleaned_content = reasoning_pattern.sub("", content).strip("\n")
            message["content"] = cleaned_content

            # Report how many whitespace-separated words were removed.
            reasoning_tokens_info = {
                "total_words": len(content.split()),
                "cleaned_words": len(cleaned_content.split()),
            }
            print(f"reasoning_tokens_info {json.dumps(reasoning_tokens_info)}")

        body = json.dumps(response_data).encode("utf-8")

        # Work on a copy so the original response's headers are not mutated,
        # and keep any Content-Length header consistent with the new body.
        headers = response.headers.copy()
        for header_name in list(headers):
            if header_name.lower() == "content-length":
                headers[header_name] = str(len(body))

        modified_response = requests.Response()
        modified_response.status_code = response.status_code
        modified_response.headers = headers
        modified_response._content = body
        # The body was just encoded as UTF-8, so declare it explicitly.
        modified_response.encoding = "utf-8"
        # Carry over request metadata that a freshly built Response lacks.
        for attr in ("url", "reason", "request"):
            setattr(modified_response, attr, getattr(response, attr, None))
        return modified_response

    except ValueError as e:
        # JSON decode errors are ValueError subclasses: return the original response.
        print(f"Error parsing JSON response: {e}")
        return response
    except Exception as e:
        # Best-effort boundary: never let cleaning break the response path.
        print(f"Error cleaning reasoning tokens: {e}")
        logging.exception(e)
        return response




[docs]
@final
class ResponseReasoningInterceptor(ResponseInterceptor):
    """Intercepts responses to clean up reasoning tokens from the content.

    Each intercepted response is passed through ``_clean_reasoning_tokens``
    with the end-of-reasoning token configured at construction time.
    """

    # Token that marks the end of the reasoning section in model output.
    _end_reasoning_token: str

    def __init__(self, end_reasoning_token: str):
        """Store the end-of-reasoning token and log the configuration.

        Args:
            end_reasoning_token: Token that marks the end of the reasoning
                section; it is forwarded to ``_clean_reasoning_tokens``.
        """
        self._end_reasoning_token = end_reasoning_token
        # Lazy %-style arguments: the message is only formatted if emitted.
        logging.info(
            "Evaluation adapter will clean reasoning before `%s` token",
            self._end_reasoning_token,
        )

    @final
    def intercept_response(self, ar: AdapterResponse) -> AdapterResponse:
        """Return a new ``AdapterResponse`` whose response has reasoning removed.

        Args:
            ar: The adapter response to process; ``ar.r`` is cleaned and
                ``ar.meta`` is passed through unchanged.

        Returns:
            An ``AdapterResponse`` wrapping the cleaned response and the
            original ``meta``.
        """
        return AdapterResponse(
            r=_clean_reasoning_tokens(ar.r, self._end_reasoning_token),
            meta=ar.meta,
        )