Program Listing for File utils.cpp
↰ Return to documentation for file (python/morpheus_llm/morpheus_llm/_lib/src/llm/utils.cpp)
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "morpheus_llm/llm/utils.hpp"
#include "morpheus_llm/llm/input_map.hpp"
#include "morpheus/utilities/string_util.hpp"
#include <glog/logging.h>
#include <nlohmann/json.hpp>
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <regex>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string_view>
namespace morpheus::llm {
// The regex definition is guarded because Doxygen has problems parsing it
#if !defined(DOXYGEN_SHOULD_SKIP_THIS)
// Accepted name pattern: a letter or underscore followed by any number of letters, digits or underscores
const std::regex VALID_INPUT_NAME(R"([a-zA-Z_][a-zA-Z0-9_]*)", std::regex::ECMAScript);
#endif  // DOXYGEN_SHOULD_SKIP_THIS
/**
 * @brief Checks whether `name` matches the `VALID_INPUT_NAME` pattern in its entirety.
 *
 * @param name Candidate name to validate
 * @return true if the whole of `name` matches the pattern, false otherwise
 */
bool is_valid_node_name(std::string_view name)
{
    const auto first = name.cbegin();
    const auto last  = name.cend();

    // regex_match only succeeds when the complete [first, last) range matches
    return std::regex_match(first, last, VALID_INPUT_NAME);
}
/**
 * @brief Resolves the "-" placeholder internal name of `input_map` to one of `input_names`.
 *
 * The external name is first reduced to a bare name: when it begins with '/', the last JSON pointer token is
 * used (i.e. /node/input -> input). That name is looked up in `input_names`. If no entry matches, the entry at
 * position `curr_idx` is used instead.
 *
 * @param input_map Mapping whose `internal_name` is checked to be "-" on entry; it is overwritten with the
 * resolved name
 * @param curr_idx Position in `input_names` used when matching by name fails
 * @param input_names Candidate internal names
 * @return true when the mapping was resolved by name, false when it was resolved by index
 * @throws std::invalid_argument if matching by name failed and `curr_idx` is out of range for `input_names`
 */
bool find_matching_input_for_placeholder(UserInputMapping& input_map,
                                         size_t curr_idx,
                                         const std::vector<std::string>& input_names)
{
    CHECK_EQ(input_map.internal_name, "-")
        << "Called find_matching_input_for_placeholder() with a non placeholder input";

    std::string lookup_name = input_map.external_name;

    // A leading slash means the mapping comes from another node rather than a parent, so only the final
    // path component is used for the lookup. i.e. /node/input -> input
    if (!lookup_name.empty() && lookup_name.front() == '/')
    {
        lookup_name = nlohmann::json::json_pointer(lookup_name).back();
    }

    // First preference: an input with exactly the same name
    for (const auto& candidate : input_names)
    {
        if (candidate == lookup_name)
        {
            input_map.internal_name = candidate;
            return true;
        }
    }

    // Second preference: the input at the same position
    if (curr_idx < input_names.size())
    {
        input_map.internal_name = input_names[curr_idx];
        return false;
    }

    throw std::invalid_argument(MORPHEUS_CONCAT_STR(
        "Invalid input name '"
        << input_map.external_name
        << "'. Unable to automatically map the external node to an internal name. Matching by name failed and "
           "current index exceeds the bounds of the input names. Current index: "
        << curr_idx << ", Input names size: " << input_names.size()));
}
/**
 * @brief Converts the user specified input mappings into the final list of input mappings for a node.
 *
 * The conversion happens in two passes:
 *  1. Every mapping without a wildcard is added to the result. If its internal name is the "-" placeholder, it
 *     is first resolved with `find_matching_input_for_placeholder()`, either by name or by position.
 *  2. A mapping whose external name contains '*' and whose internal name is "*" is expanded last: one mapping
 *     is generated for every name in `input_names` that was not claimed in the first pass, with the '*' in the
 *     external name replaced by that name. The generated mappings follow the ordering of `std::set`.
 *
 * @param user_inputs Mappings as supplied by the user. Taken by value because placeholders are resolved in place
 * @param input_names Names of the inputs expected by the node
 * @return The resolved mappings, one per expected input
 * @throws std::invalid_argument if only one side of a mapping uses the '*' wildcard, if a placeholder resolved
 * by index follows one resolved by name, if a placeholder cannot be resolved at all, or if the resolved internal
 * names do not match `input_names` in count or in content
 * @note More than one wildcard mapping is not supported and trips a `CHECK_EQ`
 */
input_mappings_t process_input_names(user_input_mappings_t user_inputs, const std::vector<std::string>& input_names)
{
    input_mappings_t final_inputs;
    user_input_mappings_t wildcard_inputs;

    // Gathers the internal names of every mapping resolved so far
    const auto collect_internal_names = [&final_inputs]() {
        std::set<std::string> names;

        std::transform(final_inputs.begin(),
                       final_inputs.end(),
                       std::inserter(names, names.begin()),
                       [](const auto& input) {
                           return input.internal_name;
                       });

        return names;
    };

    // Set once any "-" placeholder has been resolved by name
    bool is_matching_by_name = false;

    // Loop over all inputs replacing '-' placeholders and separating out wildcards
    for (size_t i = 0; i < user_inputs.size(); ++i)
    {
        auto& single_input = user_inputs[i];

        const bool has_wildcard_external = single_input.external_name.find('*') != std::string::npos;
        const bool has_wildcard_internal = single_input.internal_name == "*";

        // A wildcard must appear on both sides of the mapping or on neither
        if (has_wildcard_external != has_wildcard_internal)
        {
            if (has_wildcard_external)
            {
                throw std::invalid_argument(
                    "LLMNode::add_node() called with a placeholder external name but no placeholder internal name");
            }

            throw std::invalid_argument(
                "LLMNode::add_node() called with a placeholder internal name but no placeholder external name");
        }

        if (has_wildcard_external)
        {
            // Need to process these after the non-placeholder inputs
            wildcard_inputs.push_back(single_input);
            continue;
        }

        if (single_input.internal_name == "-")
        {
            // We have a placeholder input name, so we need to find the matching input name
            const bool matched_by_name = find_matching_input_for_placeholder(single_input, i, input_names);

            if (matched_by_name)
            {
                is_matching_by_name = true;
            }
            else if (is_matching_by_name)
            {
                throw std::invalid_argument(MORPHEUS_CONCAT_STR(
                    "Invalid input name '" << single_input.external_name
                                           << "'. Unable to automatically map the external node to an internal "
                                              "name. Cannot mix matching by name and matching by index"));
            }
        }

        // Add it to the final list
        final_inputs.emplace_back(single_input.external_name, single_input.internal_name);
    }

    const std::set<std::string> total_names(input_names.begin(), input_names.end());

    // Finally, process the wildcards
    if (!wildcard_inputs.empty())
    {
        // TODO(MDD): Support multiple placeholders
        CHECK_EQ(wildcard_inputs.size(), 1) << "Only a single placeholder input is currently supported";

        const auto specified_names = collect_internal_names();

        // Names expected by the node which no explicit mapping has claimed yet
        std::vector<std::string> remaining_names;

        std::set_difference(total_names.begin(),
                            total_names.end(),
                            specified_names.begin(),
                            specified_names.end(),
                            std::back_inserter(remaining_names));

        const auto& wildcard_external_name = wildcard_inputs[0].external_name;
        const auto star_loc                = wildcard_external_name.find('*');

        // Loop over the remaining names and add them to the final inputs
        for (const auto& remaining_name : remaining_names)
        {
            // Work on a copy so the pattern is intact for the next name
            std::string replaced = wildcard_external_name;
            replaced.replace(star_loc, 1, remaining_name);

            final_inputs.emplace_back(replaced, remaining_name);
        }
    }

    if (input_names.size() != final_inputs.size())
    {
        throw std::invalid_argument(MORPHEUS_CONCAT_STR(
            "The number of inputs provided does not match the number of inputs expected by the node. Provided: "
            << final_inputs.size() << ", Expected: " << input_names.size()));
    }

    // The counts agree, now make sure the names themselves agree too
    const auto specified_names = collect_internal_names();

    if (specified_names != total_names)
    {
        throw std::invalid_argument(MORPHEUS_CONCAT_STR(
            "The names of the inputs provided do not match the names of the inputs expected by the node. Provided: "
            << StringUtil::array_to_str(specified_names.begin(), specified_names.end())
            << ", Expected: " << StringUtil::array_to_str(total_names.begin(), total_names.end())));
    }

    return final_inputs;
}
} // namespace morpheus::llm