LLM-Based Columns#

LLM-based columns use large language models to generate intelligent, contextual content. These columns can create text, code, structured data, and even evaluate the quality of other generated content.

Before You Start#

Setup#

Before getting started, ensure you have the Data Designer client and configuration builder set up:

import os
from nemo_microservices import NeMoMicroservices
from nemo_microservices.beta.data_designer import DataDesignerClient, DataDesignerConfigBuilder
from nemo_microservices.beta.data_designer.config import columns as C
from nemo_microservices.beta.data_designer.config import params as P

data_designer_client = DataDesignerClient(
    client=NeMoMicroservices(base_url=os.environ["NEMO_MICROSERVICES_BASE_URL"])
)

config_builder = DataDesignerConfigBuilder(model_configs="path/to/your/model_configs.yaml")
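The model_alias values used in the examples on this page (such as "text", "code", "structured", and "judge") refer to model configurations defined in your model_configs.yaml file; adjust them to match the aliases you have configured.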

LLM Text Generation#

Generates natural language text based on prompts and context from other columns. Template variables such as {{product_category}} and {{product_name}} are filled in with each record's values from the referenced columns.

config_builder.add_column(
    name="product_description",
    type="llm-text",
    model_alias="text", 
    prompt="Generate a detailed description for a {{product_category}} product named {{product_name}}.",
)
Equivalently, using the typed API:

config_builder.add_column(
    C.LLMTextColumn(
        name="product_description",
        model_alias="text",
        prompt="Generate a detailed description for a {{product_category}} product named {{product_name}}.",
    )
)
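Prompt templates can reference other columns in the dataset, including previously generated LLM columns, so columns can build on each other. As a minimal sketch (the product_tagline column and its prompt are illustrative, not part of the API), the column below consumes the product_description generated above:

# Illustrative follow-up column whose prompt references the
# product_description column defined above.
config_builder.add_column(
    C.LLMTextColumn(
        name="product_tagline",
        model_alias="text",
        prompt="Write a one-sentence marketing tagline based on this description: {{product_description}}",
    )
)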

LLM Code Generation#

Generates code in various programming languages.

config_builder.add_column(
    name="python_function",
    type="llm-code",
    model_alias="code",
    prompt="Generate a Python function that {{function_description}}.",
    output_format="python"
)
Equivalently, using the typed API:

config_builder.add_column(
    C.LLMCodeColumn(
        name="python_function",
        model_alias="code",
        prompt="Write a Python function that {{function_description}}.",
        output_format="python"
    )
)

LLM Structured Generation#

Generates structured data that conforms to predefined schemas using Pydantic models or JSON schemas.

from pydantic import BaseModel

class UserProfile(BaseModel):
    bio: str
    skills: list[str]
    experience_years: int


config_builder.add_column(
    name="user_profile",
    type="llm-structured",
    model_alias="structured",
    prompt="Generate a user profile for {{user_type}} in {{industry}}.",
    output_format=UserProfile 
)
Equivalently, using the typed API (reusing the UserProfile model defined above):

config_builder.add_column(
    C.LLMStructuredColumn(
        name="user_profile",
        model_alias="structured",
        prompt="Generate a user profile for {{user_type}} in {{industry}}.",
        output_format=UserProfile  
    )
)
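Schemas are not limited to flat models; nested Pydantic models can describe richer records. The sketch below is illustrative (the WorkExperience and DetailedUserProfile models and their fields are invented for this example); Pydantic field descriptions become part of the model's JSON schema and may help steer generation:

from pydantic import BaseModel, Field

class WorkExperience(BaseModel):
    company: str
    title: str
    years: int

class DetailedUserProfile(BaseModel):
    bio: str = Field(description="A short professional biography")
    skills: list[str]
    work_history: list[WorkExperience]

config_builder.add_column(
    C.LLMStructuredColumn(
        name="detailed_user_profile",
        model_alias="structured",
        prompt="Generate a detailed user profile for {{user_type}} in {{industry}}.",
        output_format=DetailedUserProfile,
    )
)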

For more details on structured outputs, see the Structured Outputs guide.

LLM Judge#

Evaluates and scores the quality of generated content against scoring rubrics using large language models. Prebuilt judge prompt templates and rubrics are available for common tasks, such as judging Python code generated from text:

from nemo_microservices.beta.data_designer.config.params.rubrics import TEXT_TO_PYTHON_LLM_JUDGE_PROMPT_TEMPLATE, PYTHON_RUBRICS

config_builder.add_column(
    name="code_quality_score",
    type="llm-judge",
    prompt=TEXT_TO_PYTHON_LLM_JUDGE_PROMPT_TEMPLATE,
    rubrics=PYTHON_RUBRICS,
    model_alias="judge"  
)
You can also define custom rubrics with the typed API:

config_builder.add_column(
    C.LLMJudgeColumn(
        name="response_quality",
        model_alias="judge",  
        prompt="Evaluate the quality of this response: {{response_text}}",
        rubrics=[  
            P.Rubric(
                name="accuracy",
                description="Is the response factually correct?",
                scoring={
                    "4": "Completely accurate with no errors",
                    "3": "Mostly accurate with minor errors", 
                    "2": "Somewhat accurate with some errors",
                    "1": "Mostly inaccurate with major errors",
                    "0": "Completely inaccurate"
                }
            ),
            P.Rubric(
                name="clarity",
                description="Is the response clear and easy to understand?",
                scoring={
                    "4": "Extremely clear and well-structured",
                    "3": "Clear with good structure",
                    "2": "Reasonably clear with adequate structure",
                    "1": "Somewhat unclear with poor structure",
                    "0": "Very unclear and confusing"
                }
            ),
            P.Rubric(
                name="completeness",
                description="Does the response fully address the question?",
                scoring={
                    "4": "Completely addresses all aspects",
                    "3": "Addresses most aspects thoroughly",
                    "2": "Addresses some aspects adequately",
                    "1": "Addresses few aspects superficially",
                    "0": "Does not address the question"
                }
            )
        ]
    )
)

Code Validation#

Validates the syntax and execution of code produced by LLM code generation columns. The target_column parameter references the LLM code column whose output should be validated.

config_builder.add_column(
    name="python_validation",
    type="code-validation",
    code_lang="python",
    target_column="generated_python_code"
)
Equivalently, using the typed API:

config_builder.add_column(
    C.CodeValidationColumn(
        name="python_validation",
        code_lang="python",
        target_column="generated_python_code"
    )
)
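Putting the two together, a minimal sketch that pairs an LLM code column with a validation column (the column names are illustrative):

# LLM code column that produces the code to validate.
config_builder.add_column(
    C.LLMCodeColumn(
        name="generated_python_code",
        model_alias="code",
        prompt="Write a Python function that {{function_description}}.",
        output_format="python",
    )
)

# Validation column that checks the generated code.
config_builder.add_column(
    C.CodeValidationColumn(
        name="python_validation",
        code_lang="python",
        target_column="generated_python_code",
    )
)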

Reference Table#

Simplified API Type    Typed API Equivalent    Description
"llm-text"             LLMTextColumn           LLM-generated text content
"llm-code"             LLMCodeColumn           LLM-generated code content
"llm-structured"       LLMStructuredColumn     LLM-generated structured content
"llm-judge"            LLMJudgeColumn          LLM-based evaluation
"code-validation"      CodeValidationColumn    Code syntax validation
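Once your columns are configured, you can preview a small sample and then generate a full dataset with the client. The calls below (preview, create, and load_dataset) reflect a typical Data Designer workflow but are an assumption here; check the client reference for the exact method names and signatures:

# Assumption: preview() returns a small in-memory sample for quick iteration,
# while create() launches a full generation job.
preview = data_designer_client.preview(config_builder)
print(preview.dataset.head())

job_results = data_designer_client.create(config_builder, num_records=100, wait_until_done=True)
dataset = job_results.load_dataset()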