Using pyAerial for data generation by simulation#

This notebook generates a fully 5G NR compliant PUSCH/PDSCH dataset using NVIDIA cuPHY through its Python bindings in pyAerial, which are used both for PUSCH/PDSCH slot generation and for radio channel modeling. PUSCH/PDSCH slots are generated and transmitted through different radio channels. To make models as generalizable as possible, it is usually desirable to train them with as wide a variety of channel models as possible. This notebook enables the generation of a dataset containing samples produced with a number of different channel models, including those used by 3GPP, as well as with different MCS indices and other transmission parameters.

Imports#

[1]:
import warnings
warnings.filterwarnings('ignore')

import itertools
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import cupy as cp
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from aerial.phy5g.pdsch import PdschTx
from aerial.phy5g.ldpc.util import get_mcs, random_tb
from aerial.phy5g.channel_models import FadingChannel
from aerial.phy5g.channel_models import TdlChannelConfig
from aerial.phy5g.channel_models import CdlChannelConfig
from aerial.util.fapi import dmrs_bit_array_to_fapi
from aerial.util.data import PuschRecord
from aerial.util.data import save_pickle

Dataset generation parameters#

The parameters used to generate the dataset are modified here. Note that some parameters are given as lists, meaning that multiple values may be given for those parameters. Typically one would like the training dataset to be as diverse as possible in order to make the models generalize well to various channel conditions and to different transmission parameters.

[2]:
# Target directory for the generated dataset. It gets created if it does not exist.
dataset_dir = 'data/example_simulated_dataset/QPSK'
os.makedirs(dataset_dir, exist_ok=True)

# Requested total number of samples. It is split with integer division across
# all combinations of the list-valued parameters below, so the number of
# samples actually generated may be slightly smaller than this value.
num_samples = 20000

# Channel configuration. A single channel type is used for the whole dataset,
# while several delay profiles ('A', 'B', 'C', 'D', 'E') may be listed.
channel_type = "tdl"       # Channel type: "tdl" or "cdl"
delay_profiles = ["A"]     # Delay profiles as per 3GPP TR 38.901

# Speeds to include in the dataset.
# This is the UE speed in m/s (0.8333 m/s is roughly 3 km/h). Together with the
# carrier frequency it is used to calculate the maximum Doppler shift.
speeds = [0.8333]

# Delay spreads to include in the dataset.
# This is the RMS delay spread in nanoseconds.
delay_spreads = [30.0]

# A list of MCS indices (as per TS 38.214) to include in the dataset.
# MCS table value refers to TS 38.214 as follows:
# 1: TS38.214, table 5.1.3.1-1.
# 2: TS38.214, table 5.1.3.1-2.
# 3: TS38.214, table 5.1.3.1-3.
mcss = [2]
mcs_table = 1

# Es/No values (in dB) to include in the dataset. np.arange excludes the stop
# value, so this yields -7.5, -6.5, ..., 1.5 (10 values).
esnos = list(np.arange(-7.5, 2.5, 1.0))  # For MCS 2, needs a change for other MCSs

# These are fixed for the dataset.
num_tx_ant = 1             # Number of transmit (UE) antennas.
num_rx_ant = 2             # Number of receive (gNB) antennas.
cell_id = 41               # Cell ID; also stored as DMRS scrambling ID / PUSCH identity in the records.
carrier_frequency = 3.5e9  # Carrier frequency in Hz.
layers = 1                 # Number of layers (transmission rank).
rnti = 20001               # UE RNTI.
scid = 0                   # DMRS scrambling ID selector passed to the transmitter.
data_scid = 41             # Data scrambling ID.
dmrs_port = 1              # DMRS port(s) to be used.
# One entry per OFDM symbol in the slot; a 1 marks a DMRS symbol (here symbols 2 and 11).
dmrs_position = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
start_sym = 0              # Index of the first allocated symbol.
num_symbols = 14           # Number of allocated symbols.
num_prbs = 273             # Number of allocated PRBs.

# Numerology and frame structure. See TS 38.211.
# Note: fft_size - sum(num_guard_subcarriers) = 3276 = 12 * 273 subcarriers are
# passed to the channel model, matching the 273 PRBs above.
mu = 1                              # Numerology index.
num_ofdm_symbols = 14               # OFDM symbols per slot.
fft_size = 4096                     # FFT size.
num_guard_subcarriers = (410, 410)  # Guard subcarriers; only their sum is used.
num_slots_per_frame = 20            # Used to derive the slot number and SFN of each sample.

Channel modeling#

The radio channel is simulated using the pyAerial GPU-accelerated FadingChannel class from aerial.phy5g.channel_models. It supports both TDL (Tapped Delay Line) and CDL (Clustered Delay Line) channel models as defined in 3GPP TR 38.901.

[3]:
def create_channel(channel_type, delay_profile, delay_spread, speed, num_tx_ant, num_rx_ant, carrier_frequency):
    """Build a FadingChannel for one TDL or CDL parameterization.

    The OFDM parameters (FFT size, guard subcarriers, numerology and number of
    symbols per slot) are read from the module-level dataset settings.

    Args:
        channel_type: Channel model family, either "tdl" or "cdl".
        delay_profile: Delay profile 'A', 'B', 'C', 'D' or 'E'.
        delay_spread: RMS delay spread in nanoseconds.
        speed: UE speed in m/s.
        num_tx_ant: Number of TX (UE) antennas.
        num_rx_ant: Number of RX (gNB) antennas.
        carrier_frequency: Carrier frequency in Hz.

    Returns:
        A FadingChannel object configured with the given parameters.

    Raises:
        ValueError: If `channel_type` is neither "tdl" nor "cdl".
    """
    # Maximum Doppler shift f_d = v * f_c / c, with c approximated as 3e8 m/s.
    doppler_hz = speed * carrier_frequency / 3e8

    # Settings shared by both channel model families.
    shared_settings = dict(
        delay_profile=delay_profile,
        delay_spread=delay_spread,
        max_doppler_shift=doppler_hz,
    )

    if channel_type == "tdl":
        # Uplink: the gNB is the receiver and the UE is the transmitter.
        config = TdlChannelConfig(
            **shared_settings,
            n_bs_ant=num_rx_ant,
            n_ue_ant=num_tx_ant,
        )
    elif channel_type == "cdl":
        gnb_array = (1, num_rx_ant // 2, 2)  # gNB antenna array (dual-pol)
        ue_array = (1, num_tx_ant, 1)        # UE antenna array (single-pol)
        config = CdlChannelConfig(
            **shared_settings,
            bs_ant_size=gnb_array,
            ue_ant_size=ue_array,
        )
    else:
        raise ValueError(f"Invalid channel type: {channel_type}. Use 'tdl' or 'cdl'.")

    # Number of subcarriers left after removing the guard bands.
    used_subcarriers = fft_size - sum(num_guard_subcarriers)
    return FadingChannel(
        channel_config=config,
        n_sc=used_subcarriers,
        numerology=mu,
        n_fft=fft_size,
        n_symbol_slot=num_ofdm_symbols,
    )


def apply_channel(channel, tx_signal, snr_db, slot_idx):
    """Pass one transmitted slot through the fading channel with added noise.

    Args:
        channel: FadingChannel object (called with keyword arguments).
        tx_signal: Transmitted signal, shape (n_sc, n_symbol, n_tx_ant).
        snr_db: Signal-to-noise ratio in dB.
        slot_idx: Slot index, forwarded as the TTI index of the time-varying channel.

    Returns:
        Received signal after channel and noise, shape (n_sc, n_symbol, n_rx_ant).
    """
    # (n_sc, n_symbol, n_tx_ant) -> (n_tx_ant, n_symbol, n_sc)
    antenna_major = tx_signal.transpose((2, 1, 0))
    # Prepend singleton cell and UE axes: (n_cell, n_ue, n_tx_ant, n_symbol, n_sc)
    channel_input = antenna_major[None, None]

    # Uplink direction: ask the channel to swap the Tx and Rx roles.
    channel_output = channel(
        freq_in=channel_input,
        tti_idx=slot_idx,
        snr_db=snr_db,
        enable_swap_tx_rx=True,
    )

    # Drop the cell/UE axes and restore the (n_sc, n_symbol, n_rx_ant) layout.
    single_link = channel_output[0, 0]
    return single_link.transpose((2, 1, 0))

PDSCH transmitter#

This creates the PDSCH transmitter. However, due to the symmetry between 5G NR PDSCH and PUSCH, it can also be used to generate PUSCH frames with a suitable parameterization. In this notebook it is used as a PUSCH transmitter to generate uplink slots.

[4]:
# The PDSCH transmitter doubles as the PUSCH (uplink) transmitter in this notebook.
# NOTE(review): num_rx_ant is also set from num_tx_ant rather than from the gNB-side
# num_rx_ant used for the channel model - presumably irrelevant for a transmit-only
# pipeline; confirm against the PdschTx documentation.
pxsch_tx = PdschTx(cell_id=cell_id, num_tx_ant=num_tx_ant, num_rx_ant=num_tx_ant)

Dataset generation#

The actual dataset generation is done here. The different channel, SNR and MCS parameters are swept through, with a number of samples per parameterization chosen such that the total number of samples will be close to the desired number.

The PxSCH transmitter created above is used to generate a Tx frame. This Tx frame is then fed through the simulated radio channel. For each sample, the received IQ data and the SNR/noise variance are saved as pickle files, and the accompanying metadata is recorded in a Parquet file containing PUSCH records that roughly follow the Small Cell Forum FAPI specification format.

[5]:
# Every combination of the swept parameters gets the same number of samples
# (integer division), so the total is at most `num_samples`.
num_cases = len(delay_profiles) * len(esnos) * len(speeds) * len(delay_spreads) * len(mcss)
num_samples_per_param = num_samples // num_cases

# Loop over the different channel models, speeds, delay spreads, MCS levels etc.
pusch_records = []
for (delay_profile, esno, speed, delay_spread, mcs) in \
        (pbar := tqdm(itertools.product(delay_profiles, esnos, speeds, delay_spreads, mcss), total=num_cases)):

    status_str = f"Generating... ({channel_type}-{delay_profile} | {esno} dB | {speed} m/s | {delay_spread} ns | MCS {mcs})"
    pbar.set_description(status_str)

    # Create the channel model.
    channel = create_channel(
        channel_type=channel_type,
        delay_profile=delay_profile,
        delay_spread=delay_spread,
        speed=speed,
        num_tx_ant=num_tx_ant,
        num_rx_ant=num_rx_ant,
        carrier_frequency=carrier_frequency
    )

    # Everything below depends only on the parameter combination, not on the
    # individual sample, so it is computed once per combination.
    mod_order, coderate = get_mcs(mcs, mcs_table)  # Modulation order and code rate.
    target_code_rate = coderate * 10               # Same value goes to the transmitter and the record.
    No = pow(10., -esno / 10.)                     # Noise variance from Es/No in dB (linear scale).
    channel_name = f"{channel_type}-{delay_profile}"
    sample_tag = f"{channel_name}_esno{esno}_speed{speed}_ds{delay_spread}_mcs{mcs}"

    for sample in range(num_samples_per_param):
        # Slot number within the frame.
        slot_number = sample % num_slots_per_frame

        # Draw a random transport block matching the allocation.
        tb_input = random_tb(
            mod_order=mod_order,
            code_rate=coderate,
            dmrs_syms=dmrs_position,
            num_prbs=num_prbs,
            start_sym=start_sym,
            num_symbols=num_symbols,
            num_layers=layers)
        tb_input = cp.array(tb_input, order='F', dtype=cp.uint8)  # Move to GPU

        # Transmit PxSCH. This is where we set the dynamically changing parameters.
        # Input parameters are given as lists as the interface supports multiple UEs.
        tx_tensor = pxsch_tx.run(
            tb_inputs=[tb_input],           # Input transport block in bytes
            num_ues=1,                      # We simulate only one UE here.
            slot=slot_number,               # Slot number.
            dmrs_syms=dmrs_position,        # List of binary numbers indicating which symbols are DMRS.
            start_sym=start_sym,            # Start symbol index.
            num_symbols=num_symbols,        # Number of symbols.
            scids=[scid],                   # DMRS scrambling ID.
            layers=[layers],                # Number of layers (transmission rank).
            dmrs_ports=[dmrs_port],         # DMRS port(s) to be used.
            rntis=[rnti],                   # UE RNTI.
            data_scids=[data_scid],         # Data scrambling ID.
            code_rates=[target_code_rate],  # Code rate
            mod_orders=[mod_order]          # Modulation order
        )

        # Channel transmission and noise using FadingChannel. The running sample
        # index is used as the slot index of the time-varying channel.
        rx_tensor = apply_channel(channel, tx_tensor, esno, sample)
        # Convert from GPU (CuPy) to CPU (NumPy) for saving.
        rx_tensor = rx_tensor.get()

        # Save the sample.
        rx_iq_data_filename = f"rx_iq_{sample_tag}_{sample}.pkl"
        rx_iq_data_fullpath = os.path.join(dataset_dir, rx_iq_data_filename)
        save_pickle(data=rx_tensor, filename=rx_iq_data_fullpath)

        # Save noise power and SNR data as user data.
        user_data_filename = f"user_data_{sample_tag}_{sample}.pkl"
        user_data_fullpath = os.path.join(dataset_dir, user_data_filename)
        user_data = dict(
            snr=esno,
            noise_var=No
        )
        save_pickle(data=user_data, filename=user_data_fullpath)

        pusch_record = PuschRecord(
            # SCF FAPI 10.02 UL_TTI.request message parameters:
            pduIdx=0,
            # The system frame number runs from 0 to 1023, hence the wrap at 1024.
            SFN=(sample // num_slots_per_frame) % 1024,
            Slot=slot_number,
            nPDUs=1,
            RachPresent=0,
            nULSCH=1,
            nULCCH=0,
            nGroup=1,
            PDUSize=0,
            pduBitmap=1,
            RNTI=rnti,
            Handle=0,
            BWPSize=273,
            BWPStart=0,
            SubcarrierSpacing=mu,
            CyclicPrefix=0,
            targetCodeRate=target_code_rate,
            qamModOrder=mod_order,
            mcsIndex=mcs,
            mcsTable=mcs_table - 1,  # Different indexing
            TransformPrecoding=1,  # Disabled.
            dataScramblingId=data_scid,
            nrOfLayers=layers,  # Kept in sync with the transmitted number of layers.
            ulDmrsSymbPos=dmrs_bit_array_to_fapi(dmrs_position),
            dmrsConfigType=0,
            ulDmrsScramblingId=cell_id,
            puschIdentity=cell_id,
            SCID=scid,
            numDmrsCdmGrpsNoData=2,
            dmrsPorts=1,  # Note that FAPI uses a different format compared to cuPHY.
            resourceAlloc=1,
            rbBitmap=np.array(36 * [0]),
            rbStart=0,
            rbSize=num_prbs,  # Kept in sync with the transmitted allocation size.
            VRBtoPRBMapping=0,
            FrequencyHopping=0,
            txDirectCurrentLocation=0,
            uplinkFrequencyShift7p5khz=0,
            StartSymbolIndex=start_sym,
            NrOfSymbols=num_symbols,
            puschData=None,
            puschUci=None,
            puschPtrs=None,
            dftsOfdm=None,
            Beamforming=None,

            # SCF FAPI 10.02 RxData.indication message parameters:
            HarqID=0,
            PDULen=len(tb_input),
            UL_CQI=255,  # Set to invalid 0xFF.
            TimingAdvance=0,
            RSSI=65535,  # Set to invalid 0xFFFF.
            macPdu=tb_input.get(),  # Transport block copied back to the CPU.

            TbCrcStatus=0,
            NumCb=0,
            CbCrcStatus=None,

            rx_iq_data_filename=rx_iq_data_filename,
            user_data_filename=user_data_filename,

            errInd=""
        )
        pusch_records.append(pusch_record)

# Write the per-sample metadata next to the pickled IQ/user data files.
print("Saving...")
df_filename = os.path.join(dataset_dir, "l2_metadata.parquet")
df = pd.DataFrame.from_records(pusch_records, columns=PuschRecord._fields)
df.to_parquet(df_filename, engine="pyarrow")
print("All done!")
Saving...
All done!