# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Derate match (NumPy translation of derate_match.m).
Implements TS 38.212 Section 5.4.2 de-rate matching steps using the
MATLAB logic provided in derate_match.m including LBMR, RV offsets,
bit deinterleaving, bit selection undo, filler insertion, and clamping.
"""
import numpy as np
from ran.constants import LLR_CLAMP_ABS
from ran.types import FloatArrayNP, FloatNP, IntArrayNP, IntNP
def derate_match(  # noqa: PLR0913, PLR0915
    llr_descr: FloatArrayNP,
    bgn: int,
    c: int,
    qam_bits: int,
    k: int,
    f: int,
    k_prime: int,
    zc: int,
    nl: int,
    rv_idx: int,
    nref: int,
    g: int,
) -> tuple[
    FloatArrayNP,
    int,
    IntArrayNP,
    IntArrayNP,
]:
    """
    De-rate match LLRs into codeblocks.
    Implements the de-rate matching steps defined in 3GPP TS 38.212 §5.4.2
    using LBMR (limited buffer mother rate), RV offsets, bit deinterleaving,
    bit-selection undo, filler insertion, and clamping.
    Args:
        llr_descr:
            (N?, C) float64. Descrambled LLRs arranged per codeblock (CB).
            The first dimension must contain at least max(E_r) per CB; only
            the first E_r entries in each column are consumed (where E_r is
            computed internally from G, C, Q_m, and N_layers).
        bgn:
            int. LDPC base graph number (1 or 2).
        c:
            int. Number of codeblocks (CBs) after transport-block segmentation.
        qam_bits:
            int. Modulation order Q_m in bits per symbol
            (1=BPSK, 2=QPSK, 4=16QAM, 6=64QAM, 8=256QAM).
        k:
            int. Codeblock length AFTER filler insertion, i.e., the expanded
            CB length used by the mother LDPC code before parity section
            (K). By definition K = K' + f. Indices in this function are
            expressed relative to K (e.g., the filler region is
            [K'-2Z_c, K-2Z_c)).
        f:
            int. Number of filler bits in the CB. These positions are not
            transmitted and are reinserted with clamped LLRs during de-rate
            matching. By definition f = K - K'.
        k_prime:
            int. Codeblock length BEFORE filler insertion (K'). This includes
            the information bits for the CB plus the CB-CRC, but excludes
            filler bits.
        zc:
            int. LDPC lifting size Z_c (submatrix dimension used to expand the
            base graph). It sets the mother code lengths:
            N = Z_c * (66 for BG1, 50 for BG2).
        nl:
            int. Number of layers (N_layers) used for transmission.
        rv_idx:
            int. Redundancy version index (0..3) used to compute the circular
            buffer read-out offset k0 for HARQ de-rate matching.
        nref:
            int. Optional cap on the per-CB circular buffer length (N_ref).
            If > 0, n_cb = min(N, N_ref); otherwise n_cb = N. This mirrors
            the spec's LBMR behavior when resources limit the read-out span.
        g:
            int. Total number of coded bits G allocated to this codeword
            across all layers for the scheduled PDSCH/PUSCH allocation. G is
            used to compute the per-CB bit budgets E_r.
    Returns
    -------
    derate_cbs:
        (N, C) float64. LLRs per CB after de-rate matching, including
        filler re-insertion and clamping, where N = Z_c * (66 or 50).
    nv_parity:
        int. Number of parity variable nodes inferred for the read-out,
        clamped to [4, 46] for BG1 or [4, 42] for BG2 in this
        implementation.
    derate_cbs_indices:
        (N, C) int64. One-based indices (with final offset applied) of
        the mother-code positions that each consumed LLR contributed to,
        per CB.
    derate_cbs_sizes:
        (C,) int64. The number of consumed bits E_r per CB column.
    Raises
    ------
    ValueError:
        If ``bgn`` is not 1 or 2, if ``rv_idx`` is outside 0..3, or if the
        parameters yield an empty acceptance cycle (no readable positions).
    Notes
    -----
    • Mother code length: N = Z_c * (66 for BG1, 50 for BG2).
    • K (k) and K' (k_prime) differ by the filler count f = K - K'.
      Filler positions are not transmitted and are reconstructed by
      assigning clamped LLRs in [K'-2Z_c, K-2Z_c).
    • G determines the per-CB E_r through the standard split across C CBs,
      modulation order Q_m, and N_layers.
    """
    # ---- Input validation: fail fast BEFORE any BG/RV-dependent arithmetic
    # (the original checked these only after using bgn, which worked but
    # computed meaningless intermediate values first).
    if bgn not in (1, 2):
        msg = "BGN is not supported"
        raise ValueError(msg)
    # RV-dependent numerators v used in the k0 formula (per-BG table in
    # TS 38.212 §5.4.2.1); index by rv_idx after range checking.
    numerators = (0, 17, 33, 56) if bgn == 1 else (0, 13, 25, 43)
    if rv_idx < 0 or rv_idx >= len(numerators):
        msg = "rv is not supported"
        raise ValueError(msg)
    # ---- Per-codeblock bit budgets E_r (vector "e"): how many coded bits
    # are available/consumed for each CB r after accounting for modulation and
    # layers. TS 38.212 splits total coded bits G across C CBs such that most
    # CBs get floor(G/C') and the remainder get ceil(G/C'), where
    # C' = C * Q_m * N_layers expressed in bits per symbol across layers.
    nl_qam = qam_bits * nl  # Q_m * N_layers (bits per symbol across all layers)
    e_denom = c * nl_qam  # Denominator C' = C * (Q_m * N_layers)
    # floor(G / C') scaled back to bits (multiple of Q_m * N_layers)
    e_floor = nl_qam * (g // e_denom)
    # ceil(G / C') using integer math trick: ceil(a/b) = (a + b - 1) // b
    e_ceil = nl_qam * ((g + e_denom - 1) // e_denom)
    # Determine how many CBs receive e_ceil instead of e_floor. The spec assigns
    # the larger share to the last (mod(G/(Q_m*N_layers), C)) CBs. We compute the
    # remainder and a threshold so that r <= threshold -> e_floor else e_ceil.
    rem = (g // nl_qam) % c  # remainder CB count when distributing symbols
    threshold = c - rem - 1  # index cutoff between floor and ceil allocations
    r_idx: IntArrayNP = np.arange(c, dtype=IntNP)  # CB index r = 0..C-1
    # e[r] holds E_r, i.e., number of coded bits consumed for CB r
    e = np.where(r_idx <= threshold, e_floor, e_ceil).astype(IntNP, copy=False)
    # ---- Limited Buffer Mother Rate (LBMR): the circular buffer length used
    # during read-out. Mother-code length N depends on base graph
    # (BG1: 66*Z_c, BG2: 50*Z_c). Optionally cap by N_ref (if > 0) per spec
    # when resource-limited.
    bg_cols = 66 if bgn == 1 else 50  # D: BG-dependent column constant
    n = zc * bg_cols  # Mother-code length N
    n_cb = min(n, nref) if nref > 0 else n  # Effective circular buffer length
    # ---- Redundancy Version (RV) offset k0: start position in the circular
    # buffer. Per 38.212, k0 = floor((v * N_cb) / (Z_c * D)) * Z_c with v from
    # the per-BG table above; k0 is a multiple of Z_c and defined as 0 for v=0.
    num = numerators[rv_idx]
    k0 = 0 if num == 0 else ((num * n_cb) // (bg_cols * zc)) * zc
    # ---- Parity section sizing: infer how many parity variable-node groups
    # were read.
    # max_parity_nodes: BG-dependent cap on parity groups
    # nllr_per_cb: total consumed LLRs per CB considering k0 offset
    #     (circular), bounded by the mother-code length N
    # nsym_llr_per_cb: systematic portion size = K' - 2*Z_c (excludes filler)
    # npar_llr_per_cb: remaining LLRs attributed to parity section
    max_parity_nodes = 46 if bgn == 1 else 42
    nllr_per_cb = min(g // c + k0, n)
    nsym_llr_per_cb = k - f - 2 * zc
    npar_llr_per_cb = nllr_per_cb - nsym_llr_per_cb
    # Compute number of parity variable nodes (nv_parity). Each node accounts
    # for a Z_c-sized group in the parity portion. Clamp to
    # [4, max_parity_nodes] to mirror MATLAB and hardware behavior.
    nv_parity = max(4, min((npar_llr_per_cb + zc - 1) // zc, max_parity_nodes))
    # ---- Outputs
    # derate_cbs: accumulated LLRs in the mother-code domain per CB (N x C)
    derate_cbs: FloatArrayNP = np.zeros((n, c), dtype=FloatNP)
    # derate_cbs_indices: for traceability, the 1-based mother-code indices
    # that each consumed LLR mapped to (per CB), after final offset.
    derate_cbs_indices: IntArrayNP = np.zeros((n, c), dtype=IntNP)
    # derate_cbs_sizes: the number of consumed bits E_r per CB (length C)
    derate_cbs_sizes: IntArrayNP = np.zeros((c,), dtype=IntNP)
    # ---- Process each codeblock
    current_bit = 0  # cursor into flattened LLR stream across CBs
    llr_descr_vec = llr_descr.ravel(order="F")  # column-major flatten (per-CB contiguous)
    # Build acceptance mask in the circular buffer domain (1-based indexing as
    # in spec): accept positions outside the filler region [K'-2Z_c, K-2Z_c).
    # This inverts the bit-selection performed during rate-matching.
    idx1b_base: IntArrayNP = np.arange(1, n_cb + 1, dtype=IntNP)
    accept_mask_base = (idx1b_base <= (k_prime - 2 * zc)) | (idx1b_base > (k - 2 * zc))
    # Apply RV offset: rotate mask by -k0 so index 1 is the read-out start
    accept_mask_cycle = np.roll(accept_mask_base, -k0)
    # j_accept_cycle: 0-based indices within one n_cb-long cycle that are accepted
    j_accept_cycle = np.nonzero(accept_mask_cycle)[0].astype(IntNP)
    num_accept_per_cycle = j_accept_cycle.size  # accepted positions per cycle
    if num_accept_per_cycle == 0:
        msg = "No acceptable positions found in cycle; check parameters"
        raise ValueError(msg)
    for ci in range(c):
        e_c = e[ci]  # E_r for CB r=ci (number of bits to consume for this CB)
        # Slice this CB's LLRs: [current_bit, current_bit + E_r)
        llr_c = llr_descr_vec[current_bit : current_bit + e_c]
        # Undo bit interleaving across Q_m: reshape to (E_r/Q_m, Q_m) then
        # transpose to restore bit order per symbol, finally flatten back.
        llr_c_mat = np.reshape(llr_c, (qam_bits, e_c // qam_bits), order="F").T
        llr_c = llr_c_mat.ravel(order="F")
        # Generate the accepted index sequence for exactly E_r positions by
        # repeating a full acceptance cycle and truncating to length E_r.
        reps = (e_c + num_accept_per_cycle - 1) // num_accept_per_cycle
        j_seq = np.tile(j_accept_cycle, reps)[:e_c]
        idx_seq = (k0 + j_seq) % n_cb  # 0-based indices in circular buffer domain
        idx1b_seq = idx_seq + 1  # 1-based version for output traceability
        # Accumulate LLRs into mother-code positions. Multiple reads can map to
        # the same index due to circularity; use bincount to sum contributions.
        add_sums = np.bincount(idx_seq, weights=llr_c, minlength=n_cb)
        derate_cbs[:, ci] += add_sums
        # Record the mapping indices and E_r for this CB
        derate_cbs_indices[:e_c, ci] = idx1b_seq
        derate_cbs_sizes[ci] = e_c
        current_bit += e_c
    # Reinsert filler positions in mother-code domain: [K'-2Z_c, K-2Z_c).
    # These were not transmitted; set them to saturated LLRs to reflect
    # certainty placeholders.
    start_fill = k_prime - 2 * zc
    end_fill = k - 2 * zc
    if end_fill > start_fill:
        derate_cbs[start_fill:end_fill, :] = LLR_CLAMP_ABS
    # Saturate LLRs to match hardware/Matlab clamping behavior
    np.clip(derate_cbs, -LLR_CLAMP_ABS, LLR_CLAMP_ABS, out=derate_cbs)
    # Convert indices to the C/Matlab offset convention (add 2*Z_c - 1)
    derate_cbs_indices = derate_cbs_indices + (2 * zc - 1)
    return derate_cbs, nv_parity, derate_cbs_indices, derate_cbs_sizes
# Public API of this module: only the de-rate matching entry point.
__all__ = ["derate_match"]