# pdc_project/encoder.py

# encoder.py  — 2-char/1-codeword implementation
import numpy as np
from typing import Tuple

##############################################################################
# Public constants
##############################################################################
# The 64-symbol alphabet in index order: a-z, A-Z, 0-9, space, period.
CHAR_SET = [
    *map(chr, range(ord('a'), ord('z') + 1)),
    *map(chr, range(ord('A'), ord('Z') + 1)),
    *map(str, range(10)),
    ' ',
    '.',
]
assert len(CHAR_SET) == 64
# Forward (symbol -> position) and reverse (position -> symbol) lookups.
CHAR_TO_IDX = {symbol: position for position, symbol in enumerate(CHAR_SET)}
IDX_TO_CHAR = dict(enumerate(CHAR_SET))

# Channel gain.
G = 10.0
# Global limit on the signal energy ‖x‖².
ENERGY_LIMIT = 2000.0
# Message length in characters; must stay 40.
TEXT_LEN = 40

##############################################################################
# Hadamard-codebook utilities
##############################################################################
def _hadamard(r: int) -> np.ndarray:
    """
    Return the 2**r × 2**r Hadamard matrix from the doubling construction.

    Starting from the 1×1 matrix [[1]], the step H -> [[H, H], [H, -H]] is
    applied `r` times.  The result is a float32 array with entries ±1.

    Raises
    ------
    ValueError
        If `r` is negative or not a whole number.  Such values can never
        reach the r == 0 starting point, so they are rejected up front
        instead of looping or recursing without end.
    """
    order = int(r)
    if order != r or order < 0:
        raise ValueError(f"r must be a non-negative integer, got {r!r}")

    H = np.ones((1, 1), dtype=np.float32)
    # Iterative doubling: each pass turns a k×k matrix into a 2k×2k one.
    for _ in range(order):
        H = np.block([[H,  H],
                      [H, -H]])
    return H

def _Br(r: int) -> np.ndarray:
    """Stack the matrix from `_hadamard(r)` on top of its negation."""
    hadamard = _hadamard(r)
    # Row-wise stacking: original rows first, sign-flipped rows after,
    # giving a 2^(r+1) × 2^r array.
    return np.concatenate((hadamard, -hadamard), axis=0)

##############################################################################
# Public API
##############################################################################
def make_codebook(r: int = 11,
                  num_blocks: int = TEXT_LEN//2,
                  energy_budget: float = ENERGY_LIMIT
                 ) -> Tuple[np.ndarray, float]:
    """
    Build the scaled codebook C used by both encoder & decoder.

    The unscaled codebook is ``[B | B]`` with ``B = _Br(r)``: a square
    ``2^(r+1) × 2^(r+1)`` array of ±1 entries (4096 × 4096 for the default
    ``r = 11``).  Every entry is then multiplied by ``sqrt(alpha)``, where

        alpha = energy_budget / (num_blocks * 2 * n),   n = row length,

    so that `num_blocks` rows, each sample sent twice (the per-sample
    duplication done in ``encode_message``), add up to `energy_budget`.

    Parameters
    ----------
    r : int
        Order passed to ``_Br``.
    num_blocks : int
        Number of codeword rows in one message; must be positive.
    energy_budget : float
        Total energy of a full message; must be non-negative.

    Returns
    -------
    (C, alpha) : tuple
        The float32 codebook and the per-sample energy ``alpha``.

    Raises
    ------
    ValueError
        If `num_blocks` is not positive or `energy_budget` is negative or
        NaN.  Without this check a negative ``alpha`` would make
        ``np.sqrt`` return NaN and silently poison the whole codebook.
    """
    # Validate before the (expensive) Hadamard construction.
    if num_blocks <= 0:
        raise ValueError(f"num_blocks must be positive, got {num_blocks!r}")
    if not (energy_budget >= 0):          # also rejects NaN
        raise ValueError(
            f"energy_budget must be non-negative, got {energy_budget!r}")

    B           = _Br(r)                              # 2^(r+1) × 2^r
    # hstack already returns a fresh array; copy=False avoids a second copy.
    C           = np.hstack((B, B)).astype(np.float32, copy=False)
    n           = C.shape[1]                          # samples per codeword
    dup_factor  = 2                                   # must match the repeat count in encode_message
    alpha       = energy_budget / (num_blocks * dup_factor * n)
    C *= np.sqrt(alpha, dtype=C.dtype)
    return C, alpha

def pair_to_index(a: str, b: str) -> int:
    """
    Map a character pair to its codeword index.

    The pair is read as a two-digit base-64 number: `a` supplies the high
    digit and `b` the low digit, each looked up in CHAR_TO_IDX.
    """
    high_digit = CHAR_TO_IDX[a]
    low_digit = CHAR_TO_IDX[b]
    return high_digit * 64 + low_digit

def index_to_pair(k: int) -> tuple[str, str]:
    """
    Inverse of `pair_to_index`: split index `k` into its two characters.

    Raises ValueError when `k` lies outside 0..4095.
    """
    if 0 <= k < 4096:
        # High and low base-64 digits of k select the two characters.
        high_digit, low_digit = divmod(k, 64)
        return IDX_TO_CHAR[high_digit], IDX_TO_CHAR[low_digit]
    raise ValueError("index out of range [0,4095]")

def encode_message(msg: str, C: np.ndarray) -> np.ndarray:
    """
    Turn a 40-character message into the transmitted sample vector.

    The message is read as consecutive 2-character pairs; each pair picks
    one row of `C` through `pair_to_index`.  The picked rows are joined end
    to end, cast to float32, and every sample is then emitted twice in a
    row (x0, x0, x1, x1, ...), so the even-indexed and the odd-indexed
    samples each hold one full copy of the signal.

    Raises
    ------
    ValueError
        If `msg` does not have exactly TEXT_LEN characters.
    RuntimeError
        If the energy of the final signal is not close to ENERGY_LIMIT.
    """
    if len(msg) != TEXT_LEN:
        raise ValueError("Message must be exactly 40 characters.")

    # One codeword row per character pair, in message order.
    codewords = [
        C[pair_to_index(msg[pos], msg[pos + 1])]
        for pos in range(0, TEXT_LEN, 2)
    ]
    signal = np.repeat(np.concatenate(codewords).astype(np.float32), 2)

    # Sanity check: total energy must match the limit up to float error.
    e = np.sum(signal**2)
    if np.isclose(e, ENERGY_LIMIT, atol=1e-3):
        return signal
    raise RuntimeError(f"energy sanity check failed ({e:.3f} ≠ 2000)")