# encoder.py — 2-char/1-codeword implementation
"""Encode fixed-length text messages with a scaled Hadamard-derived codebook.

Every pair of characters from a 64-symbol alphabet selects one of
64 * 64 = 4096 codebook rows.  The selected rows are concatenated and each
sample is then repeated twice to form the transmitted signal.
"""
from typing import Tuple

import numpy as np

##############################################################################
# Public constants
##############################################################################
# 64-symbol alphabet: a-z (0-25), A-Z (26-51), 0-9 (52-61), ' ' (62), '.' (63).
CHAR_SET = (
    [chr(i) for i in range(ord('a'), ord('z') + 1)]
    + [chr(i) for i in range(ord('A'), ord('Z') + 1)]
    + [str(i) for i in range(10)]
    + [' ', '.']
)
assert len(CHAR_SET) == 64

CHAR_TO_IDX = {c: i for i, c in enumerate(CHAR_SET)}
IDX_TO_CHAR = {i: c for c, i in CHAR_TO_IDX.items()}

G = 10.0                # channel gain
ENERGY_LIMIT = 2000.0   # global limit ‖x‖²
TEXT_LEN = 40           # must stay 40


##############################################################################
# Hadamard-codebook utilities
##############################################################################
def _hadamard(r: int) -> np.ndarray:
    """Return the 2^r × 2^r Sylvester-construction Hadamard matrix (float32).

    Args:
        r: Number of doubling steps; must be a non-negative integer.

    Raises:
        ValueError: If ``r`` is negative or not integer-valued.  Such values
            would otherwise never hit the ``r == 0`` base case.
    """
    if r < 0 or r != int(r):
        raise ValueError("Hadamard order r must be a non-negative integer")
    H = np.array([[1.]], dtype=np.float32)
    # Each step doubles both dimensions: H -> [[H, H], [H, -H]].
    for _ in range(int(r)):
        H = np.block([[H, H], [H, -H]])
    return H


def _Br(r: int) -> np.ndarray:
    """Stack the order-``r`` Hadamard matrix on top of its negation.

    Returns:
        Array of shape 2^(r+1) × 2^r whose second half of rows is the
        element-wise negation of the first half.
    """
    H = _hadamard(r)
    return np.vstack([H, -H])   # 2^(r+1) × 2^r


##############################################################################
# Public API
##############################################################################
def make_codebook(r: int = 11,
                  num_blocks: int = TEXT_LEN // 2,
                  energy_budget: float = ENERGY_LIMIT
                  ) -> Tuple[np.ndarray, float]:
    """Build the scaled codebook C used by both encoder and decoder.

    C is ``[B, B]`` with ``B = _Br(r)``, i.e. 2^(r+1) × 2^(r+1); with the
    default ``r = 11`` that is 4096 × 4096.  Every entry has magnitude
    ``sqrt(alpha)``, where alpha is chosen so that **after the per-sample
    duplication** in ``encode_message`` a message of ``num_blocks``
    codewords consumes exactly ``energy_budget``.

    Args:
        r: Hadamard order passed to ``_Br``.
        num_blocks: Number of codewords per message (20 by default).
        energy_budget: Total signal energy ‖x‖² to spend on one message.

    Returns:
        ``(C, alpha)``: the float32 codebook and the per-sample energy.

    Raises:
        ValueError: If ``num_blocks`` is not positive, ``energy_budget`` is
            negative, or ``r`` is rejected by ``_hadamard``.
    """
    if num_blocks <= 0:
        raise ValueError("num_blocks must be positive")
    if energy_budget < 0:
        raise ValueError("energy_budget must be non-negative")

    B = _Br(r)                                     # 4096 × 2048 for r = 11
    C = np.hstack((B, B)).astype(np.float32)       # 4096 × 4096 for r = 11
    n = C.shape[1]                                 # samples per codeword
    dup_factor = 2                                 # sample-duplication
    alpha = energy_budget / (num_blocks * dup_factor * n)
    C *= np.sqrt(alpha, dtype=C.dtype)
    return C, alpha


def pair_to_index(a: str, b: str) -> int:
    """Map a character pair to its codebook row index ``64 * idx(a) + idx(b)``.

    Raises:
        KeyError: If either character is not in ``CHAR_SET``.
    """
    return 64 * CHAR_TO_IDX[a] + CHAR_TO_IDX[b]


def index_to_pair(k: int) -> Tuple[str, str]:
    """Inverse of ``pair_to_index``: recover the character pair for row ``k``.

    Raises:
        ValueError: If ``k`` is outside ``[0, 4095]``.
    """
    if not 0 <= k < 4096:
        raise ValueError("index out of range [0,4095]")
    return IDX_TO_CHAR[k // 64], IDX_TO_CHAR[k % 64]


def encode_message(msg: str, C: np.ndarray) -> np.ndarray:
    """Encode a 40-character message into a float32 signal.

    Each 2-character pair selects one codeword row of ``C``.  After
    concatenation the whole signal is duplicated sample-wise so that the
    channel's even / odd indices each carry one clean copy.

    Args:
        msg: Message of exactly ``TEXT_LEN`` characters drawn from
            ``CHAR_SET``.
        C: Codebook as returned by ``make_codebook``.

    Returns:
        1-D float32 array of length ``2 * (TEXT_LEN // 2) * C.shape[1]``.

    Raises:
        ValueError: If ``msg`` does not have exactly ``TEXT_LEN`` characters.
        KeyError: If ``msg`` contains a character outside ``CHAR_SET``.
        RuntimeError: If the signal energy differs from ``ENERGY_LIMIT``.
            The check always uses ``ENERGY_LIMIT``, so a codebook built with
            a different ``energy_budget`` or ``num_blocks`` fails it.
    """
    if len(msg) != TEXT_LEN:
        raise ValueError("Message must be exactly 40 characters.")

    pairs = [(msg[i], msg[i + 1]) for i in range(0, TEXT_LEN, 2)]
    rows = [C[pair_to_index(a, b)] for a, b in pairs]      # 20 codewords
    signal = np.concatenate(rows).astype(np.float32)
    signal = np.repeat(signal, 2)                          # duplicate

    # Tight numeric safety-check (≡ 2000, barring float error).  Accumulate
    # in float64 so the check measures the signal rather than float32
    # summation error.
    e = np.sum(np.square(signal, dtype=np.float64))
    if not np.isclose(e, ENERGY_LIMIT, atol=1e-3):
        raise RuntimeError(f"energy sanity check failed ({e:.3f} ≠ 2000)")
    return signal