# encoder.py — 2-char/1-codeword implementation
import numpy as np
|
||
from typing import Tuple
|
||
|
||
##############################################################################
# Public constants
##############################################################################
|
||
# The 64-symbol alphabet in index order: a-z, A-Z, 0-9, space, full stop.
CHAR_SET = list(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    " ."
)
assert len(CHAR_SET) == 64

# Forward (symbol -> index) and inverse (index -> symbol) lookup tables.
CHAR_TO_IDX = {symbol: position for position, symbol in enumerate(CHAR_SET)}
IDX_TO_CHAR = dict(enumerate(CHAR_SET))

G = 10.0               # channel gain
ENERGY_LIMIT = 2000.0  # global limit ‖x‖²
TEXT_LEN = 40          # must stay 40
|
||
|
||
##############################################################################
# Hadamard-codebook utilities
##############################################################################
|
||
def _hadamard(r: int) -> np.ndarray:
|
||
if r == 0:
|
||
return np.array([[1.]], dtype=np.float32)
|
||
H = _hadamard(r-1)
|
||
return np.block([[H, H],
|
||
[H, -H]])
|
||
|
||
def _Br(r: int) -> np.ndarray:
    """Stack the order-``r`` Hadamard matrix on top of its negation.

    Returns:
        An array with 2**(r+1) rows and 2**r columns: the rows of
        ``_hadamard(r)`` followed by the same rows with every sign flipped.
    """
    base = _hadamard(r)
    return np.concatenate((base, -base), axis=0)
|
||
|
||
##############################################################################
# Public API
##############################################################################
|
||
def make_codebook(r: int = 11,
                  num_blocks: int = TEXT_LEN // 2,
                  energy_budget: float = ENERGY_LIMIT
                  ) -> Tuple[np.ndarray, float]:
    """Build the scaled codebook shared by the encoder and the decoder.

    Each row of ``_Br(r)`` is repeated twice side by side, giving a square
    matrix with 2**(r+1) rows and columns (4096 x 4096 for the default
    ``r = 11``). Every entry is then scaled from +/-1 to +/-sqrt(alpha).

    alpha is chosen so that, after the per-sample duplication performed in
    ``encode_message``, a message of ``num_blocks`` codewords consumes exactly
    ``energy_budget``:
    ``num_blocks * 2 * row_length * alpha == energy_budget``.

    Args:
        r: Hadamard order passed to ``_Br``.
        num_blocks: Number of codewords per message; must be positive.
        energy_budget: Total signal energy for one message; must be >= 0.

    Returns:
        ``(C, alpha)``: the float32 codebook and the squared magnitude of each
        of its entries.

    Raises:
        ValueError: If ``num_blocks`` is not positive or ``energy_budget`` is
            negative (either would otherwise divide by zero or silently
            produce a NaN codebook).
    """
    if num_blocks <= 0:
        raise ValueError("num_blocks must be a positive integer")
    if energy_budget < 0:
        raise ValueError("energy_budget must be non-negative")

    B = _Br(r)                                   # 2^(r+1) × 2^r
    # hstack already returns a fresh array, so copy=False avoids a second
    # full-size allocation while still guaranteeing float32.
    C = np.hstack((B, B)).astype(np.float32, copy=False)
    n = C.shape[1]                               # samples per codeword
    dup_factor = 2                               # sample-duplication
    alpha = energy_budget / (num_blocks * dup_factor * n)
    C *= np.sqrt(alpha, dtype=C.dtype)
    return C, alpha
|
||
|
||
def pair_to_index(a: str, b: str) -> int:
    """Pack two alphabet symbols into a single codeword index.

    ``a`` supplies the high base-64 digit and ``b`` the low one, so the
    result is ``64 * index(a) + index(b)``.

    Raises:
        KeyError: If either symbol is missing from ``CHAR_TO_IDX``.
    """
    high = CHAR_TO_IDX[a]
    low = CHAR_TO_IDX[b]
    return high * 64 + low
|
||
|
||
def index_to_pair(k: int) -> Tuple[str, str]:
    """Unpack a codeword index into its two alphabet symbols.

    This is the inverse of ``pair_to_index``: the quotient of ``k`` by 64
    selects the first symbol and the remainder selects the second.

    Args:
        k: Codeword index in the range [0, 4095].

    Returns:
        The ``(first, second)`` symbols looked up in ``IDX_TO_CHAR``.

    Raises:
        ValueError: If ``k`` lies outside [0, 4095].
    """
    if not 0 <= k < 4096:
        raise ValueError("index out of range [0,4095]")
    high, low = divmod(k, 64)
    return IDX_TO_CHAR[high], IDX_TO_CHAR[low]
|
||
|
||
def encode_message(msg: str, C: np.ndarray) -> np.ndarray:
    """Turn a ``TEXT_LEN``-character message into the transmitted signal.

    Consecutive character pairs (positions 0-1, 2-3, ...) each select one row
    of ``C`` via ``pair_to_index``. The selected rows are joined end to end
    and every sample is then emitted twice in a row, so the even-indexed and
    the odd-indexed samples of the result each hold one full copy.

    Args:
        msg: Message of exactly ``TEXT_LEN`` characters.
        C: Codebook whose rows are indexed by ``pair_to_index``.

    Returns:
        The 1-D float32 signal.

    Raises:
        ValueError: If ``msg`` does not have ``TEXT_LEN`` characters.
        RuntimeError: If the signal energy is not close to ``ENERGY_LIMIT``.
    """
    if len(msg) != TEXT_LEN:
        raise ValueError("Message must be exactly 40 characters.")

    # Even positions lead each pair; odd positions complete it.
    codewords = [
        C[pair_to_index(first, second)]
        for first, second in zip(msg[0::2], msg[1::2])
    ]
    flat = np.concatenate(codewords).astype(np.float32)
    signal = np.repeat(flat, 2)  # each sample appears twice, back to back

    # Sanity check: total energy must match the global limit up to float error.
    energy = np.sum(signal ** 2)
    if np.isclose(energy, ENERGY_LIMIT, atol=1e-3):
        return signal
    raise RuntimeError(f"energy sanity check failed ({energy:.3f} ≠ 2000)")
|