pdc_project/encoder.py
2025-05-30 00:44:20 +02:00

82 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# encoder.py — 2-char/1-codeword implementation
from typing import Optional, Tuple

import numpy as np
##############################################################################
# Public constants
##############################################################################
# 64-symbol alphabet, in index order: a-z, A-Z, 0-9, space, full stop.
CHAR_SET = list(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    " ."
)
assert len(CHAR_SET) == 64

# Forward and reverse lookup tables between a symbol and its position
# in CHAR_SET.
CHAR_TO_IDX = dict(zip(CHAR_SET, range(len(CHAR_SET))))
IDX_TO_CHAR = dict(enumerate(CHAR_SET))

G = 10.0               # channel gain
ENERGY_LIMIT = 2000.0  # global limit on the squared norm ||x||^2 of a signal
TEXT_LEN = 40          # message length in characters; must stay 40
##############################################################################
# Hadamard-codebook utilities
##############################################################################
def _hadamard(r: int) -> np.ndarray:
    """
    Return the 2**r × 2**r Hadamard matrix built by Sylvester doubling.

    Starting from the 1×1 matrix [[1]], each step replaces H with
    [[H, H], [H, -H]]. The result has float32 entries equal to +1 or -1.

    Parameters
    ----------
    r : int
        Number of doubling steps; must be >= 0.

    Raises
    ------
    ValueError
        If `r` is negative (a recursive formulation would never reach its
        base case and end in RecursionError).
    """
    if r < 0:
        raise ValueError(f"r must be a non-negative integer, got {r}")
    H = np.ones((1, 1), dtype=np.float32)
    # Iterative doubling: same matrices as the recursive definition, without
    # the call-stack depth.
    for _ in range(r):
        H = np.block([[H, H],
                      [H, -H]])
    return H
def _Br(r: int) -> np.ndarray:
    """
    Stack the order-`r` Hadamard matrix on top of its negation.

    Returns an array with 2^(r+1) rows and 2^r columns: the first half of
    the rows is `_hadamard(r)`, the second half is `-_hadamard(r)`.
    """
    had = _hadamard(r)
    return np.concatenate((had, -had), axis=0)
##############################################################################
# Public API
##############################################################################
def make_codebook(r: int = 11,
                  num_blocks: int = TEXT_LEN//2,
                  energy_budget: float = ENERGY_LIMIT
                  ) -> Tuple[np.ndarray, float]:
    """
    Build the scaled codebook C and return it together with the scale α.

    C is `_Br(r)` placed side by side with itself, so it has 2^(r+1) rows
    and 2^(r+1) columns (4096×4096 for the default r = 11), stored as
    float32. Every entry is ±sqrt(α).

    α is chosen so that **after the per-sample duplication** performed in
    encode_message (factor 2), a message made of `num_blocks` codeword rows
    has total energy `energy_budget`:

        α = energy_budget / (num_blocks * 2 * n),   n = number of columns.

    Parameters
    ----------
    r : int
        Hadamard order passed to `_Br`.
    num_blocks : int
        Number of codewords per message; must be positive.
    energy_budget : float
        Total signal energy to spend; must be non-negative.

    Returns
    -------
    (C, alpha) : Tuple[np.ndarray, float]
        The scaled codebook and the per-sample energy α.

    Raises
    ------
    ValueError
        If `num_blocks` is not positive or `energy_budget` is negative/NaN.
        (Without this check a negative value would silently yield a codebook
        full of NaN, and zero blocks a bare ZeroDivisionError.)
    """
    if num_blocks <= 0:
        raise ValueError(f"num_blocks must be positive, got {num_blocks}")
    if not energy_budget >= 0:  # written this way so NaN is rejected too
        raise ValueError(
            f"energy_budget must be non-negative, got {energy_budget}")

    B = _Br(r)                                   # 2^(r+1) × 2^r
    # hstack already returns a fresh array; copy=False avoids duplicating it
    # again when it is float32 already.
    C = np.hstack((B, B)).astype(np.float32, copy=False)
    n = C.shape[1]                               # samples per codeword
    dup_factor = 2                               # sample-duplication in encode_message
    alpha = energy_budget / (num_blocks * dup_factor * n)
    C *= np.sqrt(alpha, dtype=C.dtype)           # entries become ±sqrt(α)
    return C, alpha
def pair_to_index(a: str, b: str) -> int:
    """
    Map a two-character pair to a single integer index.

    The index is 64 times the CHAR_TO_IDX position of `a` plus the
    CHAR_TO_IDX position of `b`. A character missing from CHAR_TO_IDX
    raises KeyError.
    """
    high = CHAR_TO_IDX[a]
    low = CHAR_TO_IDX[b]
    return high * 64 + low
def index_to_pair(k: int) -> tuple[str, str]:
    """
    Inverse of pair_to_index: split index `k` into its two characters.

    The quotient of `k` by 64 selects the first character and the remainder
    the second, both looked up in IDX_TO_CHAR.

    Raises ValueError when `k` is outside 0..4095.
    """
    if 0 <= k < 4096:
        first, second = divmod(k, 64)
        return IDX_TO_CHAR[first], IDX_TO_CHAR[second]
    raise ValueError("index out of range [0,4095]")
def encode_message(msg: str, C: np.ndarray, *,
                   energy_limit: Optional[float] = None) -> np.ndarray:
    """
    Encode a 40-character message into a float32 signal.

    Each consecutive 2-character pair selects one codeword row of `C` via
    pair_to_index. The rows are concatenated and the whole signal is then
    duplicated sample-wise (every sample appears twice in a row), so that
    the even and odd sample indices each carry one copy.

    Parameters
    ----------
    msg : str
        Message of exactly TEXT_LEN characters.
    C : np.ndarray
        Codebook whose rows are indexed by pair_to_index.
    energy_limit : float, optional (keyword-only)
        Expected total energy of the encoded signal. Defaults to
        ENERGY_LIMIT. Pass the `energy_budget` given to make_codebook when
        the codebook was built with a non-default budget; otherwise the
        sanity check below could never pass for such a codebook.

    Returns
    -------
    np.ndarray
        1-D float32 array: the concatenated codewords with each sample
        repeated twice.

    Raises
    ------
    ValueError
        If `msg` does not have exactly TEXT_LEN characters.
    KeyError
        If `msg` contains a character that pair_to_index cannot map.
    RuntimeError
        If the signal energy does not match `energy_limit`.
    """
    if energy_limit is None:
        energy_limit = ENERGY_LIMIT
    if len(msg) != TEXT_LEN:
        raise ValueError("Message must be exactly 40 characters.")

    # One codeword row per non-overlapping character pair.
    rows = [C[pair_to_index(msg[i], msg[i + 1])]
            for i in range(0, TEXT_LEN, 2)]
    # concatenate already allocates; copy=False skips a second copy when the
    # codebook is float32.
    signal = np.concatenate(rows).astype(np.float32, copy=False)
    signal = np.repeat(signal, 2)  # duplicate every sample

    # Numeric safety check. The energy is accumulated in float64 so rounding
    # in the sum does not blur the comparison. Note that np.isclose adds its
    # default relative tolerance (1e-5 * |energy_limit|) on top of atol.
    e = np.sum(np.square(signal), dtype=np.float64)
    if not np.isclose(e, energy_limit, atol=1e-3):
        raise RuntimeError(
            f"energy sanity check failed ({e:.3f} ≠ {energy_limit:g})")
    return signal