# RoPE Mental Model
# Annotated reading material. Running this file is optional.
# Source-of-truth focus: Read RoPE as a Q/K rotation before attention, not as an added embedding vector.

def rotate_pair(x0, x1, cos, sin):
    """Rotate the 2D point ``(x0, x1)`` using a precomputed cosine and sine.

    This is the mental model for one RoPE pair: two components are treated
    as coordinates in a plane and turned together by a single angle.

    Args:
        x0: First component of the pair.
        x1: Second component of the pair.
        cos: Cosine of the rotation angle.
        sin: Sine of the rotation angle.

    Returns:
        A 2-tuple ``(rotated_x0, rotated_x1)`` holding the rotated components.
    """
    # Standard 2x2 rotation matrix applied to the column vector (x0, x1).
    rotated_x0 = x0 * cos - x1 * sin
    rotated_x1 = x0 * sin + x1 * cos
    return rotated_x0, rotated_x1

# Worked example: rotate one (even, odd) query pair by its position-dependent angle.
import math  # imported here because this file has no top-level import block

position = 7
frequency = 0.01
angle = position * frequency  # rotation angle for this pair at this position

# Illustrative values for one pair of query components; any real numbers work.
q_even = 1.0
q_odd = 0.0

# Pass numbers rather than placeholder strings: rotate_pair multiplies its
# arguments, and multiplying two strings raises TypeError when the file is run.
q_pair_after_rope = rotate_pair(q_even, q_odd, math.cos(angle), math.sin(angle))

# What to explain while reading:
# - pair groups adjacent dimensions into a 2D plane.
# - angle depends on position and frequency.
# - Only Q/K need the rotation because they create attention scores.
#
# Common traps:
# - RoPE is not simply sinusoidal addition.
# - Changing RoPE scaling can change model behavior.
