<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sitemap (sitemaps.org 0.9) listing the pages of https://llm.jerrinot.info/.
  Each <url> entry carries only a <loc>; no lastmod, changefreq or priority is set.
  Keep every <loc> value on a single line: whitespace inside the element is part of
  the text a sitemap consumer receives.
  The news, xhtml, image and video namespaces are declared but not used by any entry below.
  NOTE(review): this file looks machine-generated; if so, apply changes in the
  generator rather than by hand. Confirm before editing.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
  <url>
    <loc>https://llm.jerrinot.info/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/architectures/gemma4-overview/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/architectures/gqa-and-mqa/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/architectures/moe-aggregation/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/architectures/moe-router/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/architectures/shared-kv-layers/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/architectures/sliding-window-attention/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/attention-output/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/causal-attention/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/feed-forward-network/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/full-decoder-block/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/multi-head-attention/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/position-and-rope/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/qkv-projections/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/residuals-and-rmsnorm/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/block/what-a-layer-does/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/capstone/zero-to-hero/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/case-study/dense-gemma/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/case-study/gemma4-attention/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/case-study/gemma4-moe/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/case-study/optimization-and-regression/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/case-study/profile-interpretation/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/dashboard/progress/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/inference/batching-and-ubatching/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/inference/context-scaling/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/inference/continuous-batching/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/inference/decode/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/inference/kv-cache/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/inference/prefill/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/math/dot-product/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/math/embeddings-as-vectors/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/math/linear-algebra-synthesis/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/math/linear-projections/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/math/matrix-multiply/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/math/scalars-vectors/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/math/tensor-shapes/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/performance/compute-vs-memory/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/performance/gemm-vs-gemv/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/performance/quantization/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/performance/repack-and-layout/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/performance/threads-and-affinity/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/performance/validate-speedups/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/performance/where-time-goes/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/probability/argmax-and-sampling/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/probability/foundations-checkpoint/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/probability/logits/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/probability/softmax/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/quiz/architecture-variants/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/quiz/dense-transformer/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/quiz/inference-mechanics/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/quiz/performance-reasoning/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/reference/glossary/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/reference/live-inference/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/start/one-request-end-to-end/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/start/what-an-llm-does/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/start/what-you-will-learn/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/tokens/context-length/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/tokens/discrete-input-synthesis/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/tokens/vocabulary-and-ids/</loc>
  </url>
  <url>
    <loc>https://llm.jerrinot.info/tokens/what-is-a-token/</loc>
  </url>
</urlset>