Eugene Yan · Tech & AI
TIER 5 2025-09-14
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Declare the encoding first: it must fall within the first 1024 bytes of the document, so it goes ahead of the inline CSS and scripts. -->
<meta charset="utf-8">
<!--Load inline css and scripts first-->
<style>:root{--c-light-text:#333;--c-light-background:#fff;--c-light-focus:#00ff00;--c-light-interactive:#007bff;--c-dark-text:#fff;--c-dark-subtext:#a6a6a6;--c-dark-background:#333;--c-dark-focus:#00ff00;--c-dark-interactive:#66b0ff;--c-dark-callout:#003166;--c-text:var(--c-light-text);--c-background:var(--c-light-background);--c-focus:var(--c-light-focus);--c-interactive:var(--c-light-interactive)}.dark-mode-checkbox:checked~.theme-container{--c-text:var(--c-dark-text);--c-background:var(--c-dark-background);--c-focus:var(--c-dark-focus);--c-interactive:var(--c-dark-interactive)}html.dark-mode{--c-text:var(--c-dark-text);--c-background:var(--c-dark-background);--c-focus:var(--c-dark-focus);--c-interactive:var(--c-dark-interactive)}html.dark-mode .tag{background-color:#3e3e3e;color:var(--c-dark-interactive)}html.dark-mode a.tag:hover{background-color:var(--c-dark-interactive);color:#3e3e3e}a{text-decoration:none;background-color:transparent;color:var(--c-interactive)}</style>
<!-- darkmode JS at start of the doc so to ensure consistent view mode -->
<link href="/js/darkmode.js" rel="preload" as="script">
<script src="/js/darkmode.js" type="e98c7fd0fab5af1e4999090d-text/javascript"></script>
<!--Add active class to nav bar-->
<link href="/js/navbar.js" rel="preload" as="script">
<script src="/js/navbar.js" defer type="e98c7fd0fab5af1e4999090d-text/javascript"></script>
<!-- Load jQuery before anchor.min.js -->
<link href="/js/jquery-3.7.1.min.js" rel="preload" as="script">
<script src="/js/jquery-3.7.1.min.js" defer type="e98c7fd0fab5af1e4999090d-text/javascript"></script>
<!--Add anchors to headers-->
<link href="/js/anchor.min.js" rel="preload" as="script">
<script src="/js/anchor.min.js" defer type="e98c7fd0fab5af1e4999090d-text/javascript"></script>
<!-- Algolia Insights: defines window.aa as a stub that queues its calls, then injects the locally hosted search-insights script (async) before the first script element. -->
<script type="e98c7fd0fab5af1e4999090d-text/javascript">
var ALGOLIA_INSIGHTS_SRC = "/js/search-insights.min.js"; // Using local version
!function(e,a,t,n,s,i,c){e.AlgoliaAnalyticsObject=s,e[s]=e[s]||function(){
(e[s].queue=e[s].queue||[]).push(arguments)},e[s].version=(n.match(/@([^\/]+)\/?.*/) || [])[1],i=a.createElement(t),c=a.getElementsByTagName(t)[0],
i.async=1,i.src=n,c.parentNode.insertBefore(i,c)
}(window,document,"script",ALGOLIA_INSIGHTS_SRC,"aa");
</script>
<!-- Send the Algolia Insights init call, guarded in case the aa stub above was not defined. -->
<script type="e98c7fd0fab5af1e4999090d-text/javascript">
if (typeof aa === 'function') {
aa('init', {
appId: '2XJCLEABQD',
apiKey: 'b61ec4cb64bd32d62c053466fccbfa43',
useCookie: true
});
}
</script>
<meta name="HandheldFriendly" content="True">
<meta name="MobileOptimized" content="320">
<meta name="viewport" content="width=device-width, initial-scale=0.86, maximum-scale=3.0, minimum-scale=0.86">
<meta name="description" content="An LLM that can converse in English & item IDs, and make recommendations w/o retrieval or tools.">
<meta name="author" content="Eugene Yan">
<meta content="eugeneyan.com" property="og:site_name">
<meta name=twitter:card content=summary_large_image>
<meta name=twitter:domain content=eugeneyan.com>
<meta content="Training an LLM-RecSys Hybrid for Steerable Recs with Semantic IDs" property="og:title">
<meta name=twitter:title content="Training an LLM-RecSys Hybrid for Steerable Recs with Semantic IDs">
<meta content="article" property="og:type">
<meta content="An LLM that can converse in English & item IDs, and make recommendations w/o retrieval or tools." property="og:description">
<meta name=twitter:description content="An LLM that can converse in English & item IDs, and make recommendations w/o retrieval or tools.">
<meta content="https://eugeneyan.com/writing/semantic-ids/" property="og:url">
<meta content="2025-09-14T00:00:00+00:00" property="article:published_time">
<meta content="https://eugeneyan.com/about/" property="article:author">
<meta content="https://eugeneyan.com/assets/og_image/semantic-ids-og.jpg" property="og:image">
<meta name=twitter:image content="https://eugeneyan.com/assets/og_image/semantic-ids-og.jpg">
<meta content="posts" property="article:section">
<meta content="llm" property="article:tag">
<meta content="recsys" property="article:tag">
<meta content="learning" property="article:tag">
<meta content="🛠" property="article:tag">
<meta content="🩷" property="article:tag">
<title>Training an LLM-RecSys Hybrid for Steerable Recs with Semantic IDs</title>
<!-- styles -->
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e98c7fd0fab5af1e4999090d-|49"></script><link href="/css/main.min.css" rel="preload" as="style" onload="this.rel='stylesheet'" type="text/css">
<link rel="stylesheet" href="/css/main.css" type="text/css">
<!-- Preconnect to Google Fonts domains to reduce latency -->
<link rel="preconnect" href="https://fonts.googleapis.com" crossorigin>
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e98c7fd0fab5af1e4999090d-|49"></script><link href="https://fonts.googleapis.com/css2?family=Merriweather:wght@400;700&family=Raleway&display=swap"
rel="preload" as="style" onload="this.rel='stylesheet'">
<link href="https://fonts.googleapis.com/css2?family=Merriweather:wght@400;700&family=Raleway&display=swap"
rel="stylesheet">
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e98c7fd0fab5af1e4999090d-|49"></script><link href="/css/monokai.css" rel="preload" as="style" onload="this.rel='stylesheet'" type="text/css">
<link href="/css/monokai.css" rel="stylesheet" type="text/css">
<!-- Favicons: the type attribute must match the actual file format (.ico / .webp), not image/png. -->
<link rel="shortcut icon" type="image/x-icon" href="https://eugeneyan.com/assets/favicon/favicon.ico">
<link rel="apple-touch-icon" sizes="180x180" href="https://eugeneyan.com/assets/favicon/apple-touch-icon.webp">
<link rel="icon" type="image/webp" sizes="32x32" href="https://eugeneyan.com/assets/favicon/favicon-32x32.webp">
<link rel="icon" type="image/webp" sizes="16x16" href="https://eugeneyan.com/assets/favicon/favicon-16x16.webp">
<link rel="manifest" href="/assets/favicon/site.webmanifest">
<link rel="canonical" href="https://eugeneyan.com/writing/semantic-ids/"/>
<!-- Collect tags-->
<!-- google analytics - i will not share this data with google -->
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-4CKMNLRMCV" type="e98c7fd0fab5af1e4999090d-text/javascript"></script>
<!-- Creates the dataLayer queue if it does not exist; gtag() only pushes its arguments onto that queue. Two entries are queued here: a 'js' timestamp and the config for property G-4CKMNLRMCV. -->
<script type="e98c7fd0fab5af1e4999090d-text/javascript">
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-4CKMNLRMCV');
</script>
</head>
<body>
<!-- Dark-mode toggle. It must stay a preceding sibling of .theme-container: the inline CSS switches themes via `.dark-mode-checkbox:checked~.theme-container`. The label is empty, so the checkbox carries its own accessible name. -->
<input class="dark-mode-checkbox" id="dark-mode" name="dark-mode-checkbox" type="checkbox" aria-label="Toggle dark mode"/>
<label class="dark-mode-label" for="dark-mode"></label>
<div class="theme-container grow">
<div class="container" style="width: 95%">
<div class="header">
<div class="row">
<div class="col-sm-3">
<h1 class="text-muted nav"><a href="/">eugeneyan</a></h1>
</div>
<div class="col-sm-9">
<ul id="nav" class="nav-margin nav nav-pills float-sm-right">
<li><a href="/start-here/" title="Start Here">Start Here</a></li>
<li><a href="/writing/" title="Writing">Writing</a></li>
<li><a href="/speaking/" title="Speaking">Speaking</a></li>
<li><a href="/prototyping/" title="Prototyping">Prototyping</a></li>
<li><a href="/about/" title="About">About</a></li>
<li><a href="/search/" title="Search"><img class="icon icon-search" src="/assets/icon-search.svg" loading="lazy" alt="Search"/></a></li>
</ul>
</div>
</div>
</div>
<div class="notes">
<div class="note single">
<h1 class="title">Training an LLM-RecSys Hybrid for Steerable Recs with Semantic IDs</h1>
<p class="date">
<info datetime="2025-09-14 00:00:00 +0000">
<span class="no-italics">[
<a class='tag' href="/tag/llm/">llm</a>
<a class='tag' href="/tag/recsys/">recsys</a>
<a class='tag' href="/tag/learning/">learning</a>
<a class='tag' href="/tag/🛠/">🛠</a>
<a class='tag' href="/tag/🩷/">🩷</a>
]
</span> · 23 min read
</info>
</p>
<!-- Post content -->
<div class="notebody">
<!-- Sets window.MathJax: CHTML and SVG output scaled to 0.9, and inline math delimited by $...$ or \(...\). The second script loads MathJax 3 (tex-chtml) asynchronously from jsDelivr. -->
<script type="e98c7fd0fab5af1e4999090d-text/javascript"> window.MathJax = { chtml: { scale: 0.9 }, svg: { scale: 0.9 }, tex: {inlineMath: [['$', '$'], ['\\(', '\\)']] }}; </script>
<script type="e98c7fd0fab5af1e4999090d-text/javascript" id="MathJax-script" async="" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
<!--https://docs.mathjax.org/en/latest/input/tex/delimiters.html-->
<p>I got nerdsniped when I first heard about Semantic IDs. The idea is simple: Instead of using random hash IDs for videos or songs or products, we can use semantically meaningful tokens that an LLM can natively understand. I wondered, could we train an LLM-recommender hybrid on the rich behavioral data that makes today’s recommender systems so effective?</p>
<p>To my surprise, we can! The result is a language model that can converse in both English and item IDs, not with retrieval or other tools, but as a single, “bilingual” model where items (i.e., semantic IDs) are part of its vocabulary. Like a recommender model, it can recommend items given historical interactions. But the big surprise—and capability unlock—was when I found that <strong>I could simply chat with the model to steer its recommendations, and it could reason about its choices, offer explanations, and creatively name product bundles</strong>.</p>
<p>Here’s a demo video. And here’s the <a href="https://github.com/eugeneyan/semantic-ids-llm" target="_blank">code</a> to prep the data, train the model, and chat with it. (Note: This is a small model with very basic finetuning, so prompting matters a lot. Also, it’s not as general purpose and robust as most LLMs because of the limited finetuning.)</p>
<iframe width="800" height="405" src="https://www.youtube.com/embed/_0n4QS--3V8?si=7ODcCKGLt8Px6h1k" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen=""></iframe>
<h2 id="wait-why-is-this-useful">Wait, why is this useful?</h2>
<p>This marries the best of recsys and language modeling. On one hand, language models have world knowledge and can eloquently talk about products, but are unaware of our catalog. Also, their recommendations are generic and suffer from popularity bias. Conversely, search and recommendation models are trained on our catalog and billions of user interactions. They excel at predicting what a user will click or buy next, but can’t be steered via natural language or reason on their choices. (What they have in common though, is learning on sequences.)</p>
<p>To do this, we first extend the language model’s vocabulary with semantic ID tokens like <code class="language-plaintext highlighter-rouge"><|sid_0|></code>, <code class="language-plaintext highlighter-rouge"><|sid_1|></code>, <code class="language-plaintext highlighter-rouge"><|sid_2|></code>, etc. These tokens are used to represent the catalog. Then, we apply continued pretraining to teach the model the relationships between semantic IDs and the catalog, and then further finetune it on sequences of user behavior. Together, this teaches the model to make recommendations—from our catalog—based on user preferences.</p>
<p>While this LLM-recommender hybrid may not match the raw precision of a specialized multi-stage recsys, it offers a new capability: <strong>steerability and reasoning on recommendations</strong>. The result is a unified experience across search, recommendations, and chat, giving users the ability to find what they need in our catalog by simply asking for it.</p>
<blockquote>
<p><strong>User:</strong> I like animal and cute games. <code class="language-plaintext highlighter-rouge"><|rec|></code></p>
<blockquote>
<p><em>Model interprets the natural language query (“animal and cute”) and generates semantic IDs for relevant games.</em></p>
<div class="language-text highlighter-rouge"><div class="highlight"><pre class="highlight"><code><|sid_start|><|sid_173|><|sid_324|><|sid_764|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_201|><|sid_397|><|sid_701|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_173|><|sid_305|><|sid_670|><|sid_768|><|sid_end|>
</code></pre></div> </div>
</blockquote>
<p><strong>Assistant:</strong> “Animal Crossing: New Leaf”, “DISNEY INFINITY Starter Pack 3DS”, “Nintendogs + Cats: Golden Retriever and New Friends”</p>
</blockquote>
<h2 id="data-and-models">Data and models</h2>
<h3 id="finding-training-data-with-both-item-metadata-and-user-sequences">Finding training data with both item metadata and user sequences</h3>
<p>We’ll use data from the Video Games category of <a href="https://amazon-reviews-2023.github.io" target="_blank">Amazon Reviews 2023 (Hou et al., 2024)</a>. The data has rich product metadata and we can create user interaction sequences from it.</p>
<p>The dataset contains 137k products, each with fields for title, description, features, category, store information, ratings, prices, etc. After keeping only products with titles longer than 20 characters and descriptions longer than 100 characters, we’re left with 66k products. The behavioral data contains 737k records. From this, I built 79k user purchase sequences, each containing at least three valid items. These sequences had an average length of 6.5 items.</p>
<p>I also considered the <a href="https://www.aicrowd.com/challenges/amazon-kdd-cup-23-multilingual-recommendation-challenge" target="_blank">Amazon KDD Cup 2023 data (Amazon, 2023)</a>. It had 500k items and a focus on sequential behavior data. However, its multilingual nature added complexity, and the lack of a product category field made it difficult to work with. In the end, I went with the Amazon Reviews dataset to start simple and be frugal with my compute budget.</p>
<h3 id="semantic-ids-from-rq-vaes">Semantic IDs from RQ-VAEs</h3>
<p><strong>Semantic IDs</strong> (<a href="https://arxiv.org/abs/2305.05065" target="_blank">Rajput et al., 2023</a>; <a href="https://arxiv.org/abs/2306.08121" target="_blank">Singh et al., 2023</a>) are hierarchical representations that <strong>encode items into a sequence of tokens, replacing embeddings or hash-based IDs</strong>. Unlike a conventional item ID (<code class="language-plaintext highlighter-rouge">B0040JHNQG</code>) that has no inherent meaning, a semantic ID (<code class="language-plaintext highlighter-rouge"><|sid_0|><|sid_256|><|sid_512|><|sid_768|></code>) encodes item information. As a result of the training process, similar items naturally share common prefixes, forming a tree-like structure where each level of the ID represents increasingly fine-grained information about the item.</p>
<p><img style="max-width: 100%;" src="/assets/semantic-id.webp" loading="lazy" alt="Hierarchical structure of semantic IDs for food videos" /></p>
<p class="image-caption">The hierarchical structure of semantic IDs for food videos (<a href="https://arxiv.org/abs/2306.08121" target="_blank">source</a>)</p>
<p><strong>Residual Quantized Variational Autoencoders</strong> (RQ-VAEs; <a href="https://arxiv.org/abs/2107.03312" target="_blank">Zeghidour et al., 2021</a>, <a href="https://arxiv.org/abs/2203.01941" target="_blank">Lee et al., 2022</a>) are what we use to convert continuous embeddings into discrete semantic IDs. We start by encoding an item’s metadata (e.g., title, description) into an embedding. The RQ-VAE then uses hierarchical quantization to convert this embedding into a sequence of discrete tokens.</p>
<p><img style="max-width: 100%;" src="/assets/rqvae.webp" loading="lazy" alt="How an RQ-VAE converts embeddings to semantic IDs" /></p>
<p class="image-caption">How RQ-VAEs convert embeddings to semantic IDs (<a href="https://arxiv.org/abs/2305.05065" target="_blank">source</a>)</p>
<p>This is an iterative process. For the first level, the model finds the closest vector in the first codebook to the input embedding; this vector becomes the first token of the Semantic ID. The model then calculates the quantization error, or <em>residual</em>, by subtracting the chosen codebook vector from the input embedding. For the second level, it finds the closest vector in the second codebook to this residual, which gives us the second token. This process repeats for each level, with each step capturing progressively finer details that the previous levels missed.</p>
<p>The loss function of the RQ-VAE is worth discussing, as understanding it is key to generating high-quality semantic IDs. The overall loss has two main components:</p>
\[L(x) = L_\text{recon} + L_\text{rqvae}\]
<p>The first component, the <strong>reconstruction loss</strong>, ensures the decoder can accurately reconstruct the original item embedding ($x$) from the final quantized representation ($\hat{x}$). It’s a standard squared error loss:</p>
\[L_\text{recon} = \|x-\hat{x}\|^2\]
<p>The second component, the <strong>quantization loss</strong>, measures how well the codebook vectors match the residuals generated by the encoder. It contains two terms:</p>
\[L_{\text{rqvae}} := \sum_{i=0}^{m-1} \left[ \|\text{sg}[r_i] - e_{c_i}\|^2 + \beta\|r_i - \text{sg}[e_{c_i}]\|^2 \right]\]
<p>The first term ($\|\text{sg}[r_i] - e_{c_i}\|^2$), the <strong>codebook loss</strong>, is responsible for training the codebook embeddings. It measures the distance between the encoder’s residual ($r_i$) and the chosen codebook vector ($e_{c_i}$). The stop-gradient is applied to the encoder’s output ($\text{sg}[r_i]$) to treat the residuals as the fixed target. Thus, the gradients only flow to the codebook vector, pulling it closer to the encoder’s output.</p>
<p>The second term ($\beta\|r_i - \text{sg}[e_{c_i}]\|^2$), the <strong>commitment loss</strong>, is responsible for training the encoder. It measures the same distance, but the stop-gradient is applied to the codebook vector ($\text{sg}[e_{c_i}]$) instead. This stops updates to the codebook and forces the encoder to produce outputs, or <em>commit</em> to, vectors that are already in the codebook. The hyperparameter $\beta$ controls the strength of this commitment penalty.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># PyTorch code for loss function (without recursive loop)
</span><span class="n">reconstruction_loss</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">x_reconstructed</span><span class="p">)</span>
<span class="n">codebook_loss</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">residual</span><span class="p">.</span><span class="n">detach</span><span class="p">(),</span> <span class="n">codebook_vector</span><span class="p">)</span>
<span class="n">commitment_loss</span> <span class="o">=</span> <span class="n">F</span><span class="p">.</span><span class="n">mse_loss</span><span class="p">(</span><span class="n">residual</span><span class="p">,</span> <span class="n">codebook_vector</span><span class="p">.</span><span class="n">detach</span><span class="p">())</span>
<span class="n">quantization_loss</span> <span class="o">=</span> <span class="n">codebook_loss</span> <span class="o">+</span> <span class="n">commitment_weight</span> <span class="o">*</span> <span class="n">commitment_loss</span>
<span class="n">total_loss</span> <span class="o">=</span> <span class="n">reconstruction_loss</span> <span class="o">+</span> <span class="n">quantization_loss</span>
</code></pre></div></div>
<p>Through this process, the RQ-VAE produces a semantic ID as a sequence of tokens, one from each quantization level. Because similar items share common prefixes, language models can better understand product relationships, which is also useful for tree-based retrieval.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># How to hierarchically encode embeddings to semantic IDs
</span><span class="k">def</span> <span class="nf">encode_to_semantic_ids</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tensor</span><span class="p">:</span>
<span class="k">with</span> <span class="n">torch</span><span class="p">.</span><span class="n">no_grad</span><span class="p">():</span>
<span class="n">residual</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">encode</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">indices_list</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">vq_layer</span> <span class="ow">in</span> <span class="bp">self</span><span class="p">.</span><span class="n">vq_layers</span><span class="p">:</span>
<span class="n">vq_output</span> <span class="o">=</span> <span class="n">vq_layer</span><span class="p">(</span><span class="n">residual</span><span class="p">)</span>
<span class="n">indices_list</span><span class="p">.</span><span class="n">append</span><span class="p">(</span><span class="n">vq_output</span><span class="p">.</span><span class="n">indices</span><span class="p">)</span>
<span class="n">residual</span> <span class="o">=</span> <span class="n">residual</span> <span class="o">-</span> <span class="n">vq_output</span><span class="p">.</span><span class="n">quantized</span>
<span class="k">return</span> <span class="n">torch</span><span class="p">.</span><span class="n">stack</span><span class="p">(</span><span class="n">indices_list</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
</code></pre></div></div>
<p>Nonetheless, one practical challenge is that this doesn’t guarantee a unique ID for every item. In my experiments with a three-level codebook, with each level having 256 codes, we saw collisions on ~10% of the 66k products. To solve this, I added a fourth level where I appended a sequentially increasing token to each ID to ensure every product is uniquely identifiable.</p>
<h3 id="sasrec-qwen3-embedding-06b-and-qwen3-8b">SASRec, Qwen3-Embedding-0.6B, and Qwen3-8B</h3>
<p>In addition to the RQ-VAE, we use three other models. First, we’ll train a <strong>SASRec</strong> on semantic IDs to validate their quality and compare it to an item ID SASRec baseline. Then, we use the <strong>Qwen3-Embedding-0.6B model</strong> to encode product metadata to embeddings. Finally, we finetune the <strong>Qwen3-8B model</strong> to understand and recommend items via semantic IDs.</p>
<p><a href="https://arxiv.org/abs/1808.09781" target="_blank">SASRec (Kang & McAuley, 2018)</a> is a sequential recommender inspired by the <a href="https://arxiv.org/abs/1706.03762" target="_blank">Transformer</a> architecture. It encodes a user’s interaction history and uses a self-attention mechanism to weigh the most relevant past items in order to predict the next one. This allows the model to learn long-term dependencies in user behavior and thus outperform older recurrent models like RNNs and GRUs while being more efficient due to its parallelizable nature.</p>
<p><a href="https://arxiv.org/abs/2506.05176" target="_blank">Qwen3-Embedding-0.6B (Zhang et al., 2025)</a> is part of a series of embedding models available in 0.6B, 4B, and 8B sizes. They are trained via a multi-stage process that includes pre-training on synthetic data, followed by supervised finetuning and model merging for robustness. The 8B model achieves SOTA performance on the MTEB Multilingual benchmark.</p>
<p><a href="https://arxiv.org/abs/2505.09388" target="_blank">Qwen3-8B (Yang et al., 2025)</a> is a dense language model in the Qwen3 family. Despite being one of the smaller models, its post-training is optimized through strong-to-weak distillation on Qwen3-235B-A22B and Qwen3-32B. This makes the Qwen3-8B relatively capable for its size, surpassing bigger, previous-generation models like Qwen2.5-14B on more than half of the evaluated benchmarks, especially STEM and coding. Like other models in the Qwen3 series, the Qwen3-8B has dual thinking and non-thinking modes.</p>
<h2 id="cleaning-data-and-creating-user-sequences">Cleaning data and creating user sequences</h2>
<p>First, we prepare the item metadata to ensure high-quality inputs for the semantic ID model. We start by excluding items that have titles with less than 20 characters or descriptions with less than 100 characters. This reduced the item count by half, from 137k to 66k unique items.</p>
<p>Next, we clean item descriptions with Gemini 2.5 Flash Lite (<a href="https://arxiv.org/abs/2507.06261" target="_blank">Comanici et al., 2025</a>) by fixing truncated sentences, removing HTML, and reducing verbosity. This halved the average description length from 1,038 to 538 characters. Similarly, we remove promotional text and standardize formatting for titles, turning verbose listings like “NEW! LIMITED! Sega Saturn RGB SCART LEAD CABLE…” into a clean “Sega Saturn RGB SCART Cable”.</p>
<p>Then, we augmented the data by extracting structured metadata such as product type (Game, Hardware, Accessory), platform (PS4, Xbox, Wii), genre (Roguelike, Soulslike, Metroidvania), hardware type, brand, multiplayer modes, etc. This process had 98% coverage for platform information, 78% for brand identification, and 51% for genre classification.</p>
<p>Finally, to build the user sequences, we deduplicated on users and built interaction histories, resulting in 91.5k sequences. From these sequences, we excluded items that had no metadata, and then filtered out sequences that had less than three items. We also truncated sequences to a maximum length of 100 items (only 28 sequences were truncated). This gave us a dataset of 78.6k sequences that have a median length of 5 items and a mean length of 6.5 items.</p>
<h2 id="training-an-rq-vae-to-output-semantic-ids">Training an RQ-VAE to output Semantic IDs</h2>
<p>To embed the items, we use the Qwen3-Embedding-0.6B model. It supports customization of the input instruction for various tasks, and we add the prefix of “Given a product description, generate a semantic embedding that captures its key features and characteristics”. From this, we get 1024-dim embeddings via last token pooling and L2-normalize them before saving.</p>
<p>The RQ-VAE consists of an encoder, three quantization levels with 256 codes each, and a symmetric decoder. For training stability, we use the <a href="https://arxiv.org/abs/2410.06424" target="_blank">rotation trick (Fifty et al., 2025)</a> as a replacement for the Straight-Through Estimator (to calculate the gradient for $L_\text{recon}$). Other optimizations included initializing codebooks with k-means clustering, resetting unused codes, and using a large batch size. I also tried a few techniques that didn’t help, such as updating the codebook with EMA and stopping gradients to the decoder.</p>
<p>The trained RQ-VAE achieved 89% unique semantic IDs across 66k products on the three quantization levels. To resolve the remaining collisions, I appended a fourth token that assigns a unique, sequential ID (0, 1, 2, …) to any products that share the same first three codes. This ensures every product has a unique 4-part semantic ID.</p>
<p>I ran a few dozen experiments to understand more about RQ-VAEs and their output semantic IDs, and to find an optimal configuration for the model. Here are some key findings.</p>
<p>First, I experimented with the commitment weight $\beta$ that balances reconstruction accuracy and codebook commitment. I tested values of 0.25 (yellow), 0.5 (orange), and 1.0 (red), and found that a higher $\beta$ of 1.0 led to the most unique IDs but also had the highest validation loss. And while a lower $\beta$ of 0.25 led to slightly more unique IDs, $\beta$ of 0.5 had the lowest validation loss. Thus, on this dataset, I trained subsequent RQ-VAEs with $\beta$ = 0.5. (Note: This differs from the Semantic ID papers which used $\beta$ = 0.25.)</p>
<p><img style="max-width: 100%;" src="/assets/rqvae-curves.webp" loading="lazy" alt="RQ-VAE training curves for beta of 0.25 (yellow), 0.5 (orange), and 1.0 (red)" /></p>
<p class="image-caption">Curves with beta = 0.25 (yellow), 0.5 (orange), and 1.0 (red)</p>
<p>Aside: A quick primer on what each of these metrics means:</p>
<ul>
<li><strong>loss/reconstruction</strong>: Measures how well the RQ-VAE can reconstruct the original item embeddings after compressing them into semantic IDs and then decompressing them.</li>
<li><strong>loss/vq</strong>: Combined codebook and commitment loss across all levels. Ensures encoder outputs stay close to the codebook vectors and codebook vectors adapt to encoder outputs. Essential for compressing embeddings into meaningful semantic IDs.</li>
<li><strong>loss/total</strong>: Sum of reconstruction loss and VQ loss to monitor overall progress.</li>
<li><strong>loss/validation</strong>: Total loss on held-out validation set to monitor generalization.</li>
<li><strong>metrics/avg_residual_norm</strong>: The amount of “leftover” residual after all quantization levels. Lower residual = codebooks are better at capturing the input embedding.</li>
<li><strong>metrics/unique_id_proportion</strong>: % of items with unique IDs in a batch. Checks for codebook collapse. Higher = better ability to distinguish between items.</li>
</ul>
<p>I also experimented with a shallower encoder and the impact of metadata cleaning. The shallower encoder (green) performed worse, increasing validation loss and reducing the number of unique IDs. However, investing in data cleaning paid off (blue). It led to a model with the lowest reconstruction and validation losses while having the highest proportion of unique IDs. I used the RQ-VAE from this run.</p>
<p><img style="max-width: 100%;" src="/assets/rqvae-curves2.webp" loading="lazy" alt="RQ-VAE training curves for beta of 0.5 (orange), a shallower encoder (green), and clean data (blue)" /></p>
<p class="image-caption">Curves with beta = 0.5 (orange), shallower encoder (green), and clean data (blue)</p>
<p>Another way to evaluate RQ-VAEs is to inspect the codebook utilization. Relatively uniform usage across all codes suggests the model is using its full expressive capacity. The final RQ-VAE demonstrates this well; across all three quantization levels, usage is spread evenly with low variance, as shown in the histogram below.</p>
<p><img style="max-width: 100%;" src="/assets/codebook-usage.webp" loading="lazy" alt="Histogram of codebook usage spread evenly across codes at all three quantization levels" /></p>
<p class="image-caption">Example of an RQ-VAE with well-distributed codebook usage</p>
<p>In contrast, a poorly converged RQ-VAE will have sparse and highly concentrated code usage. The histogram below shows this failure mode, where a few codes are overused, and the majority of the codebook is ignored.</p>
<p><img style="max-width: 100%;" src="/assets/codebook-usage2.webp" loading="lazy" alt="Histogram of codebook usage where a few codes are overused and most of the codebook is ignored" /></p>
<p class="image-caption">Example of an RQ-VAE with poorly distributed codebook usage</p>
<p>With the trained RQ-VAE, we encode all item embeddings into their semantic ID format, such as <code class="language-plaintext highlighter-rouge"><|sid_start|><|sid_191|><|sid_260|><|sid_716|><|sid_768|><|sid_end|></code>. Then, we transformed the 78.6k user purchase sequences from regular IDs to semantic ID sequences. These sequences are the training data used to both validate the quality of the IDs with a SASRec baseline and to finetune the Qwen3-8B model.</p>
<h2 id="training-a-sasrec-on-regular-item-ids-vs-semantic-ids">Training a SASRec on regular item IDs vs. semantic IDs</h2>
<p>To validate that our semantic IDs capture meaningful product relationships, we train two variants of the SASRec, a baseline trained on regular item IDs and a variant trained on semantic IDs, and then compare their performance.</p>
<p>The <strong>baseline SASRec</strong> follows the standard architecture. It treats each product as a distinct, atomic unit, learning an embedding for it from scratch. This is based purely on behavioral patterns. The model uses 2 causal self-attention blocks, a 64-dimensional hidden state, and is trained on the discriminative task of distinguishing the next item in a sequence from randomly sampled negative items using a binary cross-entropy (BCE) loss.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Baseline SASRec predict function
</span><span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_ids</span><span class="p">:</span> <span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">candidate_ids</span><span class="p">:</span> <span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">)</span> <span class="o">-></span> <span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">:</span>
<span class="s">"""Predict scores for candidate items.
Args:
input_ids: Item sequences [batch_size, seq_length]
candidate_ids: Candidate items to score [batch_size, num_candidates]
Returns:
Scores for each candidate [batch_size, num_candidates]
"""</span>
<span class="c1"># Get sequence representations
</span> <span class="n">hidden_states</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">forward</span><span class="p">(</span><span class="n">input_ids</span><span class="p">)</span> <span class="c1"># [B, T, H]
</span>
<span class="c1"># Use only the last hidden state for prediction
</span> <span class="n">final_hidden</span> <span class="o">=</span> <span class="n">hidden_states</span><span class="p">[:,</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="p">:]</span> <span class="c1"># [B, H]
</span>
<span class="c1"># Get candidate embeddings
</span> <span class="n">candidate_embs</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">item_emb</span><span class="p">(</span><span class="n">candidate_ids</span><span class="p">)</span> <span class="c1"># [B, C, H]
</span>
<span class="c1"># Compute scores via dot product
</span> <span class="n">scores</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="n">bmm</span><span class="p">(</span><span class="n">candidate_embs</span><span class="p">,</span> <span class="n">final_hidden</span><span class="p">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)).</span><span class="n">squeeze</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># [B, C]
</span>
<span class="k">return</span> <span class="n">scores</span>
</code></pre></div></div>
<p>In contrast, the <strong>Semantic ID SASRec</strong> reframes recommendation as a conditional generative task. Instead of scoring items, its objective is to generate the next item’s 4-part semantic ID, token-by-token. This requires a larger architecture with 4 transformer blocks and 384-dim hidden states. Unlike the <a href="https://arxiv.org/abs/2305.05065" target="_blank">TIGER paper</a>, which uses a T5 encoder-decoder, this SASRec variant is decoder-only, making it a more direct and equitable comparison to the baseline SASRec. Because we use semantic IDs, instead of having an embedding for each of the 66k items, we have a total of 1,024 token-level embeddings, with 256 tokens per level in the semantic ID.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Semantic ID SASRec predict function
</span><span class="k">def</span> <span class="nf">predict_next_item</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_ids</span><span class="p">:</span> <span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">teacher_forcing</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="bp">True</span><span class="p">,</span>
<span class="n">target_tokens</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">]</span> <span class="o">=</span> <span class="bp">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">torch</span><span class="p">.</span><span class="n">Tensor</span><span class="p">]:</span>
<span class="s">"""Predict the next item's semantic ID tokens sequentially.
Args:
input_ids: Token sequences [batch_size, seq_length * num_levels]
teacher_forcing: Use ground truth for conditioning during training
target_tokens: Ground truth tokens for next item [batch_size, num_levels]
Returns:
Dictionary with logits for each level
"""</span>
<span class="n">hidden_states</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">forward</span><span class="p">(</span><span class="n">input_ids</span><span class="p">)</span> <span class="c1"># [B, T*L, H]
</span>
<span class="c1"># Get representation at the last position as context for all previous items
</span> <span class="n">last_hidden</span> <span class="o">=</span> <span class="n">hidden_states</span><span class="p">[:,</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="p">:]</span> <span class="c1"># [B, H]
</span>
<span class="n">predictions</span> <span class="o">=</span> <span class="p">{}</span>
<span class="c1"># Sequential generation: Predict each level conditioned on previous
</span> <span class="k">for</span> <span class="n">level</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="p">.</span><span class="n">num_levels</span><span class="p">):</span>
<span class="k">if</span> <span class="n">level</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="c1"># Level 0: predict directly from sequence representation
</span> <span class="n">context</span> <span class="o">=</span> <span class="n">last_hidden</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Levels 1-3: condition on previously predicted/true tokens
</span> <span class="k">if</span> <span class="n">teacher_forcing</span> <span class="ow">and</span> <span class="n">target_tokens</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
<span class="c1"># Use ground truth previous levels during training
</span> <span class="n">prev_tokens</span> <span class="o">=</span> <span class="n">target_tokens</span><span class="p">[:,</span> <span class="p">:</span><span class="n">level</span><span class="p">]</span> <span class="c1"># [B, level]
</span> <span class="k">else</span><span class="p">:</span>
<span class="c1"># Use predicted tokens during inference
</span> <span class="n">prev_tokens</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">_sample_from_predictions</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">level</span><span class="p">)</span>
<span class="n">prev_embeds</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">token_emb</span><span class="p">(</span><span class="n">prev_tokens</span><span class="p">)</span> <span class="c1"># [B, level, input_dim]
</span> <span class="n">prev_embeds_projected</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">input_projection</span><span class="p">(</span><span class="n">prev_embeds</span><span class="p">)</span> <span class="c1"># [B, level, H]
</span> <span class="n">prev_context</span> <span class="o">=</span> <span class="n">prev_embeds_projected</span><span class="p">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># [B, H]
</span>
<span class="c1"># Combine with sequence context
</span> <span class="n">combined</span> <span class="o">=</span> <span class="n">torch</span><span class="p">.</span><span class="n">cat</span><span class="p">([</span><span class="n">last_hidden</span><span class="p">,</span> <span class="n">prev_context</span><span class="p">],</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># [B, 2*H]
</span> <span class="n">context</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">context_combiners</span><span class="p">[</span><span class="n">level</span> <span class="o">-</span> <span class="mi">1</span><span class="p">](</span><span class="n">combined</span><span class="p">)</span> <span class="c1"># [B, H]
</span>
<span class="c1"># Predict current level
</span> <span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="p">.</span><span class="n">level_heads</span><span class="p">[</span><span class="n">level</span><span class="p">](</span><span class="n">context</span><span class="p">)</span> <span class="c1"># [B, codebook_size]
</span> <span class="n">predictions</span><span class="p">[</span><span class="sa">f</span><span class="s">"logits_l</span><span class="si">{</span><span class="n">level</span><span class="si">}</span><span class="s">"</span><span class="p">]</span> <span class="o">=</span> <span class="n">logits</span>
<span class="k">return</span> <span class="n">predictions</span>
</code></pre></div></div>
<p>This generative approach changes how we train and evaluate. The loss function is no longer a simple BCE loss but a sum of cross-entropy losses across each level of the semantic ID, forcing the model to predict the entire sequence correctly. Evaluation is also more complex, where instead of a dot product, an item’s score is its joint log-probability, calculated by summing the log-probs of generating each token. To improve training stability, we apply teacher forcing, where the ground-truth token from a prior level helps guide the prediction for the next level.</p>
<p>To evaluate both models, we use a validation set where we added 500 negative samples for each positive next item. While the baseline SASRec outperformed the semantic ID variant, the semantic model’s performance is respectable given the difficult generative task of predicting four correct tokens. Furthermore, the semantic ID variant has the <strong>ability to handle cold-start items</strong> by leveraging shared token prefixes from similar products, a capability the baseline lacks. This reveals the core trade-off, where gaining this ability to generalize requires 4x more predictions per item and requires higher training and inference compute.</p>
<table>
<thead>
<tr>
<th style="text-align: left">Model</th>
<th style="text-align: left">Hit@10</th>
<th style="text-align: left">NDCG@10</th>
<th style="text-align: left">MRR</th>
<th style="text-align: left">Mean Rank</th>
<th style="text-align: left">Median Rank</th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align: left"><strong>Baseline SASRec</strong></td>
<td style="text-align: left">0.2812</td>
<td style="text-align: left">0.1535</td>
<td style="text-align: left">0.1300</td>
<td style="text-align: left">138.9</td>
<td style="text-align: left">41.0</td>
</tr>
<tr>
<td style="text-align: left"><strong>Semantic ID SASRec</strong></td>
<td style="text-align: left">0.2020</td>
<td style="text-align: left">0.1138</td>
<td style="text-align: left">0.1007</td>
<td style="text-align: left">179.7</td>
<td style="text-align: left">79.0</td>
</tr>
</tbody>
</table>
<h2 id="fine-tuning-qwen3-8b-to-recommend-semantic-ids">Fine-tuning Qwen3-8B to Recommend Semantic IDs</h2>
<p>Next, we teach a language model to converse in semantic IDs. For this, we finetune <strong>Qwen3-8B</strong> to become “bilingual”, fluent in both natural language and semantic IDs.</p>
<p>First, we build a training dataset of 4.2 million conversational examples to teach the model about semantic IDs and recommendations. The data covers several task types, including mapping semantic IDs to their corresponding text descriptions (and vice-versa), predicting the next item in a user’s sequence, understanding relationships between item categories, and multi-hop reasoning. Each of these examples is formatted as a conversation with a system prompt, a user instruction, and an assistant’s response.</p>
<p>Then, we finetune the model in two phases. The first phase focuses on vocabulary extension, where we add 1,027 new tokens for the semantic IDs (i.e., <code class="language-plaintext highlighter-rouge"><|sid_start|></code>, <code class="language-plaintext highlighter-rouge"><|sid_end|></code>, <code class="language-plaintext highlighter-rouge"><|sid_0|></code> to <code class="language-plaintext highlighter-rouge"><|sid_1023|></code>, and <code class="language-plaintext highlighter-rouge"><|rec|></code>) to the Qwen3-8B tokenizer and resize the model’s embedding matrix for them. During this phase, we freeze all model parameters except the input and output embedding layers, training 1.23B parameters (15.3% of the total) for 1,000 steps with a relatively high learning rate.</p>
<p>With the semantic ID token embeddings initialized, we proceed to the next phase of full finetuning on all 8.3B parameters. For this, we used a lower learning rate, a 3% warmup schedule, and the 8-bit AdamW optimizer for memory efficiency, training for three epochs on the full dataset. To monitor progress, we used several custom callbacks that tracked training metrics like gradient norms, measured hierarchical accuracy on recommendation tasks, and sampled model outputs on a fixed set of conversational inputs.</p>
<h2 id="chatting-with-the-llm-recommender-hybrid">Chatting with the LLM-recommender hybrid</h2>
<p>The finetuned model can then be used to recommend items, and is conversant in semantic IDs and natural language. Because semantic IDs are a native part of its vocabulary, the model generates recommendations by simply predicting the next ID in a sequence, token-by-token. For interactive sessions, a chat interface translates the model’s raw output (that contains semantic IDs) back into product titles, creating a human-readable conversation.</p>
<blockquote>
<p><strong>User:</strong> “Halo 3 Limited Edition -Xbox 360”,
“Lost Planet: Extreme Condition - Xbox 360”,
“Lost Planet: Extreme Condition Collector’s Edition -Xbox 360”
<code class="language-plaintext highlighter-rouge"><|rec|></code></p>
<blockquote>
<p><em>Model takes the user’s interaction history (a sequence of Xbox 360 sci-fi shooters) and performs next-item prediction.</em></p>
<div class="language-text highlighter-rouge"><div class="highlight"><pre class="highlight"><code><|sid_start|><|sid_64|><|sid_351|><|sid_733|><|sid_768|><|sid_end|>
</code></pre></div> </div>
</blockquote>
<p><strong>Assistant:</strong> “Mass Effect - Xbox 360”</p>
</blockquote>
<p>Basic tests show the model can provide standard next-item recommendations based on a user’s history and refine recs based on natural language like platform constraints (<em>“Xbox games similar to…”</em>) or genre (<em>“I like animal and cute games.”</em>).</p>
<blockquote>
<p><strong>User:</strong> Xbox games similar to “The Legend of Zelda: Ocarina of Time 3D”: <code class="language-plaintext highlighter-rouge"><|rec|></code></p>
<blockquote>
<p><em>Model takes a seed item (“The Legend of Zelda”) and a natural language constraint (“Xbox”) to steer recommendations to a different platform.</em></p>
<div class="language-text highlighter-rouge"><div class="highlight"><pre class="highlight"><code><|sid_start|><|sid_64|><|sid_271|><|sid_576|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_64|><|sid_435|><|sid_615|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_167|><|sid_271|><|sid_578|><|sid_768|><|sid_end|>
</code></pre></div> </div>
</blockquote>
<p><strong>Assistant:</strong> “Fallout: New Vegas - Xbox 360 Ultimate Edition”, “Halo 4 - Xbox 360 (Standard Game)”, “Halo Reach - Legendary Edition”</p>
</blockquote>
<p>The model can also explain its recommendations, describe the relationship between two items, and even suggest creative names and descriptions for product bundles.</p>
<blockquote>
<p><strong>User:</strong> I just finished “Dragon Quest Heroes II Explorer’s Edition - PlayStation 4”. Suggest another <code class="language-plaintext highlighter-rouge"><|rec|></code> and explain why:</p>
<blockquote>
<p><em>Model performs a hybrid task where it generates a relevant semantic ID and then provides an explanation for the recommendation.</em></p>
<div class="language-text highlighter-rouge"><div class="highlight"><pre class="highlight"><code><|sid_start|><|sid_139|><|sid_289|><|sid_534|><|sid_768|><|sid_end|>
If you liked Dragon Quest Heroes II, you might like Nights of Azure
because both are action RPGs for the PlayStation 4 with a focus on
combat and character progression. Both games offer a narrative-
driven experience with a strong emphasis on combat mechanics,
suggesting a shared player interest in this genre.
</code></pre></div> </div>
</blockquote>
<p><strong>Assistant:</strong> “Nights of Azure - PlayStation 4”</p>
<p>If you liked Dragon Quest Heroes II, you might like Nights of Azure because both are action RPGs for the PlayStation 4 with a focus on combat and character progression. Both games offer a narrative-driven experience with a strong emphasis on combat mechanics, suggesting a shared player interest in this genre.</p>
</blockquote>
<p>Finally, while it may not match the raw accuracy of a specialized recommender, this unified interface allows users to discover products from our catalog through multi-turn dialogue, leading to the convergence of search, recsys, and chat.</p>
<p>And this is all done via the model itself, without intent classifiers, routers, retrieval, etc.</p>
<blockquote>
<p><strong>[TURN 1] User:</strong> I’m looking for games similar to Mario Kart. <code class="language-plaintext highlighter-rouge"><|rec|></code></p>
<blockquote>
<p><em>Model generates semantic IDs for three kart-style racing games.</em></p>
<div class="language-text highlighter-rouge"><div class="highlight"><pre class="highlight"><code><|sid_start|><|sid_131|><|sid_503|><|sid_738|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_131|><|sid_260|><|sid_577|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_145|><|sid_416|><|sid_671|><|sid_768|><|sid_end|>
</code></pre></div> </div>
</blockquote>
<p><strong>Assistant:</strong> “Need for Speed Most Wanted - Sony PSP”, “Need for Speed Hot Pursuit 2 - PlayStation 2”, “Burnout Legends - Nintendo DS”</p>
<p><strong>[TURN 2] User:</strong> How about something similar but for Xbox? <code class="language-plaintext highlighter-rouge"><|rec|></code></p>
<blockquote>
<p><em>Model uses the conversation history and the new “Xbox” constraint to generate semantic IDs for relevant Xbox arcade-racers.</em></p>
<div class="language-text highlighter-rouge"><div class="highlight"><pre class="highlight"><code><|sid_start|><|sid_183|><|sid_461|><|sid_517|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_183|><|sid_461|><|sid_562|><|sid_768|><|sid_end|>,
<|sid_start|><|sid_183|><|sid_461|><|sid_616|><|sid_768|><|sid_end|>
</code></pre></div> </div>
</blockquote>
<p><strong>Assistant:</strong> “Need for Speed Carbon - Xbox 360”, “Project Gotham Racing 3 - Xbox 360”, “Forza Motorsport 4 - Xbox 360”</p>
<p><strong>[TURN 3] User:</strong> Suggest a name and description for the bundle.</p>
<blockquote>
<p><em>Model understands the context has shifted from recommendation to a creative text generation task.</em></p>
</blockquote>
<p><strong>Assistant:</strong> Xbox Racing Legends: Speed & Style Pack</p>
</blockquote>
<p><br /></p>
<p class="image-caption">• • •</p>
<p>Okay, that’s it! I hope you found this as exciting as I do! If I had more compute, I’d experiment with SentencePiece tokenization on the semantic IDs, which <a href="https://arxiv.org/abs/2306.08121" target="_blank">DeepMind and YouTube have found helpful</a>, and further explore the impact of semantic IDs on cold-start. I’d also train RQ-VAEs and language models on datasets with millions of products and a greater variety of tasks and conversations to test how this approach scales and generalizes.</p>
<p>Have you trained models or built UXes with semantic IDs? I’d love to chat, please <a href="https://x.com/eugeneyan" target="_blank">reach out</a>!</p>
<p><br /></p>
<p>Thanks to <a href="https://x.com/devanshtandon_" target="_blank">Devansh</a> and <a href="https://x.com/shrikar84" target="_blank">Shrikar</a> for the discussions, and <a href="https://x.com/LukePiette" target="_blank">Luke</a> and <a href="https://www.runpod.io/?ref=4uddqig9" target="_blank">Runpod</a> for the compute to run these experiments. (Please use my <a href="https://runpod.io?ref=4uddqig9" target="_blank">affiliate code</a> so I get more credit for experiments!) Thanks to <a href="https://x.com/vibhuuuus" target="_blank">Vibhu</a>, <a href="https://x.com/swyx" target="_blank">Shawn</a>, <a href="https://x.com/_chenglou" target="_blank">Cheng</a>, and everyone who had to tolerate my yapping on this.</p>
<h2 id="references">References</h2>
<p>Hou, Yupeng, Jiacheng Li, Zhankui He, An Yan, Xiusi Chen, and Julian McAuley. “Bridging Language and Items for Retrieval and Recommendation.” arXiv:2403.03952. Preprint, arXiv, March 6, 2024. https://doi.org/10.48550/arXiv.2403.03952.</p>
<p>Amazon. “Amazon KDD Cup ‘23 - Multilingual Recommendation Challenge Dataset.” AIcrowd, 2023. https://www.aicrowd.com/challenges/amazon-kdd-cup-23-multilingual-recommendation-challenge.</p>
<p>Rajput, Shashank, Nikhil Mehta, Anima Singh, et al. “Recommender Systems with Generative Retrieval.” arXiv:2305.05065. Preprint, arXiv, November 3, 2023. https://doi.org/10.48550/arXiv.2305.05065.</p>
<p>Singh, Anima, Trung Vu, Nikhil Mehta, et al. “Better Generalization with Semantic IDs: A Case Study in Ranking for Recommendations.” arXiv:2306.08121. Preprint, arXiv, May 30, 2024. https://doi.org/10.48550/arXiv.2306.08121.</p>
<p>Zeghidour, Neil, Alejandro Luebs, Ahmed Omran, Jan Skoglund, and Marco Tagliasacchi. “SoundStream: An End-to-End Neural Audio Codec.” arXiv:2107.03312. Preprint, arXiv, July 7, 2021. https://doi.org/10.48550/arXiv.2107.03312.</p>
<p>Lee, Doyup, Chiheon Kim, Saehoon Kim, Minsu Cho, and Wook-Shin Han. “Autoregressive Image Generation Using Residual Quantization.” arXiv:2203.01941. Preprint, arXiv, March 9, 2022. https://doi.org/10.48550/arXiv.2203.01941.</p>
<p>Kang, Wang-Cheng, and Julian McAuley. “Self-Attentive Sequential Recommendation.” arXiv:1808.09781. Preprint, arXiv, August 20, 2018. https://doi.org/10.48550/arXiv.1808.09781.</p>
<p>Vaswani, Ashish, Noam Shazeer, Niki Parmar, et al. “Attention Is All You Need.” arXiv:1706.03762. Preprint, arXiv, August 2, 2023. https://doi.org/10.48550/arXiv.1706.03762.</p>
<p>Zhang, Yanzhao, Mingxin Li, Dingkun Long, et al. “Qwen3 Embedding: Advancing Text Embedding and Reranking Through Foundation Models.” arXiv:2506.05176. Preprint, arXiv, June 11, 2025. https://doi.org/10.48550/arXiv.2506.05176.</p>
<p>Yang, An, Anfeng Li, Baosong Yang, et al. “Qwen3 Technical Report.” arXiv:2505.09388. Preprint, arXiv, May 14, 2025. https://doi.org/10.48550/arXiv.2505.09388.</p>
<p>Comanici, Gheorghe, Eric Bieber, Mike Schaekermann, et al. “Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities.” arXiv:2507.06261. Preprint, arXiv, July 22, 2025. https://doi.org/10.48550/arXiv.2507.06261.</p>
<br>
<p>If you found this useful, please cite this write-up as:</p>
<blockquote class="blockquote-citation">
<p>Yan, Ziyou. (Sep 2025). Training an LLM-RecSys Hybrid for Steerable Recs with Semantic IDs. eugeneyan.com.
https://eugeneyan.com/writing/semantic-ids/.</p>
</blockquote>
<p>or</p>
<div class="citation"><pre><code>@article{yan2025semantic-ids,
title = {Training an LLM-RecSys Hybrid for Steerable Recs with Semantic IDs},
author = {Yan, Ziyou},
journal = {eugeneyan.com},
year = {2025},
month = {Sep},
url = {https://eugeneyan.com/writing/semantic-ids/}
}</code></pre>
</div>
<br>
<style>
/* Share-button strip: an inline-block row of fixed-size, floated icon cells. */
#share-buttons {
  display: inline-block;
  vertical-align: middle;
}
/* Clearfix pseudo-element placed after the floated cells. */
#share-buttons:after {
  content: "";
  display: block;
  clear: both;
}
/* One 32x36px cell per share target; its content is centred horizontally.
   (A redundant earlier `text-align: left` that was immediately overridden
   by `text-align: center` has been dropped; the computed style is unchanged.) */
#share-buttons > div {
  position: relative;
  height: 36px;
  width: 32px;
  float: left;
  text-align: center;
}
/* Applies only to inline <svg> children of a cell.
   NOTE(review): the share cells in this page use <img> icons, so this rule
   presumably matches nothing here; confirm before removing. */
#share-buttons > div > svg {
  height: 16px;
  fill: #808080;
  margin-top: 10px;
}
/* Signal that the cells are clickable. */
#share-buttons > div:hover {
  cursor: pointer;
}
</style>
<span style="font-size: 18px">Share on: </span>
<div id="share-buttons">
  <!-- One cell per share target. Each cell opens the target's share dialog via window.open.
       role/tabindex/aria-label plus the keydown handler make the cells focusable, named and
       operable with Enter or Space; the keydown handler forwards to the existing click handler.
       Query-string values (share text and article URL) are percent-encoded, and "&" is written
       as "&amp;" inside the attribute so the markup is valid. -->
  <div class="twitter" title="Share this on Twitter" role="button" tabindex="0" aria-label="Share this on Twitter" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://twitter.com/intent/tweet?text=Great%20read!%20Training%20an%20LLM-RecSys%20Hybrid%20for%20Steerable%20Recs%20with%20Semantic%20IDs&amp;url=https%3A%2F%2Feugeneyan.com%2Fwriting%2Fsemantic-ids%2F&amp;via=eugeneyan', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" data-cf-modified-e98c7fd0fab5af1e4999090d-="">
    <img class="icon about-icon-large" src="/assets/icon-twitter.svg" loading="lazy" alt=""/>
  </div>
  <div class="linkedin" title="Share this on LinkedIn" role="button" tabindex="0" aria-label="Share this on LinkedIn" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://www.linkedin.com/shareArticle?mini=true&amp;url=https%3A%2F%2Feugeneyan.com%2Fwriting%2Fsemantic-ids%2F&amp;source=eugeneyan.com', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" data-cf-modified-e98c7fd0fab5af1e4999090d-="">
    <img class="icon about-icon-large" src="/assets/icon-linkedin.svg" loading="lazy" alt=""/>
  </div>
  <div class="bluesky" title="Share this on Bluesky" role="button" tabindex="0" aria-label="Share this on Bluesky" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://bsky.app/intent/compose?text=Great%20read!%20Training%20an%20LLM-RecSys%20Hybrid%20for%20Steerable%20Recs%20with%20Semantic%20IDs%20https%3A%2F%2Feugeneyan.com%2Fwriting%2Fsemantic-ids%2F', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" data-cf-modified-e98c7fd0fab5af1e4999090d-="">
    <img class="icon about-icon-large" src="/assets/bluesky.svg" loading="lazy" alt=""/>
  </div>
  <!-- The original URL contained a doubled "&&" before redirect_uri; a single separator is used here. -->
  <div class="facebook fb-share-button" title="Share this on Facebook" role="button" tabindex="0" aria-label="Share this on Facebook" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://www.facebook.com/dialog/share?app_id=249237293114028&amp;display=popup&amp;href=https%3A%2F%2Feugeneyan.com%2Fwriting%2Fsemantic-ids%2F&amp;redirect_uri=https%3A%2F%2Feugeneyan.com%2Fwriting%2Fsemantic-ids%2F', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" data-cf-modified-e98c7fd0fab5af1e4999090d-="">
    <img class="icon about-icon-large" src="/assets/icon-facebook.svg" loading="lazy" alt=""/>
  </div>
  <div class="mail" title="Share this through Email" role="button" tabindex="0" aria-label="Share this through Email" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('mailto:?subject=Great%20read!%20Training%20an%20LLM-RecSys%20Hybrid%20for%20Steerable%20Recs%20with%20Semantic%20IDs&amp;body=https%3A%2F%2Feugeneyan.com%2Fwriting%2Fsemantic-ids%2F');" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" data-cf-modified-e98c7fd0fab5af1e4999090d-="">
    <img class="icon about-icon-large" src="/assets/icon-mail.svg" loading="lazy" alt=""/>
  </div>
</div>
</div>
<!-- Page navigation -->
<hr>
<div id="algolia-recs-container" style="display: none;">
<div id="algolia-related-products" style="margin-bottom: 2em;"></div>
<div id="algolia-fbt"></div>
<style>
/* Common styles for both recommendation widgets */
.algolia-recs-section-header {
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 1em; /* Adjust as needed, smaller than default h3 */
font-weight: bold;
margin-top: 0; /* Remove or reduce top margin */
margin-bottom: 15px; /* Space between header and recommendation cards */
color: var(--c-text); /* Use theme's text color */
font-style: italic;
}
/* Related Products Widget Styles */
#algolia-related-products .ais-RelatedProducts-list {
display: flex;
flex-direction: row; /* Arrange items horizontally */
flex-wrap: nowrap; /* Prevent wrapping to new lines, if possible */
justify-content: flex-start; /* Align items to the start of the container */
padding-left: 0; /* Remove default list padding */
list-style-type: none; /* Remove list bullets */
margin: 0;
}
#algolia-related-products .ais-RelatedProducts-item {
width: 32%; /* Adjust for 3 items: 32% * 3 items + 2% * 2 margins = 100% */
margin-right: 2%;
box-sizing: border-box; /* Include padding and border in the element's total width */
/* Basic card styling */
border: 1px solid color-mix(in srgb, var(--c-background) 85%, var(--c-text) 15%); /* Theme-aware light grey border */
padding: 0; /* Remove overall card padding, will be handled by elements */
text-align: left; /* Or 'center' if you prefer */
background-color: var(--c-background);
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
#algolia-related-products .ais-RelatedProducts-item:last-child {
margin-right: 0; /* No margin for the last item in the row */
}
/* Styling for images within recommendation items */
#algolia-related-products .ais-RelatedProducts-item img {
display: block; /* Can help remove extra space below image */
width: calc(100% - 4px); /* Full width minus 2px L/R margins */
max-width: 100%; /* Ensures image does not exceed container if intrinsically smaller */
/* height: auto; -- Controlled by inline style's max-height and object-fit */
object-fit: cover; /* Ensure image covers the area, also in inline style */
margin: 2px; /* 2px margin on top, left, right. Bottom is overridden by inline style. */
/* margin-bottom: 8px; -- This is set by inline style in JS template */
}
/* Styling for the wrapper link to make the whole card clickable */
#algolia-related-products .ais-RelatedProducts-item a.ais-RelatedProducts-item-link-wrapper {
display: block; /* Make the link fill the list item */
text-decoration: none; /* Remove underline */
color: inherit; /* Use parent's text color */
}
#algolia-related-products .related-product-title {
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 0.75em;
display: -webkit-box;
-webkit-line-clamp: 2; /* Limit to 2 lines for WebKit browsers */
line-clamp: 2; /* Standard property */
-webkit-box-orient: vertical;
overflow: hidden;
text-overflow: ellipsis;
padding: 0 7px 7px 7px; /* 0 top, 7px L/R/B for text area */
line-height: 1.5; /* Adjust for better readability */
height: 3.3em; /* Current height: 3.3em. For 2 lines with 0.75em font & 1.5 line-height, calculated height would be 2.25em. */
color: var(--c-interactive); /* Use theme's interactive color */
}
/* Wrapper around the cover image and the score badge; it is the positioning context for the badge. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container {
position: relative; /* Lets .recommendation-score be absolutely positioned inside this box */
display: block; /* Block layout for the wrapper */
width: calc(100% - 4px); /* Full card width minus the 2px left and right margins below */
margin: 2px; /* 2px on every side; the bottom value is replaced by the next declaration */
margin-bottom: 8px; /* Gap between the image and the title; must stay after the margin shorthand to win */
line-height: 0; /* Prevents unexpected space if child elements are treated as inline */
}
/* The cover image inside the wrapper. This selector is more specific than the generic
   ".ais-RelatedProducts-item img" rule, so its width, max-width and margin take precedence. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container img {
display: block;
width: 100%; /* Fill the container's width */
max-width: 20em; /* Upper bound on the rendered image width */
height: auto; /* Maintain aspect ratio by default */
max-height: 12em; /* Constrain image height (adjust as needed) */
object-fit: cover; /* When max-height limits the box, crop the image instead of distorting it */
margin: 0 auto; /* Center image if max-width kicks in and it's narrower than container */
}
/* Score badge pinned to the bottom-right corner of .recommendation-image-container.
   NOTE(review): --green is not defined in the part of the stylesheet visible here; confirm it is declared elsewhere. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score {
position: absolute;
bottom: 3px; /* Offset from the bottom edge of the container */
right: 3px; /* Offset from the right edge of the container */
background-color: color-mix(in srgb, var(--c-background) 85%, var(--green) 15%); /* 85% theme background mixed with 15% green, so the tint follows the active theme */
color: var(--green); /* Theme's green color for text */
padding: 3px 6px;
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 0.75em;
font-weight: bold;
border-radius: 10px; /* Pill-like rounded corners */
border: 1px solid var(--green); /* Theme's green color for border */
line-height: 1; /* Critical for small text in a small box */
z-index: 10; /* Ensure it's above the image */
box-shadow: 0 1px 2px rgba(0,0,0,0.15); /* Softer shadow */
display: flex; /* Lays out the icon and the number in a row */
align-items: center; /* Vertically center icon and text */
}
/* SVG icon inside the score badge. The JS item template renders the <svg> as a direct child of
   .recommendation-score, which makes it a flex item. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score-icon {
width: 0.9em; /* Scale with score's font size */
height: 0.9em;
vertical-align: -0.1em; /* NOTE(review): vertical-align does not apply to flex items; alignment comes from align-items on the badge */
margin-right: 4px; /* Space between icon and score number */
fill: var(--green); /* Theme's green color for icon */
}
/* Frequently Bought Together widget: a single horizontal row of up to three cards */
#algolia-fbt .ais-FrequentlyBoughtTogether-list {
display: flex;
flex-direction: row; /* Arrange items horizontally */
flex-wrap: nowrap; /* Keep all cards on one line */
justify-content: flex-start; /* Align items to the start of the container */
padding-left: 0; /* Remove default list padding */
list-style-type: none; /* Remove list bullets */
margin: 0;
}
/* One card. Widths are sized for three cards per row. */
#algolia-fbt .ais-FrequentlyBoughtTogether-item {
width: 32%; /* 3 cards * 32% + 2 gaps * 2% = 100% */
margin-right: 2%;
box-sizing: border-box; /* Include padding and border in the element's total width */
/* Card styling (active): border, background, rounded corners and shadow */
border: 1px solid color-mix(in srgb, var(--c-background) 85%, var(--c-text) 15%); /* Theme-aware light grey border */
padding: 0; /* No card padding; spacing is handled by the child elements */
text-align: left;
background-color: var(--c-background);
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
#algolia-fbt .ais-FrequentlyBoughtTogether-item:last-child {
margin-right: 0; /* No trailing gap after the last card in the row */
}
/* Generic styling for any image inside a card. The JS item template in this file emits the <img>
   without inline styles and inside .recommendation-image-container, where a more specific rule
   overrides width, max-width and margin. */
#algolia-fbt .ais-FrequentlyBoughtTogether-item img {
display: block; /* Can help remove extra space below image */
width: calc(100% - 4px); /* Full width minus 2px L/R margins */
max-width: 100%; /* Never wider than the containing block */
/* No height is set in this rule; the .recommendation-image-container img rule sets height and max-height */
object-fit: cover; /* Crop rather than distort when the box and the image ratios differ */
margin: 2px; /* 2px on every side */
/* No bottom margin override here; the 8px gap lives on .recommendation-image-container */
}
/* Wrapper link: the anchor that encloses the image, score badge and title so the whole card is one click target */
#algolia-fbt .ais-FrequentlyBoughtTogether-item a.ais-FrequentlyBoughtTogether-item-link-wrapper {
display: block; /* Block-level so the anchor spans the list item instead of sitting inline */
text-decoration: none; /* No underline on the card contents */
color: inherit; /* Take the text color from the parent rather than the global link color */
}
/* Card title: small text clamped to at most two lines, truncated with an ellipsis */
#algolia-fbt .fbt-product-title {
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 0.75em;
display: -webkit-box; /* Needed together with -webkit-box-orient for -webkit-line-clamp to apply */
-webkit-line-clamp: 2; /* Limit to 2 lines (prefixed form) */
line-clamp: 2; /* Standard property */
-webkit-box-orient: vertical;
overflow: hidden; /* Hide whatever falls outside the clamped box */
text-overflow: ellipsis;
padding: 0 7px 7px 7px; /* 0 top, 7px L/R/B for text area */
line-height: 1.5; /* Adjust for better readability */
height: 3.3em; /* em here is relative to this element's own font size (the 0.75em above): two lines at line-height 1.5 take 3em, so about 0.3em is spare. NOTE(review): whether the 7px bottom padding is added on top of this depends on box-sizing, which is not set in this rule. */
color: var(--c-interactive); /* Use theme's interactive color */
}
/* Wrapper around the cover image and the score badge; it is the positioning context for the badge. */
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-image-container {
position: relative; /* Lets .recommendation-score be absolutely positioned inside this box */
display: block; /* Block layout for the wrapper */
width: calc(100% - 4px); /* Full card width minus the 2px left and right margins below */
margin: 2px; /* 2px on every side; the bottom value is replaced by the next declaration */
margin-bottom: 8px; /* Gap between the image and the title; must stay after the margin shorthand to win */
line-height: 0; /* Prevents unexpected space if child elements are treated as inline */
}
/* The cover image inside the wrapper. This selector is more specific than the generic
   ".ais-FrequentlyBoughtTogether-item img" rule, so its width, max-width and margin take precedence. */
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-image-container img {
display: block;
width: 100%; /* Fill the container's width */
max-width: 20em; /* Upper bound on the rendered image width */
height: auto; /* Maintain aspect ratio by default */
max-height: 12em; /* Constrain image height (adjust as needed) */
object-fit: cover; /* When max-height limits the box, crop the image instead of distorting it */
margin: 0 auto; /* Center image if max-width kicks in and it's narrower than container */
}
/* Score badge pinned to the bottom-right corner of .recommendation-image-container.
   NOTE(review): --green is not defined in the part of the stylesheet visible here; confirm it is declared elsewhere. */
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-score {
position: absolute;
bottom: 3px; /* Offset from the bottom edge of the container */
right: 3px; /* Offset from the right edge of the container */
background-color: color-mix(in srgb, var(--c-background) 85%, var(--green) 15%); /* 85% theme background mixed with 15% green, so the tint follows the active theme */
color: var(--green); /* Theme's green color for text */
padding: 3px 6px;
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 0.75em;
font-weight: bold;
border-radius: 10px; /* Pill-like rounded corners */
border: 1px solid var(--green); /* Theme's green color for border */
line-height: 1; /* Critical for small text in a small box */
z-index: 10; /* Ensure it's above the image */
box-shadow: 0 1px 2px rgba(0,0,0,0.15); /* Softer shadow */
display: flex; /* Lays out the icon and the number in a row */
align-items: center; /* Vertically center icon and text */
}
/* SVG icon inside the score badge. The JS item template renders the <svg> as a direct child of
   .recommendation-score, which makes it a flex item. */
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-score-icon {
width: 0.9em; /* Scale with score's font size */
height: 0.9em;
vertical-align: -0.1em; /* NOTE(review): vertical-align does not apply to flex items; alignment comes from align-items on the badge */
margin-right: 4px; /* Space between icon and score number */
fill: var(--green); /* Theme's green color for icon */
}
</style>
<script type="e98c7fd0fab5af1e4999090d-text/javascript">
// Function to load a script and return a promise
/**
 * Load an external script at most once.
 *
 * @param {string} src URL of the script to load.
 * @returns {Promise<void>} Resolves when the script has loaded (or when a
 *   <script> tag with the same src attribute was already in the document
 *   before this function was first asked for it); rejects with an Error if
 *   the script fails to load.
 */
function loadScript(src) {
  // One promise per URL, stored on the function itself so the block needs no
  // outside state. Concurrent callers share the same promise and therefore
  // wait for the real load event instead of resolving as soon as the tag exists.
  const pending = loadScript.pending || (loadScript.pending = new Map());
  if (pending.has(src)) {
    return pending.get(src);
  }

  const promise = new Promise((resolve, reject) => {
    // Compare the attribute value directly: building a selector from src would
    // break (or match the wrong element) if the URL contained a quote.
    const alreadyInPage = Array.from(document.scripts).some(
      (tag) => tag.getAttribute('src') === src
    );
    if (alreadyInPage) {
      resolve(); // Tag was put there by someone else; treat it as loaded
      return;
    }

    const script = document.createElement('script');
    script.src = src;
    script.onload = () => resolve();
    script.onerror = () => {
      // Forget the failed attempt and remove its tag; otherwise a later call
      // would find the tag and wrongly report the script as loaded.
      pending.delete(src);
      script.remove();
      reject(new Error(`Script load error for ${src}`));
    };
    document.head.appendChild(script);
  });

  pending.set(src, promise);
  return promise;
}
/**
 * Load the Algolia search client and InstantSearch.js from the CDN, then mount
 * the Related Products and Frequently Bought Together widgets.
 *
 * Failures are reported with console.warn and never thrown: the
 * recommendations are an optional part of the page.
 */
function initAlgoliaRecommendations() {
  // Run one initializer in isolation so an exception in one widget does not
  // stop the other from mounting.
  const mount = (label, init) => {
    try {
      init();
    } catch (error) {
      console.warn(`Algolia ${label} widget failed to initialize:`, error);
    }
  };

  Promise.all([
    loadScript('https://cdn.jsdelivr.net/npm/algoliasearch@4/dist/algoliasearch-lite.umd.js'),
    loadScript('https://cdn.jsdelivr.net/npm/instantsearch.js@4')
  ])
    .then(() => {
      mount('Related Products', () => initRelatedProducts());
      mount('Frequently Bought Together', () => initFrequentlyBoughtTogether());
    })
    .catch((error) => {
      // A CDN script failed to load: leave the page as it is, but make the
      // failure visible in the console instead of swallowing it.
      console.warn('Algolia recommendation scripts could not be loaded:', error);
    });
}
/**
 * Mount the Algolia "Related Products" widget into #algolia-related-products.
 * Returns without doing anything when that container is not on the page.
 *
 * Relies on the globals `algoliasearch` and `instantsearch` being loaded.
 */
function initRelatedProducts() {
  const relatedElement = document.getElementById('algolia-related-products');
  if (!relatedElement) {
    // Container not found, stopping initialization
    return;
  }

  // Show or hide this widget's own container and, when appropriate, the shared
  // #algolia-recs-container wrapper. The wrapper is only hidden when the
  // sibling FBT container is missing or has no child nodes.
  function syncVisibility(hasItems) {
    relatedElement.style.display = hasItems ? 'block' : 'none';

    const containerElement = document.getElementById('algolia-recs-container');
    if (!containerElement) {
      return;
    }
    if (hasItems) {
      containerElement.style.display = 'block';
      return;
    }
    // #algolia-fbt may not exist on this page: check before calling into it.
    const fbtElement = document.getElementById('algolia-fbt');
    if (!fbtElement || !fbtElement.hasChildNodes()) {
      containerElement.style.display = 'none';
    }
  }

  const recSearchClient = algoliasearch(
    '2XJCLEABQD',
    'b61ec4cb64bd32d62c053466fccbfa43'
  );
  const relatedSearch = instantsearch({
    indexName: 'eugeneyan.com',
    searchClient: recSearchClient,
    clickAnalytics: true,
    insights: true, // Enable insights for click tracking on recommendations
  });

  relatedSearch.addWidgets([
    instantsearch.widgets.relatedProducts({
      container: '#algolia-related-products',
      objectIDs: ['/writing/semantic-ids/'],
      limit: 3,
      queryParameters: {
        attributesToRetrieve: ['title', 'url', 'image', 'score', '_score'], // Specify only needed attributes
        attributesToHighlight: [], // Disable highlighting
        attributesToSnippet: [] // Disable snippeting
      },
      translations: {
        title: '', // Custom title is in _layouts/post.html
      },
      // Used as a hook to toggle container visibility; items pass through unchanged.
      transformItems: function(items) {
        syncVisibility(items.length > 0);
        return items;
      },
      templates: {
        header() {
          // Return a PLAIN string for the header
          return '<h4 class="algolia-recs-section-header">You Might Also Like (content-based)</h4>';
        },
        // Render one card with the `html` tagged template passed in by
        // InstantSearch. Unlike a plain template literal, it binds the onClick
        // function as a real event handler and inserts interpolated values as
        // text/attribute values rather than raw markup.
        item: function(hit, { html, sendEvent }) {
          const itemUrl = hit.url || '#';
          // Fall back to the default cover when hit.image is missing, not a string, or blank.
          const hasImage = typeof hit.image === 'string' && hit.image.trim() !== '';
          const imageUrl = hasImage
            ? `/assets/og_image/${hit.image}`
            : '/assets/og_image/default-v4.jpg';

          let scoreValue = null;
          if (typeof hit.score === 'number') scoreValue = hit.score.toFixed(2);
          else if (typeof hit._score === 'number') scoreValue = hit._score.toFixed(2);

          // viewBox is camel-cased because the element is created through the
          // virtual DOM rather than parsed from an HTML string.
          const scoreElement = scoreValue !== null
            ? html`<div class="recommendation-score"><svg viewBox="0 0 24 24" class="recommendation-score-icon" aria-hidden="true"><path d="M16 6l2.29 2.29-4.88 4.88-4-4L2 16.59 3.41 18l6-6 4 4 6.3-6.29L22 12V6h-6z"></path></svg>${scoreValue}</div>`
            : null;

          const title = hit.title || 'Untitled Post';
          return html`
            <a href="${itemUrl}"
              class="ais-RelatedProducts-item-link-wrapper"
              onClick="${() => {
                // Guarded in case this widget version does not pass sendEvent
                if (typeof sendEvent === 'function') {
                  sendEvent('click', hit, 'Related Item Clicked');
                }
              }}"
            >
              <div class="recommendation-image-container">
                <img src="${imageUrl}" alt="${hit.title || 'Recommendation cover image'}" />
                ${scoreElement}
              </div>
              <div class="related-product-title">${title}</div>
            </a>
          `;
        },
        empty() {
          // No recommendations: hide this widget (and the wrapper if FBT is empty too)
          syncVisibility(false);
          // Return an empty string to prevent rendering 'undefined'
          return '';
        }
      }
    })
  ]);
  relatedSearch.start();
}
/**
 * Mount the Algolia "Frequently Bought Together" widget into #algolia-fbt.
 * Returns without doing anything when that container is not on the page.
 *
 * Relies on the globals `algoliasearch` and `instantsearch` being loaded.
 */
function initFrequentlyBoughtTogether() {
  const fbtElement = document.getElementById('algolia-fbt');
  if (!fbtElement) {
    // Container not found, stopping initialization
    return;
  }

  // Show or hide this widget's own container and, when appropriate, the shared
  // #algolia-recs-container wrapper. The wrapper is only hidden when the
  // sibling Related Products container is missing or has no child nodes.
  function syncVisibility(hasItems) {
    fbtElement.style.display = hasItems ? 'block' : 'none';

    const containerElement = document.getElementById('algolia-recs-container');
    if (!containerElement) {
      return;
    }
    if (hasItems) {
      containerElement.style.display = 'block';
      return;
    }
    // #algolia-related-products may not exist on this page: check before calling into it.
    const relatedElement = document.getElementById('algolia-related-products');
    if (!relatedElement || !relatedElement.hasChildNodes()) {
      containerElement.style.display = 'none';
    }
  }

  const recSearchClient = algoliasearch(
    '2XJCLEABQD',
    'b61ec4cb64bd32d62c053466fccbfa43'
  );
  const fbtSearch = instantsearch({
    indexName: 'eugeneyan.com',
    searchClient: recSearchClient,
    clickAnalytics: true,
    insights: true, // Enable insights for click tracking on recommendations
  });

  fbtSearch.addWidgets([
    instantsearch.widgets.frequentlyBoughtTogether({
      container: '#algolia-fbt',
      objectIDs: ['/writing/semantic-ids/'],
      limit: 3,
      queryParameters: {
        attributesToRetrieve: ['title', 'url', 'image', 'score', '_score'], // Specify only needed attributes
        attributesToHighlight: [], // Disable highlighting
        attributesToSnippet: [] // Disable snippeting
      },
      translations: {
        title: '', // Custom title is in _layouts/post.html
      },
      // Used as a hook to toggle container visibility; items pass through unchanged.
      transformItems: function(items) {
        syncVisibility(items.length > 0);
        return items;
      },
      templates: {
        header() {
          // Return a PLAIN string for the header
          return '<h4 class="algolia-recs-section-header">Frequently Read Together (behavioral-based)</h4>';
        },
        // Render one card with the `html` tagged template passed in by
        // InstantSearch. Unlike a plain template literal, it binds the onClick
        // function as a real event handler and inserts interpolated values as
        // text/attribute values rather than raw markup.
        item: function(hit, { html, sendEvent }) {
          const itemUrl = hit.url || '#';
          // Fall back to the default cover when hit.image is missing, not a string, or blank.
          const hasImage = typeof hit.image === 'string' && hit.image.trim() !== '';
          const imageUrl = hasImage
            ? `/assets/og_image/${hit.image}`
            : '/assets/og_image/default-v4.jpg';

          let scoreValue = null;
          if (typeof hit.score === 'number') scoreValue = hit.score.toFixed(2);
          else if (typeof hit._score === 'number') scoreValue = hit._score.toFixed(2);

          // viewBox is camel-cased because the element is created through the
          // virtual DOM rather than parsed from an HTML string.
          const scoreElement = scoreValue !== null
            ? html`<div class="recommendation-score"><svg viewBox="0 0 24 24" class="recommendation-score-icon" aria-hidden="true"><path d="M16 6l2.29 2.29-4.88 4.88-4-4L2 16.59 3.41 18l6-6 4 4 6.3-6.29L22 12V6h-6z"></path></svg>${scoreValue}</div>`
            : null;

          const title = hit.title || 'Untitled Post';
          return html`
            <a href="${itemUrl}"
              class="ais-FrequentlyBoughtTogether-item-link-wrapper"
              onClick="${() => {
                // Guarded in case this widget version does not pass sendEvent
                if (typeof sendEvent === 'function') {
                  sendEvent('click', hit, 'FBT Item Clicked');
                }
              }}"
            >
              <div class="recommendation-image-container">
                <img src="${imageUrl}" alt="${hit.title || 'Recommendation cover image'}" />
                ${scoreElement}
              </div>
              <div class="fbt-product-title">${title}</div>
            </a>
          `;
        },
        empty() {
          // No recommendations: hide this widget (and the wrapper if Related Products is empty too)
          syncVisibility(false);
          // Return an empty string to prevent rendering 'undefined'
          return '';
        }
      }
    })
  ]);
  fbtSearch.start();
}
// Defer loading the recommendation widgets until the reader is within 500px of
// the bottom of the page. The check runs once when the DOM is ready and then on
// every scroll event until it succeeds.
document.addEventListener('DOMContentLoaded', () => {
  let triggered = false;

  const loadWhenNearBottom = () => {
    if (triggered) {
      return;
    }
    const viewportBottom = window.scrollY + window.innerHeight;
    const nearBottom = viewportBottom >= document.body.scrollHeight - 500;
    if (!nearBottom) {
      return;
    }
    // One-shot: mark as done and stop listening before kicking off the load
    triggered = true;
    window.removeEventListener('scroll', loadWhenNearBottom);
    initAlgoliaRecommendations();
  };

  window.addEventListener('scroll', loadWhenNearBottom, { passive: true });
  // Short pages may already satisfy the condition without any scrolling
  loadWhenNearBottom();
});
</script>
</div>
<br>
<!-- <div id="algolia-recs-container" style="display: none;">
<div id="algolia-related-products" style="margin-bottom: 2em;"></div>
<style>
/* Common styles for both recommendation widgets */
.algolia-recs-section-header {
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 1em; /* Adjust as needed, smaller than default h3 */
font-weight: bold;
margin-top: 0; /* Remove or reduce top margin */
margin-bottom: 15px; /* Space between header and recommendation cards */
color: var(--c-text); /* Use theme's text color */
font-style: italic;
}
/* Related Products Widget Styles */
#algolia-related-products .ais-RelatedProducts-list {
display: flex;
flex-direction: row; /* Arrange items horizontally */
flex-wrap: nowrap; /* Prevent wrapping to new lines, if possible */
justify-content: flex-start; /* Align items to the start of the container */
padding-left: 0; /* Remove default list padding */
list-style-type: none; /* Remove list bullets */
margin: 0;
}
#algolia-related-products .ais-RelatedProducts-item {
width: 32%; /* Adjust for 3 items: 32% * 3 items + 2% * 2 margins = 100% */
margin-right: 2%;
box-sizing: border-box; /* Include padding and border in the element's total width */
/* Optional: Basic card styling (uncomment to use) */
border: 1px solid color-mix(in srgb, var(--c-background) 85%, var(--c-text) 15%); /* Theme-aware light grey border */
padding: 0; /* Remove overall card padding, will be handled by elements */
text-align: left; /* Or 'center' if you prefer */
background-color: var(--c-background);
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
#algolia-related-products .ais-RelatedProducts-item:last-child {
margin-right: 0; /* No margin for the last item in the row */
}
/* Styling for images within recommendation items */
#algolia-related-products .ais-RelatedProducts-item img {
display: block; /* Can help remove extra space below image */
width: calc(100% - 4px); /* Full width minus 2px L/R margins */
max-width: 100%; /* Ensures image does not exceed container if intrinsically smaller */
/* height: auto; -- Controlled by inline style's max-height and object-fit */
object-fit: cover; /* Ensure image covers the area, also in inline style */
margin: 2px; /* 2px margin on top, left, right. Bottom is overridden by inline style. */
/* margin-bottom: 8px; -- This is set by inline style in JS template */
}
/* Styling for the wrapper link to make the whole card clickable */
#algolia-related-products .ais-RelatedProducts-item a.ais-RelatedProducts-item-link-wrapper {
display: block; /* Make the link fill the list item */
text-decoration: none; /* Remove underline */
color: inherit; /* Use parent's text color */
}
#algolia-related-products .related-product-title {
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 0.75em;
display: -webkit-box;
-webkit-line-clamp: 2; /* Limit to 2 lines for WebKit browsers */
line-clamp: 2; /* Standard property */
-webkit-box-orient: vertical;
overflow: hidden;
text-overflow: ellipsis;
padding: 0 7px 7px 7px; /* 0 top, 7px L/R/B for text area */
line-height: 1.5; /* Adjust for better readability */
height: 3.3em; /* Current height: 3.3em. For 2 lines with 0.75em font & 1.5 line-height, calculated height would be 2.25em. */
color: var(--c-interactive); /* Use theme's interactive color */
}
/* This container will wrap the image and score, taking the original image's layout space. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container {
position: relative; /* For positioning the score absolutely within */
display: block; /* Matches original image display and ensures proper block layout */
width: calc(100% - 4px); /* Adopts width from original image styling */
margin: 2px; /* Adopts margin from original image styling */
margin-bottom: 8px; /* Adopts specific bottom margin from original image's inline style */
line-height: 0; /* Prevents unexpected space if child elements are treated as inline */
}
/* The image itself, now filling the container */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container img {
display: block;
width: 100%; /* Fill the container's width */
max-width: 20em; /* Optional: retain original max-width constraint for the image content */
height: auto; /* Maintain aspect ratio by default */
max-height: 12em; /* Constrain image height (adjust as needed) */
object-fit: cover; /* Ensures image covers the allocated space, cropping if necessary */
margin: 0 auto; /* Center image if max-width kicks in and it's narrower than container */
}
/* The score overlay box */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score {
position: absolute;
bottom: 3px; /* Padding from the bottom edge of the container */
right: 3px; /* Padding from the right edge of the container */
background-color: color-mix(in srgb, var(--c-background) 85%, var(--green) 15%); /* Theme-aware light green */
color: var(--green); /* Theme's green color for text */
padding: 3px 6px; /* Slightly adjusted padding */
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 0.75em;
font-weight: bold;
border-radius: 10px; /* More rounded corners like the example */
border: 1px solid var(--green); /* Theme's green color for border */
line-height: 1; /* Critical for small text in a small box */
z-index: 10; /* Ensure it's above the image */
box-shadow: 0 1px 2px rgba(0,0,0,0.15); /* Softer shadow */
display: flex; /* To align icon and text nicely */
align-items: center; /* Vertically center icon and text */
}
/* Styling for the SVG icon within the score box */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score-icon {
width: 0.9em; /* Scale with score's font size */
height: 0.9em;
vertical-align: -0.1em; /* Fine-tune vertical alignment */
margin-right: 4px; /* Space between icon and score number */
fill: var(--green); /* Theme's green color for icon */
}
</style>
<script>
// Function to load a script and return a promise
function loadScript(src) {
return new Promise((resolve, reject) => {
if (document.querySelector(`script[src="${src}"]`)) {
resolve(); // Already loaded
return;
}
const script = document.createElement('script');
script.src = src;
script.onload = () => resolve();
script.onerror = () => reject(new Error(`Script load error for ${src}`));
document.head.appendChild(script);
});
}
function initAlgoliaRecommendations() {
Promise.all([
loadScript('https://cdn.jsdelivr.net/npm/algoliasearch@4/dist/algoliasearch-lite.umd.js'),
loadScript('https://cdn.jsdelivr.net/npm/instantsearch.js@4')
])
.then(() => {
// Initialize the Related Products widget
initRelatedProducts();
})
.catch(error => {
// Handle error silently
});
}
function initRelatedProducts() {
// Ensure container exists
if (!document.getElementById('algolia-related-products')) {
// Container not found, stopping initialization
return;
}
const recSearchClient = algoliasearch(
'2XJCLEABQD',
'b61ec4cb64bd32d62c053466fccbfa43'
);
const relatedSearch = instantsearch({
indexName: 'eugeneyan.com',
searchClient: recSearchClient,
clickAnalytics: true,
insights: true, // Enable insights for click tracking on recommendations
});
relatedSearch.addWidgets([
instantsearch.widgets.relatedProducts({
container: '#algolia-related-products',
objectIDs: ['/writing/semantic-ids/'],
limit: 3,
queryParameters: {
attributesToRetrieve: ['title', 'url', 'image', 'score', '_score'], // Specify only needed attributes
attributesToHighlight: [], // Disable highlighting
attributesToSnippet: [] // Disable snippeting
},
translations: {
title: '', // Custom title is in _layouts/post.html
},
transformItems: function(items) {
const containerElement = document.getElementById('algolia-recs-container');
const relatedElement = document.getElementById('algolia-related-products');
if (items.length === 0) {
if (relatedElement) relatedElement.style.display = 'none';
if (containerElement) containerElement.style.display = 'none';
} else {
if (relatedElement) relatedElement.style.display = 'block';
if (containerElement) containerElement.style.display = 'block';
}
return items;
},
templates: {
header() { // Removed unused results, html parameters
// Return a PLAIN string for the header
return '<h4 class="algolia-recs-section-header">You Might Also Like</h4>';
},
item: function(hit, { html, sendEvent }) { // Added sendEvent to params
const itemUrl = `${hit.url || '#'}`;
const indexName = 'eugeneyan.com'; // Get index name for insights
let imageUrl;
// Ensure hit.image is not null, undefined, or an empty/whitespace string before using it.
if (hit.image && typeof hit.image === 'string' && hit.image.trim() !== '') {
imageUrl = `/assets/og_image/${hit.image}`;
} else {
imageUrl = `/assets/og_image/default-v4.jpg`; // Default image
}
let scoreValue = null;
if (typeof hit.score === 'number') scoreValue = hit.score.toFixed(2);
else if (typeof hit._score === 'number') scoreValue = hit._score.toFixed(2);
// else if (typeof hit.your_custom_score_field === 'number') scoreValue = hit.your_custom_score_field.toFixed(2);
const scoreElement = scoreValue ? `<div class="recommendation-score"><svg viewbox="0 0 24 24" class="recommendation-score-icon" xmlns="http://www.w3.org/2000/svg"><path d="M16 6l2.29 2.29-4.88 4.88-4-4L2 16.59 3.41 18l6-6 4 4 6.3-6.29L22 12V6h-6z"></path></svg>${scoreValue}</div>` : '';
const imageAndScoreTag = `
<div class="recommendation-image-container">
<img src="${imageUrl}" alt="${hit.title || 'Recommendation cover image'}">
${scoreElement}
</div>`;
const title = hit.title || 'Untitled Post';
return `
<a href="${itemUrl}"
class="ais-RelatedProducts-item-link-wrapper"
onClick="${() => {
sendEvent('click', hit, 'Related Item Clicked');
}}"
>
${imageAndScoreTag}
<div class="related-product-title">${title}</div>
</a>
`;
},
empty(results, { html }) {
const containerElement = document.getElementById('algolia-recs-container');
const relatedElement = document.getElementById('algolia-related-products');
if (relatedElement) {
relatedElement.style.display = 'none';
}
// If this 'empty' template is called, it means related products are empty.
// So, the main container should also be hidden.
if (containerElement) {
containerElement.style.display = 'none';
}
// Return an empty string to prevent rendering 'undefined'
return '';
}
}
})
]);
relatedSearch.start();
}
document.addEventListener('DOMContentLoaded', () => {
  // Lazy-load the recommendations: start them once the bottom edge of the
  // viewport is within 500px of the end of the page, and do so only once.
  let hasStarted = false;

  const checkLoad = () => {
    if (hasStarted) {
      return;
    }
    const viewportBottom = window.scrollY + window.innerHeight;
    const triggerLine = document.body.scrollHeight - 500;
    const nearBottom = viewportBottom >= triggerLine;
    if (nearBottom) {
      hasStarted = true;
      // The listener has done its job; stop reacting to further scrolls.
      window.removeEventListener('scroll', checkLoad);
      initAlgoliaRecommendations();
    }
  };

  window.addEventListener('scroll', checkLoad, { passive: true });
  // Run once immediately in case the page is short or already scrolled down.
  checkLoad();
});
</script>
</div> -->
<span style="font-family: 'Raleway', Helvetica, sans-serif;">Browse related tags:</span> <span class="no-italics">[
<a class='tag' href="/tag/llm/">llm</a>
<a class='tag' href="/tag/recsys/">recsys</a>
<a class='tag' href="/tag/learning/">learning</a>
<a class='tag' href="/tag/🛠/">🛠</a>
<a class='tag' href="/tag/🩷/">🩷</a>
]
</span> <span style="font-family: 'Raleway', Helvetica, sans-serif;"> or </span><a href="/search/" title="Search" style="text-decoration: none; font-family: 'Raleway', Helvetica, sans-serif;"><img class="icon icon-search" src="/assets/icon-search.svg" loading="lazy" alt="" style="vertical-align: middle; margin-right: 0.25em;"/>Search</a>
<div class="PageNavigation">
<a class="prev sans-serif" href="/writing/qa-evals/">« Evaluating Long-Context Question &amp; Answer Systems</a>
<a class="next sans-serif" href="/writing/principal/">Advice for New Principal Tech ICs (i.e., Notes to Myself) »</a>
</div>
<hr>
<p style="font-size: 15px; text-align: center; margin: 2em 0 0.5em">Join <b>11,800+</b> readers getting updates on machine learning, RecSys, LLMs, and engineering.</p>
<script src="https://f.convertkit.com/ckjs/ck.6.js" type="e98c7fd0fab5af1e4999090d-text/javascript"></script>
<form action="https://app.convertkit.com/forms/4004980/subscriptions" class="seva-form formkit-form" method="post" data-sv-form="4004980" data-uid="96a310b6ce" data-format="inline" data-version="6" data-options="{&quot;settings&quot;:{&quot;after_subscribe&quot;:{&quot;action&quot;:&quot;message&quot;,&quot;success_message&quot;:&quot;Just sent a confirmation! Check your inbox.&quot;,&quot;redirect_url&quot;:&quot;&quot;},&quot;analytics&quot;:{&quot;google&quot;:null,&quot;fathom&quot;:null,&quot;facebook&quot;:null,&quot;segment&quot;:null,&quot;pinterest&quot;:null,&quot;sparkloop&quot;:null,&quot;googletagmanager&quot;:null},&quot;modal&quot;:{&quot;trigger&quot;:&quot;timer&quot;,&quot;scroll_percentage&quot;:null,&quot;timer&quot;:5,&quot;devices&quot;:&quot;all&quot;,&quot;show_once_every&quot;:15},&quot;powered_by&quot;:{&quot;show&quot;:false,&quot;url&quot;:&quot;https://convertkit.com/features/forms?utm_campaign=poweredby&amp;utm_content=form&amp;utm_medium=referral&amp;utm_source=dynamic&quot;},&quot;recaptcha&quot;:{&quot;enabled&quot;:false},&quot;return_visitor&quot;:{&quot;action&quot;:&quot;show&quot;,&quot;custom_content&quot;:&quot;&quot;},&quot;slide_in&quot;:{&quot;display_in&quot;:&quot;bottom_right&quot;,&quot;trigger&quot;:&quot;timer&quot;,&quot;scroll_percentage&quot;:null,&quot;timer&quot;:5,&quot;devices&quot;:&quot;all&quot;,&quot;show_once_every&quot;:15},&quot;sticky_bar&quot;:{&quot;display_in&quot;:&quot;top&quot;,&quot;trigger&quot;:&quot;timer&quot;,&quot;scroll_percentage&quot;:null,&quot;timer&quot;:5,&quot;devices&quot;:&quot;all&quot;,&quot;show_once_every&quot;:15}},&quot;version&quot;:&quot;6&quot;}" min-width="400 500 600 700 800"><div data-style="clean"><ul class="formkit-alert formkit-alert-error" data-element="errors" data-group="alert"></ul><div data-element="fields" data-stacked="false" class="seva-fields formkit-fields"><div class="formkit-field"><input class="formkit-input" name="email_address" style="color: rgb(0, 0, 0); border-color: rgb(227, 227, 227); border-radius: 4px; font-weight: 400;" aria-label="Your email address..." placeholder="Your email address..." 
required="" type="email"></div><button data-element="submit" class="formkit-submit formkit-submit" style="color: rgb(255, 255, 255); background-color: rgb(0, 123, 255); border-radius: 5px; font-weight: 400;"><div class="formkit-spinner"><div></div><div></div><div></div></div><span class="">Get email updates</span></button></div></div><style>.formkit-form[data-uid="96a310b6ce"] *{box-sizing:border-box;}.formkit-form[data-uid="96a310b6ce"]{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;}.formkit-form[data-uid="96a310b6ce"] legend{border:none;font-size:inherit;margin-bottom:10px;padding:0;position:relative;display:table;}.formkit-form[data-uid="96a310b6ce"] fieldset{border:0;padding:0.01em 0 0 0;margin:0;min-width:0;}.formkit-form[data-uid="96a310b6ce"] body:not(:-moz-handler-blocked) fieldset{display:table-cell;}.formkit-form[data-uid="96a310b6ce"] h1,.formkit-form[data-uid="96a310b6ce"] h2,.formkit-form[data-uid="96a310b6ce"] h3,.formkit-form[data-uid="96a310b6ce"] h4,.formkit-form[data-uid="96a310b6ce"] h5,.formkit-form[data-uid="96a310b6ce"] h6{color:inherit;font-size:inherit;font-weight:inherit;}.formkit-form[data-uid="96a310b6ce"] h2{font-size:1.5em;margin:1em 0;}.formkit-form[data-uid="96a310b6ce"] h3{font-size:1.17em;margin:1em 0;}.formkit-form[data-uid="96a310b6ce"] p{color:inherit;font-size:inherit;font-weight:inherit;}.formkit-form[data-uid="96a310b6ce"] ol:not([template-default]),.formkit-form[data-uid="96a310b6ce"] ul:not([template-default]),.formkit-form[data-uid="96a310b6ce"] blockquote:not([template-default]){text-align:left;}.formkit-form[data-uid="96a310b6ce"] p:not([template-default]),.formkit-form[data-uid="96a310b6ce"] hr:not([template-default]),.formkit-form[data-uid="96a310b6ce"] blockquote:not([template-default]),.formkit-form[data-uid="96a310b6ce"] ol:not([template-default]),.formkit-form[data-uid="96a310b6ce"] ul:not([template-default]){color:inherit;font-style:initial;}.formkit-form[data-uid="96a310b6ce"] 
.ordered-list,.formkit-form[data-uid="96a310b6ce"] .unordered-list{list-style-position:outside !important;padding-left:1em;}.formkit-form[data-uid="96a310b6ce"] .list-item{padding-left:0;}.formkit-form[data-uid="96a310b6ce"][data-format="modal"]{display:none;}.formkit-form[data-uid="96a310b6ce"][data-format="slide in"]{display:none;}.formkit-form[data-uid="96a310b6ce"][data-format="sticky bar"]{display:none;}.formkit-sticky-bar .formkit-form[data-uid="96a310b6ce"][data-format="sticky bar"]{display:block;}.formkit-form[data-uid="96a310b6ce"] .formkit-input,.formkit-form[data-uid="96a310b6ce"] .formkit-select,.formkit-form[data-uid="96a310b6ce"] .formkit-checkboxes{width:100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-button,.formkit-form[data-uid="96a310b6ce"] .formkit-submit{border:0;border-radius:5px;color:#ffffff;cursor:pointer;display:inline-block;text-align:center;font-size:15px;font-weight:500;cursor:pointer;margin-bottom:15px;overflow:hidden;padding:0;position:relative;vertical-align:middle;}.formkit-form[data-uid="96a310b6ce"] .formkit-button:hover,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:hover,.formkit-form[data-uid="96a310b6ce"] .formkit-button:focus,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:focus{outline:none;}.formkit-form[data-uid="96a310b6ce"] .formkit-button:hover > span,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:hover > span,.formkit-form[data-uid="96a310b6ce"] .formkit-button:focus > span,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:focus > span{background-color:rgba(0,0,0,0.1);}.formkit-form[data-uid="96a310b6ce"] .formkit-button > span,.formkit-form[data-uid="96a310b6ce"] .formkit-submit > span{display:block;-webkit-transition:all 300ms ease-in-out;transition:all 300ms ease-in-out;padding:12px 24px;}.formkit-form[data-uid="96a310b6ce"] .formkit-input{background:#ffffff;font-size:15px;padding:12px;border:1px solid #e3e3e3;-webkit-flex:1 0 auto;-ms-flex:1 0 auto;flex:1 0 
auto;line-height:1.4;margin:0;-webkit-transition:border-color ease-out 300ms;transition:border-color ease-out 300ms;}.formkit-form[data-uid="96a310b6ce"] .formkit-input:focus{outline:none;border-color:#1677be;-webkit-transition:border-color ease 300ms;transition:border-color ease 300ms;}.formkit-form[data-uid="96a310b6ce"] .formkit-input::-webkit-input-placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] .formkit-input::-moz-placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] .formkit-input:-ms-input-placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] .formkit-input::placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"]{position:relative;display:inline-block;width:100%;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"]::before{content:"";top:calc(50% - 2.5px);right:10px;position:absolute;pointer-events:none;border-color:#4f4f4f transparent transparent transparent;border-style:solid;border-width:6px 6px 0 6px;height:0;width:0;z-index:999;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"] select{height:auto;width:100%;cursor:pointer;color:#333333;line-height:1.4;margin-bottom:0;padding:0 6px;-webkit-appearance:none;-moz-appearance:none;appearance:none;font-size:15px;padding:12px;padding-right:25px;border:1px solid #e3e3e3;background:#ffffff;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"] select:focus{outline:none;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"]{text-align:left;margin:0;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"]{margin-bottom:10px;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] *{cursor:pointer;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"]:last-of-type{margin-bottom:0;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] 
input[type="checkbox"]{display:none;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] input[type="checkbox"] + label::after{content:none;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] input[type="checkbox"]:checked + label::after{border-color:#ffffff;content:"";}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] input[type="checkbox"]:checked + label::before{background:#10bf7a;border-color:#10bf7a;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label{position:relative;display:inline-block;padding-left:28px;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::before,.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::after{position:absolute;content:"";display:inline-block;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::before{height:16px;width:16px;border:1px solid #e3e3e3;background:#ffffff;left:0px;top:3px;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::after{height:4px;width:8px;border-left:2px solid #4d4d4d;border-bottom:2px solid #4d4d4d;-webkit-transform:rotate(-45deg);-ms-transform:rotate(-45deg);transform:rotate(-45deg);left:4px;top:8px;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert{background:#f9fafb;border:1px solid #e3e3e3;border-radius:5px;-webkit-flex:1 0 auto;-ms-flex:1 0 auto;flex:1 0 auto;list-style:none;margin:25px auto;padding:12px;text-align:center;width:100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert:empty{display:none;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert-success{background:#d3fbeb;border-color:#10bf7a;color:#0c905c;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert-error{background:#fde8e2;border-color:#f2643b;color:#ea4110;}.formkit-form[data-uid="96a310b6ce"] 
.formkit-spinner{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;height:0px;width:0px;margin:0 auto;position:absolute;top:0;left:0;right:0;width:0px;overflow:hidden;text-align:center;-webkit-transition:all 300ms ease-in-out;transition:all 300ms ease-in-out;}.formkit-form[data-uid="96a310b6ce"] .formkit-spinner > div{margin:auto;width:12px;height:12px;background-color:#fff;opacity:0.3;border-radius:100%;display:inline-block;-webkit-animation:formkit-bouncedelay-formkit-form-data-uid-96a310b6ce- 1.4s infinite ease-in-out both;animation:formkit-bouncedelay-formkit-form-data-uid-96a310b6ce- 1.4s infinite ease-in-out both;}.formkit-form[data-uid="96a310b6ce"] .formkit-spinner > div:nth-child(1){-webkit-animation-delay:-0.32s;animation-delay:-0.32s;}.formkit-form[data-uid="96a310b6ce"] .formkit-spinner > div:nth-child(2){-webkit-animation-delay:-0.16s;animation-delay:-0.16s;}.formkit-form[data-uid="96a310b6ce"] .formkit-submit[data-active] .formkit-spinner{opacity:1;height:100%;width:50px;}.formkit-form[data-uid="96a310b6ce"] .formkit-submit[data-active] .formkit-spinner ~ span{opacity:0;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by[data-active="false"]{opacity:0.35;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit-container{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;width:100%;z-index:5;margin:10px 0;position:relative;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit-container[data-active="false"]{opacity:0.35;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit{-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;background-color:#ffffff;border:1px solid #dde2e7;border-radius:4px;color:#373f45;cursor:pointer;display:block;height:36px;margin:0 auto;opacity:0.95;padding:0;-webkit-text-decoration:none;text-decoration:none;text-indent:100%;-webkit-transition:ease-in-out all 200ms;transition:ease-in-out all 
200ms;white-space:nowrap;overflow:hidden;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;width:190px;background-repeat:no-repeat;background-position:center;background-image:url("data:image/svg+xml;charset=utf8,%3Csvg width='162' height='20' viewBox='0 0 162 20' fill='none' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M83.0561 15.2457C86.675 15.2457 89.4722 12.5154 89.4722 9.14749C89.4722 5.99211 86.8443 4.06563 85.1038 4.06563C82.6801 4.06563 80.7373 5.76407 80.4605 8.28551C80.4092 8.75244 80.0387 9.14403 79.5686 9.14069C78.7871 9.13509 77.6507 9.12841 76.9314 9.13092C76.6217 9.13199 76.3658 8.88106 76.381 8.57196C76.4895 6.38513 77.2218 4.3404 78.618 2.76974C80.1695 1.02445 82.4289 0 85.1038 0C89.5979 0 93.8406 4.07791 93.8406 9.14749C93.8406 14.7608 89.1832 19.3113 83.1517 19.3113C78.8502 19.3113 74.5179 16.5041 73.0053 12.5795C72.9999 12.565 72.9986 12.5492 73.0015 12.534C73.0218 12.4179 73.0617 12.3118 73.1011 12.2074C73.1583 12.0555 73.2143 11.907 73.2062 11.7359L73.18 11.1892C73.174 11.0569 73.2075 10.9258 73.2764 10.8127C73.3452 10.6995 73.4463 10.6094 73.5666 10.554L73.7852 10.4523C73.9077 10.3957 74.0148 10.3105 74.0976 10.204C74.1803 10.0974 74.2363 9.97252 74.2608 9.83983C74.3341 9.43894 74.6865 9.14749 75.0979 9.14749C75.7404 9.14749 76.299 9.57412 76.5088 10.1806C77.5188 13.1 79.1245 15.2457 83.0561 15.2457Z' fill='%23373F45'/%3E%3Cpath d='M155.758 6.91365C155.028 6.91365 154.804 6.47916 154.804 5.98857C154.804 5.46997 154.986 5.06348 155.758 5.06348C156.53 5.06348 156.712 5.46997 156.712 5.98857C156.712 6.47905 156.516 6.91365 155.758 6.91365ZM142.441 12.9304V9.32833L141.415 9.32323V8.90392C141.415 8.44719 141.786 8.07758 142.244 8.07986L142.441 8.08095V6.55306L144.082 6.09057V8.08073H145.569V8.50416C145.569 8.61242 145.548 8.71961 145.506 8.81961C145.465 8.91961 145.404 9.01047 145.328 9.08699C145.251 9.16351 145.16 9.2242 145.06 9.26559C144.96 9.30698 144.853 9.32826 144.745 9.32822H144.082V12.7201C144.082 
13.2423 144.378 13.4256 144.76 13.4887C145.209 13.5629 145.583 13.888 145.583 14.343V14.9626C144.029 14.9626 142.441 14.8942 142.441 12.9304Z' fill='%23373F45'/%3E%3Cpath d='M110.058 7.92554C108.417 7.88344 106.396 8.92062 106.396 11.5137C106.396 14.0646 108.417 15.0738 110.058 15.0318C111.742 15.0738 113.748 14.0646 113.748 11.5137C113.748 8.92062 111.742 7.88344 110.058 7.92554ZM110.07 13.7586C108.878 13.7586 108.032 12.8905 108.032 11.461C108.032 10.1013 108.878 9.20569 110.071 9.20569C111.263 9.20569 112.101 10.0995 112.101 11.459C112.101 12.8887 111.263 13.7586 110.07 13.7586Z' fill='%23373F45'/%3E%3Cpath d='M118.06 7.94098C119.491 7.94098 120.978 8.33337 120.978 11.1366V14.893H120.063C119.608 14.893 119.238 14.524 119.238 14.0689V10.9965C119.238 9.66506 118.747 9.16047 117.891 9.16047C117.414 9.16047 116.797 9.52486 116.502 9.81915V14.069C116.502 14.1773 116.481 14.2845 116.44 14.3845C116.398 14.4845 116.337 14.5753 116.261 14.6519C116.184 14.7284 116.093 14.7891 115.993 14.8305C115.893 14.8719 115.786 14.8931 115.678 14.8931H114.847V8.10918H115.773C115.932 8.10914 116.087 8.16315 116.212 8.26242C116.337 8.36168 116.424 8.50033 116.46 8.65577C116.881 8.19328 117.428 7.94098 118.06 7.94098ZM122.854 8.09713C123.024 8.09708 123.19 8.1496 123.329 8.2475C123.468 8.34541 123.574 8.48391 123.631 8.64405L125.133 12.8486L126.635 8.64415C126.692 8.48402 126.798 8.34551 126.937 8.2476C127.076 8.1497 127.242 8.09718 127.412 8.09724H128.598L126.152 14.3567C126.091 14.5112 125.986 14.6439 125.849 14.7374C125.711 14.831 125.549 14.881 125.383 14.8809H124.333L121.668 8.09713H122.854Z' fill='%23373F45'/%3E%3Cpath d='M135.085 14.5514C134.566 14.7616 133.513 15.0416 132.418 15.0416C130.496 15.0416 129.024 13.9345 129.024 11.4396C129.024 9.19701 130.451 7.99792 132.191 7.99792C134.338 7.99792 135.254 9.4378 135.158 11.3979C135.139 11.8029 134.786 12.0983 134.38 12.0983H130.679C130.763 13.1916 131.562 13.7662 132.615 13.7662C133.028 13.7662 133.462 13.7452 133.983 13.6481C134.535 
13.545 135.085 13.9375 135.085 14.4985V14.5514ZM133.673 10.949C133.785 9.87621 133.061 9.28752 132.191 9.28752C131.321 9.28752 130.734 9.93979 130.679 10.9489L133.673 10.949Z' fill='%23373F45'/%3E%3Cpath d='M137.345 8.11122C137.497 8.11118 137.645 8.16229 137.765 8.25635C137.884 8.35041 137.969 8.48197 138.005 8.62993C138.566 8.20932 139.268 7.94303 139.759 7.94303C139.801 7.94303 140.068 7.94303 140.489 7.99913V8.7265C140.489 9.11748 140.15 9.4147 139.759 9.4147C139.31 9.4147 138.651 9.5829 138.131 9.8773V14.8951H136.462V8.11112L137.345 8.11122ZM156.6 14.0508V8.09104H155.769C155.314 8.09104 154.944 8.45999 154.944 8.9151V14.8748H155.775C156.23 14.8748 156.6 14.5058 156.6 14.0508ZM158.857 12.9447V9.34254H157.749V8.91912C157.749 8.46401 158.118 8.09506 158.574 8.09506H158.857V6.56739L160.499 6.10479V8.09506H161.986V8.51848C161.986 8.97359 161.617 9.34254 161.161 9.34254H160.499V12.7345C160.499 13.2566 160.795 13.44 161.177 13.503C161.626 13.5774 162 13.9024 162 14.3574V14.977C160.446 14.977 158.857 14.9086 158.857 12.9447ZM98.1929 10.1124C98.2033 6.94046 100.598 5.16809 102.895 5.16809C104.171 5.16809 105.342 5.44285 106.304 6.12953L105.914 6.6631C105.654 7.02011 105.16 7.16194 104.749 6.99949C104.169 6.7702 103.622 6.7218 103.215 6.7218C101.335 6.7218 99.9169 7.92849 99.9068 10.1123C99.9169 12.2959 101.335 13.5201 103.215 13.5201C103.622 13.5201 104.169 13.4717 104.749 13.2424C105.16 13.0799 105.654 13.2046 105.914 13.5615L106.304 14.0952C105.342 14.7819 104.171 15.0566 102.895 15.0566C100.598 15.0566 98.2033 13.2842 98.1929 10.1124ZM147.619 5.21768C148.074 5.21768 148.444 5.58663 148.444 6.04174V9.81968L151.82 5.58131C151.897 5.47733 151.997 5.39282 152.112 5.3346C152.227 5.27638 152.355 5.24607 152.484 5.24611H153.984L150.166 10.0615L153.984 14.8749H152.484C152.355 14.8749 152.227 14.8446 152.112 14.7864C151.997 14.7281 151.897 14.6436 151.82 14.5397L148.444 10.3025V14.0508C148.444 14.5059 148.074 14.8749 147.619 14.8749H146.746V5.21768H147.619Z' 
fill='%23373F45'/%3E%3Cpath d='M0.773438 6.5752H2.68066C3.56543 6.5752 4.2041 6.7041 4.59668 6.96191C4.99219 7.21973 5.18994 7.62695 5.18994 8.18359C5.18994 8.55859 5.09326 8.87061 4.8999 9.11963C4.70654 9.36865 4.42822 9.52539 4.06494 9.58984V9.63379C4.51611 9.71875 4.84717 9.88721 5.05811 10.1392C5.27197 10.3882 5.37891 10.7266 5.37891 11.1543C5.37891 11.7314 5.17676 12.1841 4.77246 12.5122C4.37109 12.8374 3.81152 13 3.09375 13H0.773438V6.5752ZM1.82373 9.22949H2.83447C3.27393 9.22949 3.59473 9.16064 3.79688 9.02295C3.99902 8.88232 4.1001 8.64502 4.1001 8.31104C4.1001 8.00928 3.99023 7.79102 3.77051 7.65625C3.55371 7.52148 3.20801 7.4541 2.7334 7.4541H1.82373V9.22949ZM1.82373 10.082V12.1167H2.93994C3.37939 12.1167 3.71045 12.0332 3.93311 11.8662C4.15869 11.6963 4.27148 11.4297 4.27148 11.0664C4.27148 10.7324 4.15723 10.4849 3.92871 10.3237C3.7002 10.1626 3.35303 10.082 2.88721 10.082H1.82373Z' fill='%23373F45'/%3E%3Cpath d='M13.011 6.5752V10.7324C13.011 11.207 12.9084 11.623 12.7034 11.9805C12.5012 12.335 12.2068 12.6089 11.8201 12.8022C11.4363 12.9927 10.9763 13.0879 10.4402 13.0879C9.6433 13.0879 9.02368 12.877 8.5813 12.4551C8.13892 12.0332 7.91772 11.4531 7.91772 10.7148V6.5752H8.9724V10.6401C8.9724 11.1704 9.09546 11.5615 9.34155 11.8135C9.58765 12.0654 9.96557 12.1914 10.4753 12.1914C11.4656 12.1914 11.9607 11.6714 11.9607 10.6313V6.5752H13.011Z' fill='%23373F45'/%3E%3Cpath d='M15.9146 13V6.5752H16.9649V13H15.9146Z' fill='%23373F45'/%3E%3Cpath d='M19.9255 13V6.5752H20.9758V12.0991H23.696V13H19.9255Z' fill='%23373F45'/%3E%3Cpath d='M28.2828 13H27.2325V7.47607H25.3428V6.5752H30.1724V7.47607H28.2828V13Z' fill='%23373F45'/%3E%3Cpath d='M41.9472 13H40.8046L39.7148 9.16796C39.6679 9.00097 39.6093 8.76074 39.539 8.44727C39.4687 8.13086 39.4262 7.91113 39.4116 7.78809C39.3823 7.97559 39.3339 8.21875 39.2665 8.51758C39.2021 8.81641 39.1479 9.03905 39.1039 9.18554L38.0405 13H36.8979L36.0673 9.7832L35.2236 6.5752H36.2958L37.2143 10.3193C37.3578 10.9199 37.4604 11.4502 
37.5219 11.9102C37.5541 11.6611 37.6025 11.3828 37.6669 11.0752C37.7314 10.7676 37.79 10.5186 37.8427 10.3281L38.8886 6.5752H39.9301L41.0024 10.3457C41.1049 10.6943 41.2133 11.2158 41.3276 11.9102C41.3715 11.4912 41.477 10.958 41.644 10.3105L42.558 6.5752H43.6215L41.9472 13Z' fill='%23373F45'/%3E%3Cpath d='M45.7957 13V6.5752H46.846V13H45.7957Z' fill='%23373F45'/%3E%3Cpath d='M52.0258 13H50.9755V7.47607H49.0859V6.5752H53.9155V7.47607H52.0258V13Z' fill='%23373F45'/%3E%3Cpath d='M61.2312 13H60.1765V10.104H57.2146V13H56.1643V6.5752H57.2146V9.20312H60.1765V6.5752H61.2312V13Z' fill='%23373F45'/%3E%3C/svg%3E");}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit:hover,.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit:focus{background-color:#ffffff;-webkit-transform:scale(1.025) perspective(1px);-ms-transform:scale(1.025) perspective(1px);transform:scale(1.025) perspective(1px);opacity:1;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit[data-variant="dark"],.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit[data-variant="light"]{background-color:transparent;border-color:transparent;width:166px;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit[data-variant="light"]{color:#ffffff;background-image:url("data:image/svg+xml;charset=utf8,%3Csvg width='162' height='20' viewBox='0 0 162 20' fill='none' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M83.0561 15.2457C86.675 15.2457 89.4722 12.5154 89.4722 9.14749C89.4722 5.99211 86.8443 4.06563 85.1038 4.06563C82.6801 4.06563 80.7373 5.76407 80.4605 8.28551C80.4092 8.75244 80.0387 9.14403 79.5686 9.14069C78.7871 9.13509 77.6507 9.12841 76.9314 9.13092C76.6217 9.13199 76.3658 8.88106 76.381 8.57196C76.4895 6.38513 77.2218 4.3404 78.618 2.76974C80.1695 1.02445 82.4289 0 85.1038 0C89.5979 0 93.8406 4.07791 93.8406 9.14749C93.8406 14.7608 89.1832 19.3113 83.1517 19.3113C78.8502 19.3113 74.5179 16.5041 73.0053 12.5795C72.9999 12.565 72.9986 12.5492 
73.0015 12.534C73.0218 12.4179 73.0617 12.3118 73.1011 12.2074C73.1583 12.0555 73.2143 11.907 73.2062 11.7359L73.18 11.1892C73.174 11.0569 73.2075 10.9258 73.2764 10.8127C73.3452 10.6995 73.4463 10.6094 73.5666 10.554L73.7852 10.4523C73.9077 10.3957 74.0148 10.3105 74.0976 10.204C74.1803 10.0974 74.2363 9.97252 74.2608 9.83983C74.3341 9.43894 74.6865 9.14749 75.0979 9.14749C75.7404 9.14749 76.299 9.57412 76.5088 10.1806C77.5188 13.1 79.1245 15.2457 83.0561 15.2457Z' fill='white'/%3E%3Cpath d='M155.758 6.91365C155.028 6.91365 154.804 6.47916 154.804 5.98857C154.804 5.46997 154.986 5.06348 155.758 5.06348C156.53 5.06348 156.712 5.46997 156.712 5.98857C156.712 6.47905 156.516 6.91365 155.758 6.91365ZM142.441 12.9304V9.32833L141.415 9.32323V8.90392C141.415 8.44719 141.786 8.07758 142.244 8.07986L142.441 8.08095V6.55306L144.082 6.09057V8.08073H145.569V8.50416C145.569 8.61242 145.548 8.71961 145.506 8.81961C145.465 8.91961 145.404 9.01047 145.328 9.08699C145.251 9.16351 145.16 9.2242 145.06 9.26559C144.96 9.30698 144.853 9.32826 144.745 9.32822H144.082V12.7201C144.082 13.2423 144.378 13.4256 144.76 13.4887C145.209 13.5629 145.583 13.888 145.583 14.343V14.9626C144.029 14.9626 142.441 14.8942 142.441 12.9304Z' fill='white'/%3E%3Cpath d='M110.058 7.92554C108.417 7.88344 106.396 8.92062 106.396 11.5137C106.396 14.0646 108.417 15.0738 110.058 15.0318C111.742 15.0738 113.748 14.0646 113.748 11.5137C113.748 8.92062 111.742 7.88344 110.058 7.92554ZM110.07 13.7586C108.878 13.7586 108.032 12.8905 108.032 11.461C108.032 10.1013 108.878 9.20569 110.071 9.20569C111.263 9.20569 112.101 10.0995 112.101 11.459C112.101 12.8887 111.263 13.7586 110.07 13.7586Z' fill='white'/%3E%3Cpath d='M118.06 7.94098C119.491 7.94098 120.978 8.33337 120.978 11.1366V14.893H120.063C119.608 14.893 119.238 14.524 119.238 14.0689V10.9965C119.238 9.66506 118.747 9.16047 117.891 9.16047C117.414 9.16047 116.797 9.52486 116.502 9.81915V14.069C116.502 14.1773 116.481 14.2845 116.44 14.3845C116.398 14.4845 116.337 
14.5753 116.261 14.6519C116.184 14.7284 116.093 14.7891 115.993 14.8305C115.893 14.8719 115.786 14.8931 115.678 14.8931H114.847V8.10918H115.773C115.932 8.10914 116.087 8.16315 116.212 8.26242C116.337 8.36168 116.424 8.50033 116.46 8.65577C116.881 8.19328 117.428 7.94098 118.06 7.94098ZM122.854 8.09713C123.024 8.09708 123.19 8.1496 123.329 8.2475C123.468 8.34541 123.574 8.48391 123.631 8.64405L125.133 12.8486L126.635 8.64415C126.692 8.48402 126.798 8.34551 126.937 8.2476C127.076 8.1497 127.242 8.09718 127.412 8.09724H128.598L126.152 14.3567C126.091 14.5112 125.986 14.6439 125.849 14.7374C125.711 14.831 125.549 14.881 125.383 14.8809H124.333L121.668 8.09713H122.854Z' fill='white'/%3E%3Cpath d='M135.085 14.5514C134.566 14.7616 133.513 15.0416 132.418 15.0416C130.496 15.0416 129.024 13.9345 129.024 11.4396C129.024 9.19701 130.451 7.99792 132.191 7.99792C134.338 7.99792 135.254 9.4378 135.158 11.3979C135.139 11.8029 134.786 12.0983 134.38 12.0983H130.679C130.763 13.1916 131.562 13.7662 132.615 13.7662C133.028 13.7662 133.462 13.7452 133.983 13.6481C134.535 13.545 135.085 13.9375 135.085 14.4985V14.5514ZM133.673 10.949C133.785 9.87621 133.061 9.28752 132.191 9.28752C131.321 9.28752 130.734 9.93979 130.679 10.9489L133.673 10.949Z' fill='white'/%3E%3Cpath d='M137.345 8.11122C137.497 8.11118 137.645 8.16229 137.765 8.25635C137.884 8.35041 137.969 8.48197 138.005 8.62993C138.566 8.20932 139.268 7.94303 139.759 7.94303C139.801 7.94303 140.068 7.94303 140.489 7.99913V8.7265C140.489 9.11748 140.15 9.4147 139.759 9.4147C139.31 9.4147 138.651 9.5829 138.131 9.8773V14.8951H136.462V8.11112L137.345 8.11122ZM156.6 14.0508V8.09104H155.769C155.314 8.09104 154.944 8.45999 154.944 8.9151V14.8748H155.775C156.23 14.8748 156.6 14.5058 156.6 14.0508ZM158.857 12.9447V9.34254H157.749V8.91912C157.749 8.46401 158.118 8.09506 158.574 8.09506H158.857V6.56739L160.499 6.10479V8.09506H161.986V8.51848C161.986 8.97359 161.617 9.34254 161.161 9.34254H160.499V12.7345C160.499 13.2566 160.795 13.44 161.177 
13.503C161.626 13.5774 162 13.9024 162 14.3574V14.977C160.446 14.977 158.857 14.9086 158.857 12.9447ZM98.1929 10.1124C98.2033 6.94046 100.598 5.16809 102.895 5.16809C104.171 5.16809 105.342 5.44285 106.304 6.12953L105.914 6.6631C105.654 7.02011 105.16 7.16194 104.749 6.99949C104.169 6.7702 103.622 6.7218 103.215 6.7218C101.335 6.7218 99.9169 7.92849 99.9068 10.1123C99.9169 12.2959 101.335 13.5201 103.215 13.5201C103.622 13.5201 104.169 13.4717 104.749 13.2424C105.16 13.0799 105.654 13.2046 105.914 13.5615L106.304 14.0952C105.342 14.7819 104.171 15.0566 102.895 15.0566C100.598 15.0566 98.2033 13.2842 98.1929 10.1124ZM147.619 5.21768C148.074 5.21768 148.444 5.58663 148.444 6.04174V9.81968L151.82 5.58131C151.897 5.47733 151.997 5.39282 152.112 5.3346C152.227 5.27638 152.355 5.24607 152.484 5.24611H153.984L150.166 10.0615L153.984 14.8749H152.484C152.355 14.8749 152.227 14.8446 152.112 14.7864C151.997 14.7281 151.897 14.6436 151.82 14.5397L148.444 10.3025V14.0508C148.444 14.5059 148.074 14.8749 147.619 14.8749H146.746V5.21768H147.619Z' fill='white'/%3E%3Cpath d='M0.773438 6.5752H2.68066C3.56543 6.5752 4.2041 6.7041 4.59668 6.96191C4.99219 7.21973 5.18994 7.62695 5.18994 8.18359C5.18994 8.55859 5.09326 8.87061 4.8999 9.11963C4.70654 9.36865 4.42822 9.52539 4.06494 9.58984V9.63379C4.51611 9.71875 4.84717 9.88721 5.05811 10.1392C5.27197 10.3882 5.37891 10.7266 5.37891 11.1543C5.37891 11.7314 5.17676 12.1841 4.77246 12.5122C4.37109 12.8374 3.81152 13 3.09375 13H0.773438V6.5752ZM1.82373 9.22949H2.83447C3.27393 9.22949 3.59473 9.16064 3.79688 9.02295C3.99902 8.88232 4.1001 8.64502 4.1001 8.31104C4.1001 8.00928 3.99023 7.79102 3.77051 7.65625C3.55371 7.52148 3.20801 7.4541 2.7334 7.4541H1.82373V9.22949ZM1.82373 10.082V12.1167H2.93994C3.37939 12.1167 3.71045 12.0332 3.93311 11.8662C4.15869 11.6963 4.27148 11.4297 4.27148 11.0664C4.27148 10.7324 4.15723 10.4849 3.92871 10.3237C3.7002 10.1626 3.35303 10.082 2.88721 10.082H1.82373Z' fill='white'/%3E%3Cpath d='M13.011 
6.5752V10.7324C13.011 11.207 12.9084 11.623 12.7034 11.9805C12.5012 12.335 12.2068 12.6089 11.8201 12.8022C11.4363 12.9927 10.9763 13.0879 10.4402 13.0879C9.6433 13.0879 9.02368 12.877 8.5813 12.4551C8.13892 12.0332 7.91772 11.4531 7.91772 10.7148V6.5752H8.9724V10.6401C8.9724 11.1704 9.09546 11.5615 9.34155 11.8135C9.58765 12.0654 9.96557 12.1914 10.4753 12.1914C11.4656 12.1914 11.9607 11.6714 11.9607 10.6313V6.5752H13.011Z' fill='white'/%3E%3Cpath d='M15.9146 13V6.5752H16.9649V13H15.9146Z' fill='white'/%3E%3Cpath d='M19.9255 13V6.5752H20.9758V12.0991H23.696V13H19.9255Z' fill='white'/%3E%3Cpath d='M28.2828 13H27.2325V7.47607H25.3428V6.5752H30.1724V7.47607H28.2828V13Z' fill='white'/%3E%3Cpath d='M41.9472 13H40.8046L39.7148 9.16796C39.6679 9.00097 39.6093 8.76074 39.539 8.44727C39.4687 8.13086 39.4262 7.91113 39.4116 7.78809C39.3823 7.97559 39.3339 8.21875 39.2665 8.51758C39.2021 8.81641 39.1479 9.03905 39.1039 9.18554L38.0405 13H36.8979L36.0673 9.7832L35.2236 6.5752H36.2958L37.2143 10.3193C37.3578 10.9199 37.4604 11.4502 37.5219 11.9102C37.5541 11.6611 37.6025 11.3828 37.6669 11.0752C37.7314 10.7676 37.79 10.5186 37.8427 10.3281L38.8886 6.5752H39.9301L41.0024 10.3457C41.1049 10.6943 41.2133 11.2158 41.3276 11.9102C41.3715 11.4912 41.477 10.958 41.644 10.3105L42.558 6.5752H43.6215L41.9472 13Z' fill='white'/%3E%3Cpath d='M45.7957 13V6.5752H46.846V13H45.7957Z' fill='white'/%3E%3Cpath d='M52.0258 13H50.9755V7.47607H49.0859V6.5752H53.9155V7.47607H52.0258V13Z' fill='white'/%3E%3Cpath d='M61.2312 13H60.1765V10.104H57.2146V13H56.1643V6.5752H57.2146V9.20312H60.1765V6.5752H61.2312V13Z' fill='white'/%3E%3C/svg%3E");}@-webkit-keyframes formkit-bouncedelay-formkit-form-data-uid-96a310b6ce-{0%,80%,100%{-webkit-transform:scale(0);-ms-transform:scale(0);transform:scale(0);}40%{-webkit-transform:scale(1);-ms-transform:scale(1);transform:scale(1);}}@keyframes 
formkit-bouncedelay-formkit-form-data-uid-96a310b6ce-{0%,80%,100%{-webkit-transform:scale(0);-ms-transform:scale(0);transform:scale(0);}40%{-webkit-transform:scale(1);-ms-transform:scale(1);transform:scale(1);}}.formkit-form[data-uid="96a310b6ce"] blockquote{padding:10px 20px;margin:0 0 20px;border-left:5px solid #e1e1e1;}.formkit-form[data-uid="96a310b6ce"] .seva-custom-content{padding:15px;font-size:16px;color:#fff;mix-blend-mode:difference;}.formkit-form[data-uid="96a310b6ce"] .formkit-modal.guard{max-width:420px;width:100%;} .formkit-form[data-uid="96a310b6ce"]{max-width:700px;}.formkit-form[data-uid="96a310b6ce"] [data-style="clean"]{width:100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-fields{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-wrap:wrap;-ms-flex-wrap:wrap;flex-wrap:wrap;margin:0 auto;}.formkit-form[data-uid="96a310b6ce"] .formkit-field,.formkit-form[data-uid="96a310b6ce"] .formkit-submit{margin:0 0 15px 0;-webkit-flex:1 0 100%;-ms-flex:1 0 100%;flex:1 0 100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit-container{margin:0;}.formkit-form[data-uid="96a310b6ce"] .formkit-submit{position:static;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] [data-style="clean"],.formkit-form[data-uid="96a310b6ce"][min-width~="800"] [data-style="clean"]{padding:10px;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"],.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"]{margin-left:-5px;margin-right:-5px;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-field,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] .formkit-field,.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-submit,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] 
.formkit-submit{margin:0 5px 15px 5px;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-field,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] .formkit-field{-webkit-flex:100 1 auto;-ms-flex:100 1 auto;flex:100 1 auto;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-submit,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] .formkit-submit{-webkit-flex:1 1 auto;-ms-flex:1 1 auto;flex:1 1 auto;} </style></form>
<hr>
<!-- Post comments: loads the utteranc.es client script, configured with the
     eugeneyan/eugeneyan-comments repo, issue-term "url" and the github-light theme.
     NOTE(review): the rewritten type attribute looks like Cloudflare Rocket Loader output; confirm before editing attributes here. -->
<script src="https://utteranc.es/client.js" repo="eugeneyan/eugeneyan-comments" issue-term="url" theme="github-light" crossorigin="anonymous" type="e98c7fd0fab5af1e4999090d-text/javascript">
</script>
</div>
</div>
<!-- Site footer: social profile links, a short author bio, and the copyright line
     with feedback and RSS links. Every link that opens a new tab carries
     rel="noopener" so the opened page cannot reach back through window.opener. -->
<footer class="footer">
  <div class="footer-col-wrapper">
    <!-- Left column: social profiles. Icons are decorative (empty alt); the link text names each profile. -->
    <div class="col-sm-3 footer-col">
      <ul class="contact-list">
        <!-- Disabled: Bluesky profile link
        <li>
          <img class="icon" src="/assets/bluesky.svg" loading="lazy" alt=""/>
          <a rel="me noopener" href="https://bsky.app/profile/eugeneyan.com" target="_blank" title="Bluesky">Bluesky</a>
        </li> -->
        <li>
          <img class="icon" src="/assets/icon-twitter.svg" loading="lazy" alt=""/>
          <a href="https://twitter.com/eugeneyan" target="_blank" rel="noopener" title="Twitter">Twitter</a>
        </li>
        <li>
          <img class="icon" src="/assets/icon-linkedin.svg" loading="lazy" alt=""/>
          <a href="https://www.linkedin.com/in/eugeneyan/" target="_blank" rel="noopener" title="Linkedin">LinkedIn</a>
        </li>
        <!-- Disabled: Threads profile link
        <li>
          <img class="icon" src="/assets/icon-threads.svg" loading="lazy" alt=""/>
          <a href="https://www.threads.net/@eugeneyan" target="_blank" rel="noopener" title="Threads">Threads</a>
        </li> -->
        <li>
          <img class="icon" src="/assets/icon-github.svg" loading="lazy" alt=""/>
          <a href="https://github.com/eugeneyan/" target="_blank" rel="noopener" title="GitHub">GitHub</a>
        </li>
      </ul>
    </div>
    <!-- Right column: author bio -->
    <div class="col-sm-9 footer-col">
      <p>I'm a Member of Technical Staff at Anthropic. I work to bridge the field and the frontier, and help build safe, reliable AI systems that scale. I've led ML/AI teams at Amazon, Alibaba, Lazada, and a Healthtech Series A, and write about LLMs, RecSys, and engineering at <a href="https://eugeneyan.com/" target="_blank" rel="noopener">eugeneyan.com</a>.</p>
    </div>
  </div>
  <!-- Copyright line with site feedback and RSS links -->
  <p class="copyright">© Eugene Yan 2015 - 2026
    • <a href="/site-feedback/">Feedback</a>
    • <a href="/rss/">RSS</a>
  </p>
</footer>
</div> <!-- /container -->
</div>
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e98c7fd0fab5af1e4999090d-|49" defer></script><script defer src="https://static.cloudflareinsights.com/beacon.min.js/v833ccba57c9e4d2798f2e76cebdd09a11778172276447" integrity="sha512-57MDmcccJXYtNnH+ZiBwzC4jb2rvgVCEokYN+L/nLlmO8rfYT/gIpW2A569iJ/3b+0UEasghjuZH/ma3wIs/EQ==" data-cf-beacon='{"version":"2024.11.0","token":"4ba4ab6acad14218941be7fa4aaad127","r":1,"server_timing":{"name":{"cfCacheStatus":true,"cfEdge":true,"cfExtPri":true,"cfL4":true,"cfOrigin":true,"cfSpeedBrain":true},"location_startswith":null}}' crossorigin="anonymous"></script>
</body>
<script type="e98c7fd0fab5af1e4999090d-text/javascript">
// Assemble mailto: from split data attributes on click (keeps full address out of source).
// The listener is delegated on document, so it handles any a.js-email link on the page.
// The address is built as data-u + '@' + data-d + '.' + data-t.
document.addEventListener('click', function(e) {
  var target = e.target;
  // Guard against event targets that are not Elements (no closest() available).
  if (!target || typeof target.closest !== 'function') return;
  var a = target.closest('a.js-email');
  if (!a) return;
  // Always suppress the anchor's own navigation, as before.
  e.preventDefault();
  var d = a.dataset;
  // If any part is missing, do nothing rather than navigate to a
  // malformed address such as "mailto:undefined@undefined.undefined".
  if (!d.u || !d.d || !d.t) return;
  window.location.href = 'mailto:' + d.u + '@' + d.d + '.' + d.t;
});
// Algolia Insights link-click tracking.
// Sends a 'clickedObjectIDs' event for every click on a same-origin link inside
// div.container, using the link's pathname as the objectID.
(function() {
  function initLinkClickTracking() {
    // Page path as rendered into this page; it selects the event name below.
    const pagePath = '/writing/semantic-ids/';
    // Determine a specific event name based on the current page
    let eventName;
    if (pagePath.startsWith('/tag/')) {
      eventName = 'Tag Page Link Clicked';
    } else if (pagePath.split('/').filter(Boolean).length <= 1) {
      // Zero or one level deep (e.g., '/', '/writing/', '/speaking/')
      eventName = 'Site Page Link Clicked';
    } else {
      // Two or more levels deep - considered a post
      eventName = 'Post Link Clicked';
    }
    // Track clicks on internal links in the main container
    const container = document.querySelector('div.container');
    if (!container) return;
    container.addEventListener('click', function(e) {
      // Check for aa at click time (as the read-depth tracker does), so that
      // tracking still works when aa is defined after DOMContentLoaded.
      if (typeof aa !== 'function') return;
      const link = e.target.closest('a');
      if (!link) return;
      // Skip the e-mail links and anything that is not a same-origin link.
      if (link.classList.contains('js-email')) return;
      if (!link.href || link.origin !== window.location.origin) return;
      aa('clickedObjectIDs', {
        index: 'eugeneyan.com',
        eventName: eventName,
        objectIDs: [link.pathname]
      });
    });
  }
  // If this script runs after DOMContentLoaded has already fired, a listener
  // registered now would never be called, so initialise immediately instead.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', initLinkClickTracking);
  } else {
    initLinkClickTracking();
  }
})();
// Track page read depth for conversion tracking.
// Sends one Algolia 'convertedObjectIDs' event per page view, the first time the
// bottom of the viewport reaches at least 50% of the document height.
let hasTrackedPageRead = false;
window.addEventListener('scroll', function onReadDepthScroll() {
  if (hasTrackedPageRead) return; // Only track once per page view
  // Calculate read depth as percentage
  const root = document.documentElement;
  const scrollTop = window.pageYOffset || root.scrollTop;
  const readPercentage = (scrollTop + root.clientHeight) / root.scrollHeight * 100;
  // Wait until the user has read at least 50% of the page and aa is available
  if (readPercentage < 50 || typeof aa !== 'function') return;
  hasTrackedPageRead = true;
  // The event fires only once, so detach the listener instead of running this
  // check on every later scroll. The options match those used at registration.
  window.removeEventListener('scroll', onReadDepthScroll, { passive: true });
  const objectID = window.location.pathname; // Use current page path as objectID
  // Page path as rendered into this page; it selects the event name below.
  const pagePath = '/writing/semantic-ids/';
  // Create a meaningful event name
  let eventName;
  if (pagePath.startsWith('/tag/')) {
    eventName = 'Tag Page Read 50%';
  } else if (pagePath.split('/').filter(Boolean).length <= 1) {
    // Zero or one level deep (e.g., '/', '/writing/', '/speaking/')
    eventName = 'Site Page Read 50%';
  } else {
    // Two or more levels deep - considered a post
    eventName = 'Post Read 50%';
  }
  // Send the convertedObjectIDs event to Algolia
  aa('convertedObjectIDs', {
    index: 'eugeneyan.com',
    eventName: eventName,
    objectIDs: [objectID]
  });
}, { passive: true }); // passive: the handler never calls preventDefault()
</script>
</html>