Eugene Yan · Tech & AI
TIER 4 2016-10-11
<!DOCTYPE html>
<html lang="en">
<head>
<!--Load inline css and scripts first-->
<style>:root{--c-light-text:#333;--c-light-background:#fff;--c-light-focus:#00ff00;--c-light-interactive:#007bff;--c-dark-text:#fff;--c-dark-subtext:#a6a6a6;--c-dark-background:#333;--c-dark-focus:#00ff00;--c-dark-interactive:#66b0ff;--c-dark-callout:#003166;--c-text:var(--c-light-text);--c-background:var(--c-light-background);--c-focus:var(--c-light-focus);--c-interactive:var(--c-light-interactive)}.dark-mode-checkbox:checked~.theme-container{--c-text:var(--c-dark-text);--c-background:var(--c-dark-background);--c-focus:var(--c-dark-focus);--c-interactive:var(--c-dark-interactive)}html.dark-mode{--c-text:var(--c-dark-text);--c-background:var(--c-dark-background);--c-focus:var(--c-dark-focus);--c-interactive:var(--c-dark-interactive)}html.dark-mode .tag{background-color:#3e3e3e;color:var(--c-dark-interactive)}html.dark-mode a.tag:hover{background-color:var(--c-dark-interactive);color:#3e3e3e}a{text-decoration:none;background-color:transparent;color:var(--c-interactive)}</style>
<!-- darkmode JS at start of the doc so to ensure consistent view mode -->
<link href="/js/darkmode.js" rel="preload" as="script">
<script src="/js/darkmode.js" type="e8a9aab4af92c290f6da290e-text/javascript"></script>
<!--Add active class to nav bar-->
<link href="/js/navbar.js" rel="preload" as="script">
<script src="/js/navbar.js" defer type="e8a9aab4af92c290f6da290e-text/javascript"></script>
<!-- Load jQuery before anchor.min.js -->
<link href="/js/jquery-3.7.1.min.js" rel="preload" as="script">
<script src="/js/jquery-3.7.1.min.js" defer type="e8a9aab4af92c290f6da290e-text/javascript"></script>
<!--Add anchors to headers-->
<link href="/js/anchor.min.js" rel="preload" as="script">
<script src="/js/anchor.min.js" defer type="e8a9aab4af92c290f6da290e-text/javascript"></script>
<!-- Algolia Insights -->
<script type="e8a9aab4af92c290f6da290e-text/javascript">
// URL of the Search Insights library; a site-relative path, i.e. the locally hosted copy.
var ALGOLIA_INSIGHTS_SRC = "/js/search-insights.min.js"; // Using local version
// Loader snippet (minified). Called with (window, document, "script", ALGOLIA_INSIGHTS_SRC, "aa"):
//   - records the global name "aa" in window.AlgoliaAnalyticsObject;
//   - if window.aa is not already defined, installs a stub that appends each call's
//     `arguments` object to window.aa.queue;
//   - sets window.aa.version to the text after "@" in the URL, if any; a URL with no "@"
//     (such as the local path above) leaves it undefined;
//   - creates an async <script> element pointing at the URL and inserts it before the
//     first <script> element in the document.
!function(e,a,t,n,s,i,c){e.AlgoliaAnalyticsObject=s,e[s]=e[s]||function(){
(e[s].queue=e[s].queue||[]).push(arguments)},e[s].version=(n.match(/@([^\/]+)\/?.*/) || [])[1],i=a.createElement(t),c=a.getElementsByTagName(t)[0],
i.async=1,i.src=n,c.parentNode.insertBefore(i,c)
}(window,document,"script",ALGOLIA_INSIGHTS_SRC,"aa");
</script>
<script type="e8a9aab4af92c290f6da290e-text/javascript">
// Send the 'init' command only if the global `aa` function exists, so this block
// does nothing (rather than throwing) when `aa` was never defined.
if (typeof aa === 'function') {
aa('init', {
// Algolia application ID and API key handed to the 'init' command.
// NOTE(review): the key is shipped to every visitor — presumably a public, search-only key; confirm.
appId: '2XJCLEABQD',
apiKey: 'b61ec4cb64bd32d62c053466fccbfa43',
// NOTE(review): presumably allows the library to persist its user token in a cookie — confirm against the Search Insights docs.
useCookie: true
});
}
</script>
<meta charset="utf-8">
<meta name="HandheldFriendly" content="True">
<meta name="MobileOptimized" content="320">
<meta name="viewport" content="width=device-width, initial-scale=0.86, maximum-scale=3.0, minimum-scale=0.86">
<meta name="description" content="Parsing json and formatting product titles and categories.">
<meta name="author" content="Eugene Yan">
<meta content="eugeneyan.com" property="og:site_name">
<meta name=twitter:card content=summary_large_image>
<meta name=twitter:domain content=eugeneyan.com>
<meta content="Product Classification API Part 1: Data Acquisition" property="og:title">
<meta name=twitter:title content="Product Classification API Part 1: Data Acquisition">
<meta content="article" property="og:type">
<meta content="Parsing json and formatting product titles and categories." property="og:description">
<meta name=twitter:description content="Parsing json and formatting product titles and categories.">
<meta content="https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/" property="og:url">
<meta content="2016-10-11T00:00:00+00:00" property="article:published_time">
<meta content="https://eugeneyan.com/about/" property="article:author">
<meta content="https://eugeneyan.com/assets/og_image/default.jpg" property="og:image">
<meta name=twitter:image content="https://eugeneyan.com/assets/og_image/default.jpg">
<meta content="posts" property="article:section">
<meta content="machinelearning" property="article:tag">
<meta content="python" property="article:tag">
<meta content="🛠" property="article:tag">
<title>Product Classification API Part 1: Data Acquisition</title>
<!-- styles -->
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e8a9aab4af92c290f6da290e-|49"></script><link href="/css/main.min.css" rel="preload" as="style" onload="this.rel='stylesheet'" type="text/css">
<link rel="stylesheet" href="/css/main.css" type="text/css">
<!-- Preconnect to Google Fonts domains to reduce latency -->
<link rel="preconnect" href="https://fonts.googleapis.com" crossorigin>
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e8a9aab4af92c290f6da290e-|49"></script><link href="https://fonts.googleapis.com/css2?family=Merriweather:wght@400;700&family=Raleway&display=swap"
rel="preload" as="style" onload="this.rel='stylesheet'">
<link href="https://fonts.googleapis.com/css2?family=Merriweather:wght@400;700&family=Raleway&display=swap"
rel="stylesheet">
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e8a9aab4af92c290f6da290e-|49"></script><link href="/css/monokai.css" rel="preload" as="style" onload="this.rel='stylesheet'" type="text/css">
<link href="/css/monokai.css" rel="stylesheet" type="text/css">
<link rel="shortcut icon" type="image/png" href="https://eugeneyan.com/assets/favicon/favicon.ico">
<link rel="apple-touch-icon" sizes="180x180" href="https://eugeneyan.com/assets/favicon/apple-touch-icon.webp">
<link rel="icon" type="image/png" sizes="32x32" href="https://eugeneyan.com/assets/favicon/favicon-32x32.webp">
<link rel="icon" type="image/png" sizes="16x16" href="https://eugeneyan.com/assets/favicon/favicon-16x16.webp">
<link rel="manifest" href="/assets/favicon/site.webmanifest">
<link rel="canonical" href="https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/"/>
<!-- Collect tags-->
<!-- google analytics - i will not share this data with google -->
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-4CKMNLRMCV" type="e8a9aab4af92c290f6da290e-text/javascript"></script>
<script type="e8a9aab4af92c290f6da290e-text/javascript">
// Reuse an existing window.dataLayer array if one is present; otherwise create it.
window.dataLayer = window.dataLayer || [];
// Appends the raw `arguments` object (not a copied array) of each call to dataLayer.
function gtag(){dataLayer.push(arguments);}
// Queue a 'js' command carrying the current timestamp, then a 'config' command for
// the same ID that appears in the gtag.js URL loaded by the preceding script tag.
gtag('js', new Date());
gtag('config', 'G-4CKMNLRMCV');
</script>
</head>
<body>
<input class="dark-mode-checkbox" id="dark-mode" name="dark-mode-checkbox" type="checkbox" aria-label="Toggle dark mode"/>
<label class="dark-mode-label" for="dark-mode"></label>
<div class="theme-container grow">
<div class="container" style="width: 95%">
<div class="header">
<div class="row">
<div class="col-sm-3">
<h1 class="text-muted nav"><a href="/">eugeneyan</a></h1>
</div>
<div class="col-sm-9">
<ul id="nav" class="nav-margin nav nav-pills float-sm-right">
<li><a href="/start-here/" title="Start Here">Start Here</a></li>
<li><a href="/writing/" title="Writing">Writing</a></li>
<li><a href="/speaking/" title="Speaking">Speaking</a></li>
<li><a href="/prototyping/" title="Prototyping">Prototyping</a></li>
<li><a href="/about/" title="About">About</a></li>
<li><a href="/search/" title="Search"><img class="icon icon-search" src="/assets/icon-search.svg" loading="lazy" alt="Search"/></a></li>
</ul>
</div>
</div>
</div>
<div class="notes">
<div class="note single">
<h1 class="title">Product Classification API Part 1: Data Acquisition</h1>
<p class="date">
<info datetime="2016-10-11 00:00:00 +0000">
<span class="no-italics">[
<a class='tag' href="/tag/machinelearning/">machinelearning</a>
<a class='tag' href="/tag/python/">python</a>
<a class='tag' href="/tag/🛠/">🛠</a>
]
</span> · 9 min read
</info>
</p>
<!-- Post content -->
<div class="notebody">
<p>To gain practice with building data products end-to-end, I recently developed a product classification API. The API helps classify products based on their titles—instead of figuring out which category your product belongs to (out of thousands), you can provide the title and the API returns the top 3 most likely categories. (<a href="https://github.com/eugeneyan/datagene" target="_blank">GitHub repository</a>)</p>
<blockquote>
<p>Update: API discontinued to save on cloud cost.</p>
</blockquote>
<p><img src="/assets/product-classification-results.webp" title="Product classification results" loading="lazy" alt="Product classification results" /></p>
<p class="image-caption">Input: Title. Output: Suggested categories.</p>
<p>This is part of a series of posts on building a product classification API:</p>
<ul>
<li><a href="/writing/product-categorization-api-part-1-data-acquisition-and-formatting/" target="_blank">Data acquisition and formatting (part 1)</a></li>
<li><a href="/writing/product-categorization-api-part-2-data-preparation/" target="_blank">Data cleaning and preparation (part 2)</a></li>
<li><a href="/writing/product-categorization-api-part-3-creating-an-api/" target="_blank">App development (part 3)</a></li>
<li><a href="/writing/image-categorization-is-now-live/" target="_blank">Image classification demo</a></li>
<li><a href="/writing/image-search-is-now-live/" target="_blank">Image search demo</a></li>
</ul>
<h2 id="where-did-i-get-the-product-data-from">Where did I get the product data from?</h2>
<p>I initially intended to build a web scraper to collect product data from Amazon’s and Alibaba’s sites. However, I figured this (skill) was not absolutely necessary, especially at work—product data would be stored and available in databases. In addition, the process of scraping and structuring scraped data was estimated to take up at least 30% of overall effort. Thus, I decided to use open-sourced product data instead.</p>
<p>It was surprisingly difficult to find good quality open-sourced product data. I was on the verge of building a scraper when I stumbled upon Julian McAuley’s site: <a href="http://jmcauley.ucsd.edu/data/amazon/" target="_blank">http://jmcauley.ucsd.edu/data/amazon/</a>. Acknowledgements to Julian and his team for the Amazon product data used to build this API.</p>
<p>For this project, we’ll be using the product metadata, containing 9.4 million products (3.1gb zipped). As we’ll see, after cleaning and preparation, only a fraction is usable for training a product classification model for the API.</p>
<h2 id="how-does-the-product-metadata-look-like">What does the product metadata look like?</h2>
<p>The metadata contains the following fields:</p>
<ul>
<li>asin: Product ID</li>
<li>title: Product name</li>
<li>price: Product price (in USD)</li>
<li>imUrl: Product image url</li>
<li>related: Related products (that Amazon recommends; e.g., “also bought”, “also viewed”, “bought together”, etc)</li>
<li>salesRank: Product rank in top-level category (based on number of sales over a period)</li>
<li>brand: Product brand</li>
<li>categories: List of categories the product belongs to</li>
</ul>
<p>Here’s what a product looks like in the original JSON format. I’ll be using this example throughout this post.</p>
<div class="language-json highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">{</span><span class="w">
</span><span class="nl">"asin"</span><span class="p">:</span><span class="w"> </span><span class="s2">"B0147ZZKQ2"</span><span class="p">,</span><span class="w">
</span><span class="nl">"title"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Onitsuka Tiger Ultimate 81 Running Shoe"</span><span class="p">,</span><span class="w">
</span><span class="nl">"price"</span><span class="p">:</span><span class="w"> </span><span class="mf">68.88</span><span class="p">,</span><span class="w">
</span><span class="nl">"imUrl"</span><span class="p">:</span><span class="w"> </span><span class="s2">"http://ecx.images-amazon.com/images/I/51fAmVkTbyT._SY300_.jpg"</span><span class="p">,</span><span class="w">
</span><span class="nl">"related"</span><span class="p">:</span><span class="w">
</span><span class="p">{</span><span class="w">
</span><span class="nl">"also_bought"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">"B00JHONN1S"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B002BZX8Z6"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00D2K1M3O"</span><span class="p">,</span><span class="w"> </span><span class="s2">"0000031909"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00613WDTQ"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00D0WDS9A"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00D0GCI8S"</span><span class="p">,</span><span class="w"> </span><span class="s2">"0000031895"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B003AVKOP2"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B003AVEU6G"</span><span class="p">],</span><span class="w">
</span><span class="nl">"also_viewed"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">"B002BZX8Z6"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00JHONN1S"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B008F0SU0Y"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00D23MC6W"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00AFDOPDA"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00E1YRI4C"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B002GZGI4E"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B003AVKOP2"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00D9C1WBM"</span><span class="p">,</span><span class="w"> </span><span class="s2">"B00CEV8366"</span><span class="p">],</span><span class="w">
</span><span class="nl">"bought_together"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">"B002BZX8Z6"</span><span class="p">]</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="nl">"salesRank"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="nl">"Clothing, Shoes & Jewelry"</span><span class="p">:</span><span class="w"> </span><span class="mi">1368</span><span class="p">},</span><span class="w">
</span><span class="nl">"brand"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Onitsuka"</span><span class="p">,</span><span class="w">
</span><span class="nl">"categories"</span><span class="p">:</span><span class="w"> </span><span class="p">[[</span><span class="s2">"Clothing, Shoes & Jewelry"</span><span class="p">,</span><span class="w"> </span><span class="s2">"Men"</span><span class="p">,</span><span class="w"> </span><span class="s2">"Shoes"</span><span class="p">,</span><span class="w"> </span><span class="s2">"Fashion Sneakers"</span><span class="p">],</span><span class="w"> </span><span class="p">[</span><span class="s2">"Sports & Outdoors"</span><span class="p">,</span><span class="w"> </span><span class="s2">"Exercise & Fitness"</span><span class="p">,</span><span class="w"> </span><span class="s2">"Running"</span><span class="p">,</span><span class="w"> </span><span class="s2">"Footwear"</span><span class="p">]]</span><span class="w">
</span><span class="p">}</span><span class="w">
</span></code></pre></div></div>
<h2 id="converting-the-json-to-csv-format">Converting the JSON to CSV format</h2>
<p>The json data comes zipped. Here’s how we can read it into a pandas dataframe (and save to csv, if necessary):</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="n">pd</span>
<span class="kn">import</span> <span class="nn">gzip</span>
<span class="k">def</span> <span class="nf">parse</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">g</span> <span class="o">=</span> <span class="n">gzip</span><span class="p">.</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s">'rb'</span><span class="p">)</span>
<span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">g</span><span class="p">:</span>
<span class="k">yield</span> <span class="nb">eval</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">get_df</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">i</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">df</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">parse</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">df</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">d</span>
<span class="n">i</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">return</span> <span class="n">pd</span><span class="p">.</span><span class="n">DataFrame</span><span class="p">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">orient</span><span class="o">=</span><span class="s">'index'</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">get_df</span><span class="p">(</span><span class="s">'metadata.json.gz'</span><span class="p">)</span>
<span class="n">df</span><span class="p">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s">'metadata.csv'</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
</code></pre></div></div>
<p>The above approach reads the entire dataset and loads it into a dataframe. This is viable given the small data size (3.1gb). However, for larger datasets (&gt;100gb), we’ll need an alternative approach. One way is to read the zipped json and write to csv, row by row:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">gzip</span>
<span class="kn">import</span> <span class="nn">csv</span>
<span class="k">def</span> <span class="nf">parse</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="n">g</span> <span class="o">=</span> <span class="n">gzip</span><span class="p">.</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s">'rb'</span><span class="p">)</span>
<span class="k">for</span> <span class="n">l</span> <span class="ow">in</span> <span class="n">g</span><span class="p">:</span>
<span class="k">yield</span> <span class="nb">eval</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">json_to_csv</span><span class="p">(</span><span class="n">read_path</span><span class="p">,</span> <span class="n">write_path</span><span class="p">):</span>
<span class="n">csv_writer</span> <span class="o">=</span> <span class="n">csv</span><span class="p">.</span><span class="n">writer</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">write_path</span><span class="p">,</span> <span class="s">'w'</span><span class="p">))</span>
<span class="n">i</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">parse</span><span class="p">(</span><span class="n">read_path</span><span class="p">):</span>
<span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">header</span> <span class="o">=</span> <span class="n">d</span><span class="p">.</span><span class="n">keys</span><span class="p">()</span>
<span class="n">csv_writer</span><span class="p">.</span><span class="n">writerow</span><span class="p">(</span><span class="n">header</span><span class="p">)</span>
<span class="n">i</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="n">csv_writer</span><span class="p">.</span><span class="n">writerow</span><span class="p">(</span><span class="n">d</span><span class="p">.</span><span class="n">values</span><span class="p">())</span>
<span class="n">json_to_csv</span><span class="p">(</span><span class="s">'metadata.json.gz'</span><span class="p">,</span> <span class="s">'metadata.csv'</span><span class="p">)</span>
</code></pre></div></div>
<h2 id="formatting-and-cleaning-the-category-data">Formatting and cleaning the category data</h2>
<p>Now that we’ve parsed the product data into a dataframe (and saved it to csv), we can begin working with the category data. Here’s what the category data for the Onitsuka Tiger shoes looks like:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="p">[[</span><span class="s">"Clothing, Shoes & Jewelry"</span><span class="p">,</span> <span class="s">"Men"</span><span class="p">,</span> <span class="s">"Shoes"</span><span class="p">,</span> <span class="s">"Fashion Sneakers"</span><span class="p">],</span>
<span class="p">[</span><span class="s">"Sports & Outdoors"</span><span class="p">,</span> <span class="s">"Exercise & Fitness"</span><span class="p">,</span> <span class="s">"Running"</span><span class="p">,</span> <span class="s">"Footwear"</span><span class="p">]]</span>
</code></pre></div></div>
<p>The shoes are listed under two categories: “Clothing, Shoes &amp; Jewelry” (CSJ) and “Sports &amp; Outdoors” (S&amp;O). This means they are cross-listed across both categories, ensuring they can be found if you browse either the CSJ or the S&amp;O catalogue. (Yes, they’re so badass they qualify as both fashion sneakers and running footwear; check out the top review here: <a href="https://www.amazon.com/review/RSJEH3HL83AOB/" target="_blank">Whappoww!! Ninja sneaks for bosses of chill</a>)</p>
<p>The API will classify a product into its primary category (i.e., the first category provided), ensuring a one-to-one relationship between products and their respective categories. Often, guidelines exist for which category should be the primary one (e.g., Shoes should have CSJ categories as their primary category).</p>
<h2 id="converting-category-data-into-category-path-strings">Converting category data into category path strings</h2>
<p>First, we’ll parse the category from a list of lists to a string. Simultaneously, we’ll keep only the primary categories (i.e., first category in the list). After parsing, this is the resulting category:</p>
<p><code class="language-python highlighter-rouge"><span class="s">"Clothing, Shoes & Jewelry -> Men -> Shoes -> Fashion Sneakers"</span></code></p>
<p>Note: My preference is to convert the category data into a category path, a single string connected by arrows (->). However, any format should work fine.</p>
<p>Here’s the code showing how it’s done:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="k">def</span> <span class="nf">get_category_path</span><span class="p">(</span><span class="n">category_path_list</span><span class="p">):</span>
<span class="s">"""
(Str of list of list(s)) -> str
Returns the category path given a string of list of lists of
categories. If there are more than one list of categories provided,
returns the category path from the first list.
>>> get_category_path("[['A', 'B', 'C'], ['D', 'E', 'F', 'G']]")
'A -> B -> C'
>>> get_category_path("[['P1', 'P2', 'P3', 'P4']]")
'P1 -> P2 -> P3 -> P4'
:type category_path_list: str
:param category_path_list: A string containing a list of at least
one list of categories
:return: A string showing the full category path of the FIRST
category in the list (assumed to be primary category)
"""</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">return</span> <span class="s">' -> '</span><span class="p">.</span><span class="n">join</span><span class="p">(</span><span class="nb">eval</span><span class="p">(</span><span class="n">category_path_list</span><span class="p">)[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">except</span> <span class="nb">IndexError</span><span class="p">:</span> <span class="c1"># Error if the outer list is empty
</span> <span class="k">return</span> <span class="s">'no_category'</span>
<span class="k">except</span> <span class="nb">TypeError</span><span class="p">:</span> <span class="c1"># Error if the outer list is missing
</span> <span class="k">return</span> <span class="s">'no_category'</span>
<span class="c1"># Create column for category path
</span><span class="n">df</span><span class="p">[</span><span class="s">'category_path'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s">'categories'</span><span class="p">].</span><span class="nb">apply</span><span class="p">(</span><span class="n">get_category_path</span><span class="p">)</span>
</code></pre></div></div>
<h2 id="exclude-data-where-title-or-category-is-missing">Exclude data where title or category is missing</h2>
<p>If either the title or category is missing, we won’t be able to use the product data to train our model. Thus, we’ll exclude products with incomplete data, as such:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">df</span><span class="p">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s">'title'</span><span class="p">],</span> <span class="n">inplace</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s">'category_path'</span><span class="p">]</span> <span class="o">!=</span> <span class="s">'no_category'</span><span class="p">]</span>
</code></pre></div></div>
<p>We started with 9.43 million products. More than 1 million are missing either a title or a category, leaving us with 7.98 million products.</p>
<h2 id="exclude-certain-categories">Exclude certain categories</h2>
<p>There are some categories of products where the title does not provide any information about the product category (e.g., Books, Movies, etc). These products are usually classified via alternative approaches. For example, the availability of ISBN data indicates the product is a book, while the availability of ratings (e.g., PG-13) indicates the product is a movie.</p>
<p>We’ll exclude certain categories of products as follows, leaving us with 5.59 million products:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s">'category_path_lvl1'</span><span class="p">]</span> <span class="o">!=</span> <span class="s">'Books'</span><span class="p">]</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s">'category_path_lvl1'</span><span class="p">]</span> <span class="o">!=</span> <span class="s">'CDs & Vinyl'</span><span class="p">]</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s">'category_path_lvl1'</span><span class="p">]</span> <span class="o">!=</span> <span class="s">'Movies & TV'</span><span class="p">]</span>
</code></pre></div></div>
<p>Note: You may have noticed that the code above filters on the column category_path_lvl1. This column contains the top-level categories for products (e.g., “Electronics”, “Clothing, Shoes &amp; Jewelry”, “Sports &amp; Outdoors”). I’ll leave deriving this as an exercise for the audience ;)</p>
<h2 id="exclude-non-deepestnon-narrowest-categories">Exclude non-deepest/non-narrowest categories</h2>
<p>In the list of categories, we’ll find some that seem to stop halfway, such as the first category path below:</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="s">"Clothing, Shoes & Jewelry -> Men -> Shoes"</span>
<span class="s">"Clothing, Shoes & Jewelry -> Men -> Shoes -> Fashion Sneakers"</span>
</code></pre></div></div>
<p>In this case, the latter category is deeper (and narrower) than the former. Classifying products to the deepest category helps shoppers find relevant products more easily (given that the category is narrower). Thus, we’ll exclude products that are not at the deepest category.</p>
<p>Here’s one way to do it. We’ll first sort the categories and compare each category path with the next. If the category path is not in the next (i.e., the category path is not a substring of the next category), then it is a deepest category and we append it to a list. We’ll then keep only products with categories in our list.</p>
<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Create df of category path counts
</span><span class="n">category_path_df</span> <span class="o">=</span> <span class="n">df</span><span class="p">.</span><span class="n">groupby</span><span class="p">(</span><span class="s">'category_path'</span><span class="p">).</span><span class="n">agg</span><span class="p">({</span><span class="s">'title'</span><span class="p">:</span> <span class="s">'count'</span><span class="p">}).</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s">'title'</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="bp">False</span><span class="p">).</span><span class="n">reset_index</span><span class="p">()</span>
<span class="n">category_path_df</span><span class="p">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s">'category_path'</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
<span class="n">category_path_df</span><span class="p">[</span><span class="s">'category_path_next'</span><span class="p">]</span> <span class="o">=</span> <span class="n">category_path_df</span><span class="p">[</span><span class="s">'category_path'</span><span class="p">].</span><span class="n">shift</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">category_path_df</span><span class="p">.</span><span class="n">fillna</span><span class="p">(</span><span class="s">'no_comparison'</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
<span class="c1"># Create list of category_paths which are deepest category
</span><span class="n">category_path_list</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">category_path_df</span><span class="p">.</span><span class="n">iterrows</span><span class="p">():</span>
<span class="n">category_path</span> <span class="o">=</span> <span class="n">value</span><span class="p">[</span><span class="s">'category_path'</span><span class="p">]</span>
<span class="n">category_path_next</span> <span class="o">=</span> <span class="n">value</span><span class="p">[</span><span class="s">'category_path_next'</span><span class="p">]</span>
<span class="k">if</span> <span class="n">category_path</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">category_path_next</span><span class="p">:</span>
<span class="n">category_path_list</span><span class="p">.</span><span class="n">append</span><span class="p">(</span><span class="n">category_path</span><span class="p">)</span>
<span class="c1"># Create df of category_path
</span><span class="n">category_path_df</span> <span class="o">=</span> <span class="n">pd</span><span class="p">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">category_path_list</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s">'category_path'</span><span class="p">])</span>
<span class="c1"># Keep only rows where the category_path is in category_path_df
</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s">'category_path'</span><span class="p">].</span><span class="n">isin</span><span class="p">(</span><span class="n">category_path_df</span><span class="p">[</span><span class="s">'category_path'</span><span class="p">])]</span>
</code></pre></div></div>
<p>Initially, we had 17.6 k categories. After excluding non-deepest categories, about 15 k categories (and 4.61 mil products) remain.</p>
<h2 id="exclude-categories-that-have-too-few-products">Exclude categories that have too few products</h2>
<p>Lastly, to ensure sufficient data to split into train and test sets, and train our model, we’ll exclude categories with fewer than 10 products. With a 50-50 train-test split, we’ll have at least five products to train per category. This shouldn’t be too difficult and I encourage you to try it out yourself.</p>
<p>After excluding products based on this condition, we’re left with 4.59 mil products.</p>
<h2 id="and-were-done">And we’re done!</h2>
<p>Congratulations on making it this far!</p>
<p>We’re done with the key steps to cleaning the category data. There may also be other cleaning involved, such as excluding temporary categories (e.g., “Black Friday Sales”, “11-11 Sales”, etc.) but we shall not cover them.</p>
<p>As shown, there’s a lot of work to be done in acquiring, formatting, and cleaning the data before we get to building a model to classify products. In this case, I’m thankful to Julian McAuley and his kind sharing of the Amazon product metadata.</p>
<p>In the next article, we go into—yeap, you guessed it—more data cleaning and preparation (specific to titles). This basic data preparation is key to training our classifier with high accuracy.</p>
<p>P.S. I would greatly appreciate any feedback on process, code, writing style, etc., in the comments below. Thank you!</p>
<br>
<p>If you found this useful, please cite this write-up as:</p>
<blockquote class="blockquote-citation">
<p>Yan, Ziyou. (Oct 2016). Product Classification API Part 1: Data Acquisition. eugeneyan.com.
https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/.</p>
</blockquote>
<p>or</p>
<div class="citation"><pre><code>@article{yan2016acquisition,
title = {Product Classification API Part 1: Data Acquisition},
author = {Yan, Ziyou},
journal = {eugeneyan.com},
year = {2016},
month = {Oct},
url = {https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/}
}</code></pre>
</div>
<br>
<style>
/* Share-button row: an inline container holding floated, fixed-size icon cells. */
#share-buttons {
  display: inline-block;
  vertical-align: middle;
}
/* Clearfix so the floated cells stay inside the container. */
#share-buttons:after {
  content: "";
  display: block;
  clear: both;
}
/* One cell per share target. The original declared text-align twice
   (left, then center); only the later "center" ever applied, so the dead
   declaration is dropped. */
#share-buttons > div {
  position: relative;
  height: 36px;
  width: 32px;
  float: left;
  text-align: center;
}
/* Sizing and colour for inline SVG icons placed directly in a cell. */
#share-buttons > div > svg {
  height: 16px;
  fill: #808080;
  margin-top: 10px;
}
/* Cells are click targets, so show the pointer cursor. */
#share-buttons > div:hover {
  cursor: pointer;
}
</style>
<span style="font-size: 18px">Share on: </span>
<div id="share-buttons">
<div class="twitter" title="Share this on Twitter" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://twitter.com/intent/tweet?text=Great read! Product Classification API Part 1: Data Acquisition&url=https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/&via=eugeneyan', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" data-cf-modified-e8a9aab4af92c290f6da290e-="">
<img class="icon about-icon-large" src="/assets/icon-twitter.svg" loading="lazy" alt=""/>
</div>
<div class="linkedin" title="Share this on Linkedin" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://www.linkedin.com/shareArticle?mini=true&url=https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/&source=eugeneyan.com', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" data-cf-modified-e8a9aab4af92c290f6da290e-="">
<img class="icon about-icon-large" src="/assets/icon-linkedin.svg" loading="lazy" alt=""/>
</div>
<div class="bluesky" title="Share this on Bluesky" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://bsky.app/intent/compose?text=Great read! Product Classification API Part 1: Data Acquisition https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" data-cf-modified-e8a9aab4af92c290f6da290e-="">
<img class="icon about-icon-large" src="/assets/bluesky.svg" loading="lazy" alt=""/>
</div>
<div class="facebook fb-share-button" title="Share this on Facebook" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('https://www.facebook.com/dialog/share?app_id=249237293114028&display=popup&href=https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/&redirect_uri=https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/', 'pop-up', 'left=20,top=20,width=500,height=500,toolbar=1,resizable=0');" data-cf-modified-e8a9aab4af92c290f6da290e-="">
<img class="icon about-icon-large" src="/assets/icon-facebook.svg" loading="lazy" alt=""/>
</div>
<div class="mail" title="Share this through Email" onclick="if (!window.__cfRLUnblockHandlers) return false; window.open('mailto:?subject=Great read! Product Classification API Part 1: Data Acquisition&body=https://eugeneyan.com/writing/product-categorization-api-part-1-data-acquisition-and-formatting/');" data-cf-modified-e8a9aab4af92c290f6da290e-="">
<img class="icon about-icon-large" src="/assets/icon-mail.svg" loading="lazy" alt=""/>
</div>
</div>
</div>
<!-- Page navigation -->
<hr>
<div id="algolia-recs-container" style="display: none;">
<div id="algolia-related-products" style="margin-bottom: 2em;"></div>
<div id="algolia-fbt"></div>
<style>
/*
 * Styles for the two recommendation widgets (#algolia-related-products and
 * #algolia-fbt). Both widgets use the same card layout, so every rule is
 * written once with a selector list covering both; rule order and
 * specificity match the previous per-widget copies.
 */

/* Section header shared by both widgets */
.algolia-recs-section-header {
  font-family: 'Raleway', Helvetica, sans-serif;
  font-size: 1em;
  font-weight: bold;
  margin-top: 0;
  margin-bottom: 15px; /* Space between header and recommendation cards */
  color: var(--c-text);
  font-style: italic;
}

/* Card row: horizontal, non-wrapping flex list without list chrome */
#algolia-related-products .ais-RelatedProducts-list,
#algolia-fbt .ais-FrequentlyBoughtTogether-list {
  display: flex;
  flex-direction: row;
  flex-wrap: nowrap;
  justify-content: flex-start;
  padding-left: 0;
  list-style-type: none;
  margin: 0;
}

/* Card: three per row (3 * 32% + 2 * 2% margin = 100%) */
#algolia-related-products .ais-RelatedProducts-item,
#algolia-fbt .ais-FrequentlyBoughtTogether-item {
  width: 32%;
  margin-right: 2%;
  box-sizing: border-box; /* Include border in the card's total width */
  border: 1px solid color-mix(in srgb, var(--c-background) 85%, var(--c-text) 15%); /* Theme-aware light grey border */
  padding: 0; /* Spacing is handled by the card's children */
  text-align: left;
  background-color: var(--c-background);
  border-radius: 4px;
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

/* No trailing margin after the last card in the row */
#algolia-related-products .ais-RelatedProducts-item:last-child,
#algolia-fbt .ais-FrequentlyBoughtTogether-item:last-child {
  margin-right: 0;
}

/* Baseline for any image inside a card; images inside
   .recommendation-image-container are refined by a more specific rule below */
#algolia-related-products .ais-RelatedProducts-item img,
#algolia-fbt .ais-FrequentlyBoughtTogether-item img {
  display: block;
  width: calc(100% - 4px); /* Full width minus 2px left/right margins */
  max-width: 100%;
  object-fit: cover;
  margin: 2px;
}

/* Wrapper link that makes the whole card clickable */
#algolia-related-products .ais-RelatedProducts-item a.ais-RelatedProducts-item-link-wrapper,
#algolia-fbt .ais-FrequentlyBoughtTogether-item a.ais-FrequentlyBoughtTogether-item-link-wrapper {
  display: block;
  text-decoration: none;
  color: inherit;
}

/* Card title, clamped to two lines */
#algolia-related-products .related-product-title,
#algolia-fbt .fbt-product-title {
  font-family: 'Raleway', Helvetica, sans-serif;
  font-size: 0.75em;
  display: -webkit-box;
  -webkit-line-clamp: 2;
  line-clamp: 2;
  -webkit-box-orient: vertical;
  overflow: hidden;
  text-overflow: ellipsis;
  padding: 0 7px 7px 7px;
  line-height: 1.5;
  height: 3.3em; /* Fixed height so cards align regardless of title length */
  color: var(--c-interactive);
}

/* Positioning context that holds the image and the score overlay */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container,
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-image-container {
  position: relative;
  display: block;
  width: calc(100% - 4px);
  margin: 2px;
  margin-bottom: 8px;
  line-height: 0; /* Prevents stray inline spacing below the image */
}

/* The image itself, filling its container */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container img,
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-image-container img {
  display: block;
  width: 100%;
  max-width: 20em;
  height: auto;
  max-height: 12em;
  object-fit: cover; /* Crop rather than distort when max-height applies */
  margin: 0 auto; /* Center when max-width makes the image narrower than its container */
}

/* Score badge overlaid on the bottom-right corner of the image */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score,
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-score {
  position: absolute;
  bottom: 3px;
  right: 3px;
  background-color: color-mix(in srgb, var(--c-background) 85%, var(--green) 15%); /* Theme-aware light green */
  color: var(--green);
  padding: 3px 6px;
  font-family: 'Raleway', Helvetica, sans-serif;
  font-size: 0.75em;
  font-weight: bold;
  border-radius: 10px;
  border: 1px solid var(--green);
  line-height: 1; /* Overrides the container's line-height: 0 so the text is visible */
  z-index: 10; /* Keep the badge above the image */
  box-shadow: 0 1px 2px rgba(0,0,0,0.15);
  display: flex; /* Lay out icon and number on one line */
  align-items: center;
}

/* SVG icon inside the score badge */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score-icon,
#algolia-fbt .ais-FrequentlyBoughtTogether-item .recommendation-score-icon {
  width: 0.9em; /* Scales with the badge's font size */
  height: 0.9em;
  vertical-align: -0.1em;
  margin-right: 4px;
  fill: var(--green);
}
</style>
<script type="e8a9aab4af92c290f6da290e-text/javascript">
/**
 * Inject a <script> tag for `src` into <head> and report the outcome as a promise.
 *
 * Resolves (with no value) once the script's load event fires, or immediately
 * when a <script> element with the same src attribute is already in the
 * document. Rejects with an Error when the script's error event fires.
 */
function loadScript(src) {
  return new Promise(function (resolve, reject) {
    const existingTag = document.querySelector(`script[src="${src}"]`);
    if (!existingTag) {
      const tag = document.createElement('script');
      tag.src = src;
      tag.onload = function () {
        resolve();
      };
      tag.onerror = function () {
        reject(new Error(`Script load error for ${src}`));
      };
      document.head.appendChild(tag);
    } else {
      // A tag for this URL is already present; treat it as loaded.
      resolve();
    }
  });
}
/**
 * Load the Algolia client and InstantSearch bundles from the CDN, then start
 * both recommendation widgets.
 *
 * The widgets are an optional page enhancement, so a failure never propagates.
 * It is logged as a warning instead of being swallowed, which makes a blocked
 * CDN or an initialization error diagnosable from the console.
 */
function initAlgoliaRecommendations() {
  Promise.all([
    loadScript('https://cdn.jsdelivr.net/npm/algoliasearch@4/dist/algoliasearch-lite.umd.js'),
    loadScript('https://cdn.jsdelivr.net/npm/instantsearch.js@4')
  ])
    .then(() => {
      // Initialize the Related Products widget
      initRelatedProducts();
      // Initialize the Frequently Bought Together widget
      initFrequentlyBoughtTogether();
    })
    .catch(error => {
      // Best-effort: keep the page working, but leave a trace of what failed.
      console.warn('Algolia recommendations could not be initialized:', error);
    });
}
/**
 * Mount the InstantSearch `relatedProducts` widget into #algolia-related-products.
 *
 * Does nothing when the mount element is absent. Besides rendering cards, the
 * widget callbacks toggle the visibility of #algolia-related-products and of
 * the shared #algolia-recs-container depending on whether any items came back.
 * Relies on the globals `algoliasearch` and `instantsearch` being loaded.
 */
function initRelatedProducts() {
  // Ensure container exists
  if (!document.getElementById('algolia-related-products')) {
    return;
  }
  const recSearchClient = algoliasearch(
    '2XJCLEABQD',
    'b61ec4cb64bd32d62c053466fccbfa43'
  );
  const relatedSearch = instantsearch({
    indexName: 'eugeneyan.com',
    searchClient: recSearchClient,
    clickAnalytics: true,
    insights: true, // Enable insights for click tracking on recommendations
  });
  relatedSearch.addWidgets([
    instantsearch.widgets.relatedProducts({
      container: '#algolia-related-products',
      objectIDs: ['/writing/product-categorization-api-part-1-data-acquisition-and-formatting/'],
      limit: 3,
      queryParameters: {
        attributesToRetrieve: ['title', 'url', 'image', 'score', '_score'], // Specify only needed attributes
        attributesToHighlight: [], // Disable highlighting
        attributesToSnippet: [] // Disable snippeting
      },
      translations: {
        title: '', // Custom title is in _layouts/post.html
      },
      transformItems: function(items) {
        const containerElement = document.getElementById('algolia-recs-container');
        const relatedElement = document.getElementById('algolia-related-products');
        const fbtElement = document.getElementById('algolia-fbt');
        // Show or hide this widget's own element depending on whether it has items
        if (relatedElement) {
          relatedElement.style.display = items.length === 0 ? 'none' : 'block';
        }
        // Then handle the main recommendations container visibility
        if (containerElement) {
          if (items.length > 0) {
            containerElement.style.display = 'block';
          } else if (!fbtElement || !fbtElement.hasChildNodes()) {
            // Guard against a missing FBT element (previously an unguarded
            // call that would throw) and hide the wrapper only when the
            // sibling widget has rendered nothing either.
            containerElement.style.display = 'none';
          }
        }
        return items;
      },
      templates: {
        header() {
          // Return a PLAIN string for the header
          return '<h4 class="algolia-recs-section-header">You Might Also Like (content-based)</h4>';
        },
        item: function(hit, { html, sendEvent }) {
          const itemUrl = hit.url || '#';
          // Use hit.image only when it is a non-blank string; otherwise fall back to the default cover.
          const hasImage = typeof hit.image === 'string' && hit.image.trim() !== '';
          const imageUrl = hasImage
            ? `/assets/og_image/${hit.image}`
            : '/assets/og_image/default-v4.jpg';
          let scoreValue = null;
          if (typeof hit.score === 'number') scoreValue = hit.score.toFixed(2);
          else if (typeof hit._score === 'number') scoreValue = hit._score.toFixed(2);
          // The card is built with the `html` tagged template rather than a plain
          // template literal. A plain literal stringifies the onClick arrow
          // function into the attribute, so sendEvent was never invoked, and it
          // also injected hit.title unescaped. With `html`, the handler is bound
          // as a real listener and interpolated values are rendered as
          // text/attribute values. Void elements must be self-closed in this syntax.
          const scoreElement = scoreValue
            ? html`<div class="recommendation-score"><svg viewBox="0 0 24 24" class="recommendation-score-icon" xmlns="http://www.w3.org/2000/svg"><path d="M16 6l2.29 2.29-4.88 4.88-4-4L2 16.59 3.41 18l6-6 4 4 6.3-6.29L22 12V6h-6z"></path></svg>${scoreValue}</div>`
            : null;
          return html`
            <a
              href="${itemUrl}"
              class="ais-RelatedProducts-item-link-wrapper"
              onClick="${() => {
                sendEvent('click', hit, 'Related Item Clicked');
              }}"
            >
              <div class="recommendation-image-container">
                <img src="${imageUrl}" alt="${hit.title || 'Recommendation cover image'}" />
                ${scoreElement}
              </div>
              <div class="related-product-title">${hit.title || 'Untitled Post'}</div>
            </a>
          `;
        },
        empty(results, { html }) {
          const containerElement = document.getElementById('algolia-recs-container');
          const relatedElement = document.getElementById('algolia-related-products');
          // Hide Related Products container since there are no results
          if (relatedElement) {
            relatedElement.style.display = 'none';
          }
          // Check if the FBT widget also has no items before hiding the main container
          const fbtWidgetContainer = document.getElementById('algolia-fbt');
          if (containerElement && (!fbtWidgetContainer || !fbtWidgetContainer.hasChildNodes())) {
            containerElement.style.display = 'none';
          }
          // Return an empty string to prevent rendering 'undefined'
          return '';
        }
      }
    })
  ]);
  relatedSearch.start();
}
/**
 * Mount the InstantSearch `frequentlyBoughtTogether` widget into #algolia-fbt.
 *
 * Does nothing when the mount element is absent. Besides rendering cards, the
 * widget callbacks toggle the visibility of the shared #algolia-recs-container
 * (and hide #algolia-fbt when the widget is empty).
 * Relies on the globals `algoliasearch` and `instantsearch` being loaded.
 */
function initFrequentlyBoughtTogether() {
  // Ensure container exists
  if (!document.getElementById('algolia-fbt')) {
    return;
  }
  const recSearchClient = algoliasearch(
    '2XJCLEABQD',
    'b61ec4cb64bd32d62c053466fccbfa43'
  );
  const fbtSearch = instantsearch({
    indexName: 'eugeneyan.com',
    searchClient: recSearchClient,
    clickAnalytics: true,
    insights: true, // Enable insights for click tracking on recommendations
  });
  fbtSearch.addWidgets([
    instantsearch.widgets.frequentlyBoughtTogether({
      container: '#algolia-fbt',
      objectIDs: ['/writing/product-categorization-api-part-1-data-acquisition-and-formatting/'],
      limit: 3,
      queryParameters: {
        attributesToRetrieve: ['title', 'url', 'image', 'score', '_score'], // Specify only needed attributes
        attributesToHighlight: [], // Disable highlighting
        attributesToSnippet: [] // Disable snippeting
      },
      translations: {
        title: '', // Custom title is in _layouts/post.html
      },
      transformItems: function(items) {
        const containerElement = document.getElementById('algolia-recs-container');
        const relatedElement = document.getElementById('algolia-related-products');
        if (containerElement) {
          if (items.length > 0) {
            containerElement.style.display = 'block';
          } else if (!relatedElement || !relatedElement.hasChildNodes()) {
            // Guard against a missing Related Products element (previously an
            // unguarded call that would throw) and hide the wrapper only when
            // the sibling widget has rendered nothing either.
            containerElement.style.display = 'none';
          }
        }
        return items;
      },
      templates: {
        header() {
          // Return a PLAIN string for the header
          return '<h4 class="algolia-recs-section-header">Frequently Read Together (behavioral-based)</h4>';
        },
        item: function(hit, { html, sendEvent }) {
          const itemUrl = hit.url || '#';
          // Use hit.image only when it is a non-blank string; otherwise fall back to the default cover.
          const hasImage = typeof hit.image === 'string' && hit.image.trim() !== '';
          const imageUrl = hasImage
            ? `/assets/og_image/${hit.image}`
            : '/assets/og_image/default-v4.jpg';
          let scoreValue = null;
          if (typeof hit.score === 'number') scoreValue = hit.score.toFixed(2);
          else if (typeof hit._score === 'number') scoreValue = hit._score.toFixed(2);
          // The card is built with the `html` tagged template rather than a plain
          // template literal. A plain literal stringifies the onClick arrow
          // function into the attribute, so sendEvent was never invoked, and it
          // also injected hit.title unescaped. With `html`, the handler is bound
          // as a real listener and interpolated values are rendered as
          // text/attribute values. Void elements must be self-closed in this syntax.
          const scoreElement = scoreValue
            ? html`<div class="recommendation-score"><svg viewBox="0 0 24 24" class="recommendation-score-icon" xmlns="http://www.w3.org/2000/svg"><path d="M16 6l2.29 2.29-4.88 4.88-4-4L2 16.59 3.41 18l6-6 4 4 6.3-6.29L22 12V6h-6z"></path></svg>${scoreValue}</div>`
            : null;
          return html`
            <a
              href="${itemUrl}"
              class="ais-FrequentlyBoughtTogether-item-link-wrapper"
              onClick="${() => {
                sendEvent('click', hit, 'FBT Item Clicked');
              }}"
            >
              <div class="recommendation-image-container">
                <img src="${imageUrl}" alt="${hit.title || 'Recommendation cover image'}" />
                ${scoreElement}
              </div>
              <div class="fbt-product-title">${hit.title || 'Untitled Post'}</div>
            </a>
          `;
        },
        empty(results, { html }) {
          const containerElement = document.getElementById('algolia-recs-container');
          const fbtElement = document.getElementById('algolia-fbt');
          // Hide FBT container since there are no results
          if (fbtElement) {
            fbtElement.style.display = 'none';
          }
          // Check if the Related Products widget also has no items before hiding the main container
          const relatedWidgetContainer = document.getElementById('algolia-related-products');
          if (containerElement && (!relatedWidgetContainer || !relatedWidgetContainer.hasChildNodes())) {
            containerElement.style.display = 'none';
          }
          // Return an empty string to prevent rendering 'undefined'
          return '';
        }
      }
    })
  ]);
  fbtSearch.start();
}
// Lazy-load the recommendation widgets: once the DOM is ready, wait until the
// bottom of the viewport is within 500px of the end of the page body, then
// initialize exactly once.
document.addEventListener('DOMContentLoaded', () => {
  let alreadyTriggered = false;
  const maybeInit = () => {
    if (alreadyTriggered) {
      return;
    }
    const viewportBottom = window.scrollY + window.innerHeight;
    const triggerLine = document.body.scrollHeight - 500;
    if (!(viewportBottom >= triggerLine)) {
      return;
    }
    alreadyTriggered = true;
    // One-shot: stop listening before kicking off the load.
    window.removeEventListener('scroll', maybeInit);
    initAlgoliaRecommendations();
  };
  window.addEventListener('scroll', maybeInit, { passive: true });
  // Run once immediately in case the page is already scrolled far enough.
  maybeInit();
});
</script>
</div>
<br>
<!-- <div id="algolia-recs-container" style="display: none;">
<div id="algolia-related-products" style="margin-bottom: 2em;"></div>
<style>
/* Common styles for both recommendation widgets */
.algolia-recs-section-header {
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 1em; /* Adjust as needed, smaller than default h3 */
font-weight: bold;
margin-top: 0; /* Remove or reduce top margin */
margin-bottom: 15px; /* Space between header and recommendation cards */
color: var(--c-text); /* Use theme's text color */
font-style: italic;
}
/* Related Products Widget Styles */
#algolia-related-products .ais-RelatedProducts-list {
display: flex;
flex-direction: row; /* Arrange items horizontally */
flex-wrap: nowrap; /* Prevent wrapping to new lines, if possible */
justify-content: flex-start; /* Align items to the start of the container */
padding-left: 0; /* Remove default list padding */
list-style-type: none; /* Remove list bullets */
margin: 0;
}
#algolia-related-products .ais-RelatedProducts-item {
width: 32%; /* Adjust for 3 items: 32% * 3 items + 2% * 2 margins = 100% */
margin-right: 2%;
box-sizing: border-box; /* Include padding and border in the element's total width */
/* Optional: Basic card styling (uncomment to use) */
border: 1px solid color-mix(in srgb, var(--c-background) 85%, var(--c-text) 15%); /* Theme-aware light grey border */
padding: 0; /* Remove overall card padding, will be handled by elements */
text-align: left; /* Or 'center' if you prefer */
background-color: var(--c-background);
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
#algolia-related-products .ais-RelatedProducts-item:last-child {
margin-right: 0; /* No margin for the last item in the row */
}
/* Styling for images within recommendation items */
#algolia-related-products .ais-RelatedProducts-item img {
display: block; /* Can help remove extra space below image */
width: calc(100% - 4px); /* Full width minus 2px L/R margins */
max-width: 100%; /* Ensures image does not exceed container if intrinsically smaller */
/* height: auto; -- Controlled by inline style's max-height and object-fit */
object-fit: cover; /* Ensure image covers the area, also in inline style */
margin: 2px; /* 2px margin on top, left, right. Bottom is overridden by inline style. */
/* margin-bottom: 8px; -- This is set by inline style in JS template */
}
/* Styling for the wrapper link to make the whole card clickable */
#algolia-related-products .ais-RelatedProducts-item a.ais-RelatedProducts-item-link-wrapper {
display: block; /* Make the link fill the list item */
text-decoration: none; /* Remove underline */
color: inherit; /* Use parent's text color */
}
#algolia-related-products .related-product-title {
font-family: 'Raleway', Helvetica, sans-serif;
font-size: 0.75em;
display: -webkit-box;
-webkit-line-clamp: 2; /* Limit to 2 lines for WebKit browsers */
line-clamp: 2; /* Standard property */
-webkit-box-orient: vertical;
overflow: hidden;
text-overflow: ellipsis;
padding: 0 7px 7px 7px; /* 0 top, 7px L/R/B for text area */
line-height: 1.5; /* Adjust for better readability */
height: 3.3em; /* Current height: 3.3em. For 2 lines with 0.75em font & 1.5 line-height, calculated height would be 2.25em. */
color: var(--c-interactive); /* Use theme's interactive color */
}
/* This container will wrap the image and score, taking the original image's layout space. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container {
position: relative; /* For positioning the score absolutely within */
display: block; /* Matches original image display and ensures proper block layout */
width: calc(100% - 4px); /* Adopts width from original image styling */
margin: 2px; /* Adopts margin from original image styling */
margin-bottom: 8px; /* Adopts specific bottom margin from original image's inline style */
line-height: 0; /* Prevents unexpected space if child elements are treated as inline */
}
/* Cover image inside the container: fills the width, capped in both dimensions, cropped to fit. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-image-container img {
  display: block;
  margin: 0 auto; /* centres the image when max-width makes it narrower than the container */
  width: 100%; /* fill the container's width */
  height: auto; /* keep the aspect ratio by default */
  max-width: 20em; /* upper bound on rendered width */
  max-height: 12em; /* upper bound on rendered height */
  object-fit: cover; /* crop rather than distort when the caps apply */
}
/* Score badge overlaid on the bottom-right corner of the image container. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score {
  /* Placement */
  position: absolute;
  right: 3px; /* inset from the container's right edge */
  bottom: 3px; /* inset from the container's bottom edge */
  z-index: 10; /* keep the badge above the image */

  /* Layout of icon + number */
  display: flex;
  align-items: center; /* vertically centre icon and text */
  padding: 3px 6px;

  /* Typography */
  font-family: 'Raleway', Helvetica, sans-serif;
  font-size: 0.75em;
  font-weight: bold;
  line-height: 1; /* tight line box for small text in a small badge */
  color: var(--green);

  /* Surface */
  background-color: color-mix(in srgb, var(--c-background) 85%, var(--green) 15%); /* page background tinted with 15% green */
  border: 1px solid var(--green);
  border-radius: 10px;
  box-shadow: 0 1px 2px rgba(0,0,0,0.15);
}
/* SVG icon inside the score badge. The badge (.recommendation-score) is a flex container and
   this icon is its direct child, so vertical alignment comes from the badge's align-items;
   vertical-align has no effect on flex items and is therefore not declared here. */
#algolia-related-products .ais-RelatedProducts-item .recommendation-score-icon {
  width: 0.9em; /* scales with the badge's font size */
  height: 0.9em;
  margin-right: 4px; /* gap between icon and score number */
  fill: var(--green); /* same green as the badge text and border */
}
</style>
<script>
/**
 * Load an external script once and return a promise that settles when it has loaded.
 *
 * Repeated or concurrent calls for the same URL share a single promise, so a second caller
 * cannot resolve before the tag inserted by the first caller has finished loading. A failed
 * load is forgotten and its tag removed, so a later call retries instead of reporting success.
 *
 * @param {string} src - URL of the script to load.
 * @returns {Promise<void>} Resolves once the script has loaded (or was already present in the
 *   page markup); rejects with an Error if the browser reports a load failure.
 */
function loadScript(src) {
  // Per-URL promise cache kept on the function itself to avoid adding another global.
  const cache = loadScript.cache || (loadScript.cache = new Map());
  if (cache.has(src)) {
    return cache.get(src);
  }

  const pending = new Promise((resolve, reject) => {
    // Compare attribute values directly instead of interpolating src into a CSS selector,
    // which would break on URLs containing quotes or backslashes.
    const alreadyInPage = Array.from(document.querySelectorAll('script[src]'))
      .some((el) => el.getAttribute('src') === src);
    if (alreadyInPage) {
      resolve(); // Tag was not created by this function; treat as already loaded
      return;
    }

    const script = document.createElement('script');
    script.src = src;
    script.onload = () => resolve();
    script.onerror = () => {
      // Drop the cache entry and the dead tag so the next call starts a fresh attempt.
      cache.delete(src);
      script.remove();
      reject(new Error(`Script load error for ${src}`));
    };
    document.head.appendChild(script);
  });

  cache.set(src, pending);
  return pending;
}
/**
 * Load the Algolia search client and InstantSearch bundles from the CDN in parallel,
 * then mount the related-posts widget.
 *
 * Recommendations are an optional enhancement, so failures never propagate to the page.
 * They are logged as a warning rather than swallowed, because the catch below also
 * receives exceptions thrown by initRelatedProducts() and those would otherwise be
 * invisible.
 */
function initAlgoliaRecommendations() {
  Promise.all([
    loadScript('https://cdn.jsdelivr.net/npm/algoliasearch@4/dist/algoliasearch-lite.umd.js'),
    loadScript('https://cdn.jsdelivr.net/npm/instantsearch.js@4')
  ])
    .then(() => {
      // Both globals are available now; initialize the Related Products widget
      initRelatedProducts();
    })
    .catch((error) => {
      console.warn('Algolia recommendations could not be initialized:', error);
    });
}
/**
 * Mount the InstantSearch `relatedProducts` widget into #algolia-related-products.
 *
 * Expects the `algoliasearch` and `instantsearch` globals to be loaded already. Does nothing
 * when the target container is absent. The widget container and its outer wrapper
 * (#algolia-recs-container) are hidden whenever there are no recommendations.
 *
 * Item cards are rendered with the `html` tagged template that InstantSearch passes to
 * templates. This is required for two reasons:
 *  - a function interpolated into a plain template string is only stringified into the
 *    attribute, so the click handler never ran and no insights event was sent;
 *  - record fields (title, url) interpolated into a plain string were inserted as raw HTML.
 */
function initRelatedProducts() {
  // Ensure container exists
  if (!document.getElementById('algolia-related-products')) {
    return;
  }

  // Show or hide both the widget container and its outer wrapper together.
  // Elements are looked up on each call because either may be missing from the page.
  function setRecsVisible(visible) {
    const display = visible ? 'block' : 'none';
    const relatedElement = document.getElementById('algolia-related-products');
    const containerElement = document.getElementById('algolia-recs-container');
    if (relatedElement) relatedElement.style.display = display;
    if (containerElement) containerElement.style.display = display;
  }

  const recSearchClient = algoliasearch(
    '2XJCLEABQD',
    'b61ec4cb64bd32d62c053466fccbfa43'
  );

  const relatedSearch = instantsearch({
    indexName: 'eugeneyan.com',
    searchClient: recSearchClient,
    clickAnalytics: true,
    insights: true, // Enable insights for click tracking on recommendations
  });

  relatedSearch.addWidgets([
    instantsearch.widgets.relatedProducts({
      container: '#algolia-related-products',
      objectIDs: ['/writing/product-categorization-api-part-1-data-acquisition-and-formatting/'],
      limit: 3,
      queryParameters: {
        attributesToRetrieve: ['title', 'url', 'image', 'score', '_score'], // Specify only needed attributes
        attributesToHighlight: [], // Disable highlighting
        attributesToSnippet: [] // Disable snippeting
      },
      translations: {
        title: '', // Custom title is in _layouts/post.html
      },
      transformItems(items) {
        // Items pass through unchanged; this hook is only used to toggle visibility.
        setRecsVisible(items.length > 0);
        return items;
      },
      templates: {
        header() {
          // Return a PLAIN string for the header
          return '<h4 class="algolia-recs-section-header">You Might Also Like</h4>';
        },
        item(hit, { html, sendEvent }) {
          const itemUrl = hit.url || '#';
          const title = hit.title || 'Untitled Post';
          const altText = hit.title || 'Recommendation cover image';

          // Fall back to the default cover when image is missing, non-string, or blank.
          const hasImage = typeof hit.image === 'string' && hit.image.trim() !== '';
          const imageUrl = hasImage
            ? `/assets/og_image/${hit.image}`
            : '/assets/og_image/default-v4.jpg';

          // Prefer an explicit `score` field, then `_score`; no badge when neither is numeric.
          let scoreValue = null;
          if (typeof hit.score === 'number') scoreValue = hit.score.toFixed(2);
          else if (typeof hit._score === 'number') scoreValue = hit._score.toFixed(2);

          // The icon is decorative (the number carries the meaning), hence aria-hidden.
          // Elements are explicitly closed because the tagged template uses JSX-like syntax.
          const scoreElement = scoreValue
            ? html`<div class="recommendation-score"><svg viewBox="0 0 24 24" class="recommendation-score-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true"><path d="M16 6l2.29 2.29-4.88 4.88-4-4L2 16.59 3.41 18l6-6 4 4 6.3-6.29L22 12V6h-6z"></path></svg>${scoreValue}</div>`
            : null;

          return html`
            <a
              href="${itemUrl}"
              class="ais-RelatedProducts-item-link-wrapper"
              onClick="${() => sendEvent('click', hit, 'Related Item Clicked')}"
            >
              <div class="recommendation-image-container">
                <img src="${imageUrl}" alt="${altText}" />
                ${scoreElement}
              </div>
              <div class="related-product-title">${title}</div>
            </a>
          `;
        },
        empty() {
          // No related items: hide the widget and its outer wrapper.
          setRecsVisible(false);
          // Return an empty string to prevent rendering 'undefined'
          return '';
        }
      }
    })
  ]);

  relatedSearch.start();
}
// Lazy-load the recommendations: start only once the bottom of the viewport is within
// 500px of the end of the page, checked on every scroll and once right after DOM ready.
document.addEventListener('DOMContentLoaded', () => {
  let hasStarted = false;

  const maybeStart = () => {
    if (hasStarted) return;

    const viewportBottom = window.scrollY + window.innerHeight;
    const nearPageEnd = viewportBottom >= document.body.scrollHeight - 500;
    if (!nearPageEnd) return;

    // One-shot: flag first, then detach the listener before kicking off the load.
    hasStarted = true;
    window.removeEventListener('scroll', maybeStart);
    initAlgoliaRecommendations();
  };

  window.addEventListener('scroll', maybeStart, { passive: true });
  maybeStart(); // covers short pages and loads that begin already scrolled near the end
});
</script>
</div> -->
<span style="font-family: 'Raleway', Helvetica, sans-serif;">Browse related tags:</span> <span class="no-italics">[
<a class='tag' href="/tag/machinelearning/">machinelearning</a>
<a class='tag' href="/tag/python/">python</a>
<a class='tag' href="/tag/🛠/">🛠</a>
]
</span> <span style="font-family: 'Raleway', Helvetica, sans-serif;"> or </span><a href="/search/" title="Search" style="text-decoration: none; font-family: 'Raleway', Helvetica, sans-serif;"><img class="icon icon-search" src="/assets/icon-search.svg" loading="lazy" alt="" style="vertical-align: middle; margin-right: 0.25em;"/>Search</a>
<div class="PageNavigation">
<a class="prev sans-serif" href="/writing/thoughts-on-functional-programming-in-scala-course-coursera/">« Thoughts on Functional Programming in Scala Course (Coursera)</a>
<a class="next sans-serif" href="/writing/sortmyskills-is-now-live/">SortMySkills is now live! »</a>
</div>
<hr>
<p style="font-size: 15px; text-align: center; margin: 2em 0 0.5em">Join <b>11,800+</b> readers getting updates on machine learning, RecSys, LLMs, and engineering.</p>
<script src="https://f.convertkit.com/ckjs/ck.6.js" type="e8a9aab4af92c290f6da290e-text/javascript"></script>
<form action="https://app.convertkit.com/forms/4004980/subscriptions" class="seva-form formkit-form" method="post" data-sv-form="4004980" data-uid="96a310b6ce" data-format="inline" data-version="6" data-options='{"settings":{"after_subscribe":{"action":"message","success_message":"Just sent a confirmation! Check your inbox.","redirect_url":""},"analytics":{"google":null,"fathom":null,"facebook":null,"segment":null,"pinterest":null,"sparkloop":null,"googletagmanager":null},"modal":{"trigger":"timer","scroll_percentage":null,"timer":5,"devices":"all","show_once_every":15},"powered_by":{"show":false,"url":"https://convertkit.com/features/forms?utm_campaign=poweredby&utm_content=form&utm_medium=referral&utm_source=dynamic"},"recaptcha":{"enabled":false},"return_visitor":{"action":"show","custom_content":""},"slide_in":{"display_in":"bottom_right","trigger":"timer","scroll_percentage":null,"timer":5,"devices":"all","show_once_every":15},"sticky_bar":{"display_in":"top","trigger":"timer","scroll_percentage":null,"timer":5,"devices":"all","show_once_every":15}},"version":"6"}' min-width="400 500 600 700 800"><div data-style="clean"><ul class="formkit-alert formkit-alert-error" data-element="errors" data-group="alert"></ul><div data-element="fields" data-stacked="false" class="seva-fields formkit-fields"><div class="formkit-field"><input class="formkit-input" name="email_address" style="color: rgb(0, 0, 0); border-color: rgb(227, 227, 227); border-radius: 4px; font-weight: 400;" aria-label="Your email address..." placeholder="Your email address..."
required="" type="email"></div><button data-element="submit" class="formkit-submit formkit-submit" style="color: rgb(255, 255, 255); background-color: rgb(0, 123, 255); border-radius: 5px; font-weight: 400;"><div class="formkit-spinner"><div></div><div></div><div></div></div><span class="">Get email updates</span></button></div></div><style>.formkit-form[data-uid="96a310b6ce"] *{box-sizing:border-box;}.formkit-form[data-uid="96a310b6ce"]{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;}.formkit-form[data-uid="96a310b6ce"] legend{border:none;font-size:inherit;margin-bottom:10px;padding:0;position:relative;display:table;}.formkit-form[data-uid="96a310b6ce"] fieldset{border:0;padding:0.01em 0 0 0;margin:0;min-width:0;}.formkit-form[data-uid="96a310b6ce"] body:not(:-moz-handler-blocked) fieldset{display:table-cell;}.formkit-form[data-uid="96a310b6ce"] h1,.formkit-form[data-uid="96a310b6ce"] h2,.formkit-form[data-uid="96a310b6ce"] h3,.formkit-form[data-uid="96a310b6ce"] h4,.formkit-form[data-uid="96a310b6ce"] h5,.formkit-form[data-uid="96a310b6ce"] h6{color:inherit;font-size:inherit;font-weight:inherit;}.formkit-form[data-uid="96a310b6ce"] h2{font-size:1.5em;margin:1em 0;}.formkit-form[data-uid="96a310b6ce"] h3{font-size:1.17em;margin:1em 0;}.formkit-form[data-uid="96a310b6ce"] p{color:inherit;font-size:inherit;font-weight:inherit;}.formkit-form[data-uid="96a310b6ce"] ol:not([template-default]),.formkit-form[data-uid="96a310b6ce"] ul:not([template-default]),.formkit-form[data-uid="96a310b6ce"] blockquote:not([template-default]){text-align:left;}.formkit-form[data-uid="96a310b6ce"] p:not([template-default]),.formkit-form[data-uid="96a310b6ce"] hr:not([template-default]),.formkit-form[data-uid="96a310b6ce"] blockquote:not([template-default]),.formkit-form[data-uid="96a310b6ce"] ol:not([template-default]),.formkit-form[data-uid="96a310b6ce"] ul:not([template-default]){color:inherit;font-style:initial;}.formkit-form[data-uid="96a310b6ce"] 
.ordered-list,.formkit-form[data-uid="96a310b6ce"] .unordered-list{list-style-position:outside !important;padding-left:1em;}.formkit-form[data-uid="96a310b6ce"] .list-item{padding-left:0;}.formkit-form[data-uid="96a310b6ce"][data-format="modal"]{display:none;}.formkit-form[data-uid="96a310b6ce"][data-format="slide in"]{display:none;}.formkit-form[data-uid="96a310b6ce"][data-format="sticky bar"]{display:none;}.formkit-sticky-bar .formkit-form[data-uid="96a310b6ce"][data-format="sticky bar"]{display:block;}.formkit-form[data-uid="96a310b6ce"] .formkit-input,.formkit-form[data-uid="96a310b6ce"] .formkit-select,.formkit-form[data-uid="96a310b6ce"] .formkit-checkboxes{width:100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-button,.formkit-form[data-uid="96a310b6ce"] .formkit-submit{border:0;border-radius:5px;color:#ffffff;cursor:pointer;display:inline-block;text-align:center;font-size:15px;font-weight:500;cursor:pointer;margin-bottom:15px;overflow:hidden;padding:0;position:relative;vertical-align:middle;}.formkit-form[data-uid="96a310b6ce"] .formkit-button:hover,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:hover,.formkit-form[data-uid="96a310b6ce"] .formkit-button:focus,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:focus{outline:none;}.formkit-form[data-uid="96a310b6ce"] .formkit-button:hover > span,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:hover > span,.formkit-form[data-uid="96a310b6ce"] .formkit-button:focus > span,.formkit-form[data-uid="96a310b6ce"] .formkit-submit:focus > span{background-color:rgba(0,0,0,0.1);}.formkit-form[data-uid="96a310b6ce"] .formkit-button > span,.formkit-form[data-uid="96a310b6ce"] .formkit-submit > span{display:block;-webkit-transition:all 300ms ease-in-out;transition:all 300ms ease-in-out;padding:12px 24px;}.formkit-form[data-uid="96a310b6ce"] .formkit-input{background:#ffffff;font-size:15px;padding:12px;border:1px solid #e3e3e3;-webkit-flex:1 0 auto;-ms-flex:1 0 auto;flex:1 0 
auto;line-height:1.4;margin:0;-webkit-transition:border-color ease-out 300ms;transition:border-color ease-out 300ms;}.formkit-form[data-uid="96a310b6ce"] .formkit-input:focus{outline:none;border-color:#1677be;-webkit-transition:border-color ease 300ms;transition:border-color ease 300ms;}.formkit-form[data-uid="96a310b6ce"] .formkit-input::-webkit-input-placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] .formkit-input::-moz-placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] .formkit-input:-ms-input-placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] .formkit-input::placeholder{color:inherit;opacity:0.8;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"]{position:relative;display:inline-block;width:100%;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"]::before{content:"";top:calc(50% - 2.5px);right:10px;position:absolute;pointer-events:none;border-color:#4f4f4f transparent transparent transparent;border-style:solid;border-width:6px 6px 0 6px;height:0;width:0;z-index:999;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"] select{height:auto;width:100%;cursor:pointer;color:#333333;line-height:1.4;margin-bottom:0;padding:0 6px;-webkit-appearance:none;-moz-appearance:none;appearance:none;font-size:15px;padding:12px;padding-right:25px;border:1px solid #e3e3e3;background:#ffffff;}.formkit-form[data-uid="96a310b6ce"] [data-group="dropdown"] select:focus{outline:none;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"]{text-align:left;margin:0;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"]{margin-bottom:10px;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] *{cursor:pointer;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"]:last-of-type{margin-bottom:0;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] 
input[type="checkbox"]{display:none;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] input[type="checkbox"] + label::after{content:none;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] input[type="checkbox"]:checked + label::after{border-color:#ffffff;content:"";}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] input[type="checkbox"]:checked + label::before{background:#10bf7a;border-color:#10bf7a;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label{position:relative;display:inline-block;padding-left:28px;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::before,.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::after{position:absolute;content:"";display:inline-block;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::before{height:16px;width:16px;border:1px solid #e3e3e3;background:#ffffff;left:0px;top:3px;}.formkit-form[data-uid="96a310b6ce"] [data-group="checkboxes"] [data-group="checkbox"] label::after{height:4px;width:8px;border-left:2px solid #4d4d4d;border-bottom:2px solid #4d4d4d;-webkit-transform:rotate(-45deg);-ms-transform:rotate(-45deg);transform:rotate(-45deg);left:4px;top:8px;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert{background:#f9fafb;border:1px solid #e3e3e3;border-radius:5px;-webkit-flex:1 0 auto;-ms-flex:1 0 auto;flex:1 0 auto;list-style:none;margin:25px auto;padding:12px;text-align:center;width:100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert:empty{display:none;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert-success{background:#d3fbeb;border-color:#10bf7a;color:#0c905c;}.formkit-form[data-uid="96a310b6ce"] .formkit-alert-error{background:#fde8e2;border-color:#f2643b;color:#ea4110;}.formkit-form[data-uid="96a310b6ce"] 
.formkit-spinner{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;height:0px;width:0px;margin:0 auto;position:absolute;top:0;left:0;right:0;width:0px;overflow:hidden;text-align:center;-webkit-transition:all 300ms ease-in-out;transition:all 300ms ease-in-out;}.formkit-form[data-uid="96a310b6ce"] .formkit-spinner > div{margin:auto;width:12px;height:12px;background-color:#fff;opacity:0.3;border-radius:100%;display:inline-block;-webkit-animation:formkit-bouncedelay-formkit-form-data-uid-96a310b6ce- 1.4s infinite ease-in-out both;animation:formkit-bouncedelay-formkit-form-data-uid-96a310b6ce- 1.4s infinite ease-in-out both;}.formkit-form[data-uid="96a310b6ce"] .formkit-spinner > div:nth-child(1){-webkit-animation-delay:-0.32s;animation-delay:-0.32s;}.formkit-form[data-uid="96a310b6ce"] .formkit-spinner > div:nth-child(2){-webkit-animation-delay:-0.16s;animation-delay:-0.16s;}.formkit-form[data-uid="96a310b6ce"] .formkit-submit[data-active] .formkit-spinner{opacity:1;height:100%;width:50px;}.formkit-form[data-uid="96a310b6ce"] .formkit-submit[data-active] .formkit-spinner ~ span{opacity:0;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by[data-active="false"]{opacity:0.35;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit-container{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;width:100%;z-index:5;margin:10px 0;position:relative;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit-container[data-active="false"]{opacity:0.35;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit{-webkit-align-items:center;-webkit-box-align:center;-ms-flex-align:center;align-items:center;background-color:#ffffff;border:1px solid #dde2e7;border-radius:4px;color:#373f45;cursor:pointer;display:block;height:36px;margin:0 auto;opacity:0.95;padding:0;-webkit-text-decoration:none;text-decoration:none;text-indent:100%;-webkit-transition:ease-in-out all 200ms;transition:ease-in-out all 
200ms;white-space:nowrap;overflow:hidden;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;width:190px;background-repeat:no-repeat;background-position:center;background-image:url("data:image/svg+xml;charset=utf8,%3Csvg width='162' height='20' viewBox='0 0 162 20' fill='none' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M83.0561 15.2457C86.675 15.2457 89.4722 12.5154 89.4722 9.14749C89.4722 5.99211 86.8443 4.06563 85.1038 4.06563C82.6801 4.06563 80.7373 5.76407 80.4605 8.28551C80.4092 8.75244 80.0387 9.14403 79.5686 9.14069C78.7871 9.13509 77.6507 9.12841 76.9314 9.13092C76.6217 9.13199 76.3658 8.88106 76.381 8.57196C76.4895 6.38513 77.2218 4.3404 78.618 2.76974C80.1695 1.02445 82.4289 0 85.1038 0C89.5979 0 93.8406 4.07791 93.8406 9.14749C93.8406 14.7608 89.1832 19.3113 83.1517 19.3113C78.8502 19.3113 74.5179 16.5041 73.0053 12.5795C72.9999 12.565 72.9986 12.5492 73.0015 12.534C73.0218 12.4179 73.0617 12.3118 73.1011 12.2074C73.1583 12.0555 73.2143 11.907 73.2062 11.7359L73.18 11.1892C73.174 11.0569 73.2075 10.9258 73.2764 10.8127C73.3452 10.6995 73.4463 10.6094 73.5666 10.554L73.7852 10.4523C73.9077 10.3957 74.0148 10.3105 74.0976 10.204C74.1803 10.0974 74.2363 9.97252 74.2608 9.83983C74.3341 9.43894 74.6865 9.14749 75.0979 9.14749C75.7404 9.14749 76.299 9.57412 76.5088 10.1806C77.5188 13.1 79.1245 15.2457 83.0561 15.2457Z' fill='%23373F45'/%3E%3Cpath d='M155.758 6.91365C155.028 6.91365 154.804 6.47916 154.804 5.98857C154.804 5.46997 154.986 5.06348 155.758 5.06348C156.53 5.06348 156.712 5.46997 156.712 5.98857C156.712 6.47905 156.516 6.91365 155.758 6.91365ZM142.441 12.9304V9.32833L141.415 9.32323V8.90392C141.415 8.44719 141.786 8.07758 142.244 8.07986L142.441 8.08095V6.55306L144.082 6.09057V8.08073H145.569V8.50416C145.569 8.61242 145.548 8.71961 145.506 8.81961C145.465 8.91961 145.404 9.01047 145.328 9.08699C145.251 9.16351 145.16 9.2242 145.06 9.26559C144.96 9.30698 144.853 9.32826 144.745 9.32822H144.082V12.7201C144.082 
13.2423 144.378 13.4256 144.76 13.4887C145.209 13.5629 145.583 13.888 145.583 14.343V14.9626C144.029 14.9626 142.441 14.8942 142.441 12.9304Z' fill='%23373F45'/%3E%3Cpath d='M110.058 7.92554C108.417 7.88344 106.396 8.92062 106.396 11.5137C106.396 14.0646 108.417 15.0738 110.058 15.0318C111.742 15.0738 113.748 14.0646 113.748 11.5137C113.748 8.92062 111.742 7.88344 110.058 7.92554ZM110.07 13.7586C108.878 13.7586 108.032 12.8905 108.032 11.461C108.032 10.1013 108.878 9.20569 110.071 9.20569C111.263 9.20569 112.101 10.0995 112.101 11.459C112.101 12.8887 111.263 13.7586 110.07 13.7586Z' fill='%23373F45'/%3E%3Cpath d='M118.06 7.94098C119.491 7.94098 120.978 8.33337 120.978 11.1366V14.893H120.063C119.608 14.893 119.238 14.524 119.238 14.0689V10.9965C119.238 9.66506 118.747 9.16047 117.891 9.16047C117.414 9.16047 116.797 9.52486 116.502 9.81915V14.069C116.502 14.1773 116.481 14.2845 116.44 14.3845C116.398 14.4845 116.337 14.5753 116.261 14.6519C116.184 14.7284 116.093 14.7891 115.993 14.8305C115.893 14.8719 115.786 14.8931 115.678 14.8931H114.847V8.10918H115.773C115.932 8.10914 116.087 8.16315 116.212 8.26242C116.337 8.36168 116.424 8.50033 116.46 8.65577C116.881 8.19328 117.428 7.94098 118.06 7.94098ZM122.854 8.09713C123.024 8.09708 123.19 8.1496 123.329 8.2475C123.468 8.34541 123.574 8.48391 123.631 8.64405L125.133 12.8486L126.635 8.64415C126.692 8.48402 126.798 8.34551 126.937 8.2476C127.076 8.1497 127.242 8.09718 127.412 8.09724H128.598L126.152 14.3567C126.091 14.5112 125.986 14.6439 125.849 14.7374C125.711 14.831 125.549 14.881 125.383 14.8809H124.333L121.668 8.09713H122.854Z' fill='%23373F45'/%3E%3Cpath d='M135.085 14.5514C134.566 14.7616 133.513 15.0416 132.418 15.0416C130.496 15.0416 129.024 13.9345 129.024 11.4396C129.024 9.19701 130.451 7.99792 132.191 7.99792C134.338 7.99792 135.254 9.4378 135.158 11.3979C135.139 11.8029 134.786 12.0983 134.38 12.0983H130.679C130.763 13.1916 131.562 13.7662 132.615 13.7662C133.028 13.7662 133.462 13.7452 133.983 13.6481C134.535 
13.545 135.085 13.9375 135.085 14.4985V14.5514ZM133.673 10.949C133.785 9.87621 133.061 9.28752 132.191 9.28752C131.321 9.28752 130.734 9.93979 130.679 10.9489L133.673 10.949Z' fill='%23373F45'/%3E%3Cpath d='M137.345 8.11122C137.497 8.11118 137.645 8.16229 137.765 8.25635C137.884 8.35041 137.969 8.48197 138.005 8.62993C138.566 8.20932 139.268 7.94303 139.759 7.94303C139.801 7.94303 140.068 7.94303 140.489 7.99913V8.7265C140.489 9.11748 140.15 9.4147 139.759 9.4147C139.31 9.4147 138.651 9.5829 138.131 9.8773V14.8951H136.462V8.11112L137.345 8.11122ZM156.6 14.0508V8.09104H155.769C155.314 8.09104 154.944 8.45999 154.944 8.9151V14.8748H155.775C156.23 14.8748 156.6 14.5058 156.6 14.0508ZM158.857 12.9447V9.34254H157.749V8.91912C157.749 8.46401 158.118 8.09506 158.574 8.09506H158.857V6.56739L160.499 6.10479V8.09506H161.986V8.51848C161.986 8.97359 161.617 9.34254 161.161 9.34254H160.499V12.7345C160.499 13.2566 160.795 13.44 161.177 13.503C161.626 13.5774 162 13.9024 162 14.3574V14.977C160.446 14.977 158.857 14.9086 158.857 12.9447ZM98.1929 10.1124C98.2033 6.94046 100.598 5.16809 102.895 5.16809C104.171 5.16809 105.342 5.44285 106.304 6.12953L105.914 6.6631C105.654 7.02011 105.16 7.16194 104.749 6.99949C104.169 6.7702 103.622 6.7218 103.215 6.7218C101.335 6.7218 99.9169 7.92849 99.9068 10.1123C99.9169 12.2959 101.335 13.5201 103.215 13.5201C103.622 13.5201 104.169 13.4717 104.749 13.2424C105.16 13.0799 105.654 13.2046 105.914 13.5615L106.304 14.0952C105.342 14.7819 104.171 15.0566 102.895 15.0566C100.598 15.0566 98.2033 13.2842 98.1929 10.1124ZM147.619 5.21768C148.074 5.21768 148.444 5.58663 148.444 6.04174V9.81968L151.82 5.58131C151.897 5.47733 151.997 5.39282 152.112 5.3346C152.227 5.27638 152.355 5.24607 152.484 5.24611H153.984L150.166 10.0615L153.984 14.8749H152.484C152.355 14.8749 152.227 14.8446 152.112 14.7864C151.997 14.7281 151.897 14.6436 151.82 14.5397L148.444 10.3025V14.0508C148.444 14.5059 148.074 14.8749 147.619 14.8749H146.746V5.21768H147.619Z' 
fill='%23373F45'/%3E%3Cpath d='M0.773438 6.5752H2.68066C3.56543 6.5752 4.2041 6.7041 4.59668 6.96191C4.99219 7.21973 5.18994 7.62695 5.18994 8.18359C5.18994 8.55859 5.09326 8.87061 4.8999 9.11963C4.70654 9.36865 4.42822 9.52539 4.06494 9.58984V9.63379C4.51611 9.71875 4.84717 9.88721 5.05811 10.1392C5.27197 10.3882 5.37891 10.7266 5.37891 11.1543C5.37891 11.7314 5.17676 12.1841 4.77246 12.5122C4.37109 12.8374 3.81152 13 3.09375 13H0.773438V6.5752ZM1.82373 9.22949H2.83447C3.27393 9.22949 3.59473 9.16064 3.79688 9.02295C3.99902 8.88232 4.1001 8.64502 4.1001 8.31104C4.1001 8.00928 3.99023 7.79102 3.77051 7.65625C3.55371 7.52148 3.20801 7.4541 2.7334 7.4541H1.82373V9.22949ZM1.82373 10.082V12.1167H2.93994C3.37939 12.1167 3.71045 12.0332 3.93311 11.8662C4.15869 11.6963 4.27148 11.4297 4.27148 11.0664C4.27148 10.7324 4.15723 10.4849 3.92871 10.3237C3.7002 10.1626 3.35303 10.082 2.88721 10.082H1.82373Z' fill='%23373F45'/%3E%3Cpath d='M13.011 6.5752V10.7324C13.011 11.207 12.9084 11.623 12.7034 11.9805C12.5012 12.335 12.2068 12.6089 11.8201 12.8022C11.4363 12.9927 10.9763 13.0879 10.4402 13.0879C9.6433 13.0879 9.02368 12.877 8.5813 12.4551C8.13892 12.0332 7.91772 11.4531 7.91772 10.7148V6.5752H8.9724V10.6401C8.9724 11.1704 9.09546 11.5615 9.34155 11.8135C9.58765 12.0654 9.96557 12.1914 10.4753 12.1914C11.4656 12.1914 11.9607 11.6714 11.9607 10.6313V6.5752H13.011Z' fill='%23373F45'/%3E%3Cpath d='M15.9146 13V6.5752H16.9649V13H15.9146Z' fill='%23373F45'/%3E%3Cpath d='M19.9255 13V6.5752H20.9758V12.0991H23.696V13H19.9255Z' fill='%23373F45'/%3E%3Cpath d='M28.2828 13H27.2325V7.47607H25.3428V6.5752H30.1724V7.47607H28.2828V13Z' fill='%23373F45'/%3E%3Cpath d='M41.9472 13H40.8046L39.7148 9.16796C39.6679 9.00097 39.6093 8.76074 39.539 8.44727C39.4687 8.13086 39.4262 7.91113 39.4116 7.78809C39.3823 7.97559 39.3339 8.21875 39.2665 8.51758C39.2021 8.81641 39.1479 9.03905 39.1039 9.18554L38.0405 13H36.8979L36.0673 9.7832L35.2236 6.5752H36.2958L37.2143 10.3193C37.3578 10.9199 37.4604 11.4502 
37.5219 11.9102C37.5541 11.6611 37.6025 11.3828 37.6669 11.0752C37.7314 10.7676 37.79 10.5186 37.8427 10.3281L38.8886 6.5752H39.9301L41.0024 10.3457C41.1049 10.6943 41.2133 11.2158 41.3276 11.9102C41.3715 11.4912 41.477 10.958 41.644 10.3105L42.558 6.5752H43.6215L41.9472 13Z' fill='%23373F45'/%3E%3Cpath d='M45.7957 13V6.5752H46.846V13H45.7957Z' fill='%23373F45'/%3E%3Cpath d='M52.0258 13H50.9755V7.47607H49.0859V6.5752H53.9155V7.47607H52.0258V13Z' fill='%23373F45'/%3E%3Cpath d='M61.2312 13H60.1765V10.104H57.2146V13H56.1643V6.5752H57.2146V9.20312H60.1765V6.5752H61.2312V13Z' fill='%23373F45'/%3E%3C/svg%3E");}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit:hover,.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit:focus{background-color:#ffffff;-webkit-transform:scale(1.025) perspective(1px);-ms-transform:scale(1.025) perspective(1px);transform:scale(1.025) perspective(1px);opacity:1;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit[data-variant="dark"],.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit[data-variant="light"]{background-color:transparent;border-color:transparent;width:166px;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit[data-variant="light"]{color:#ffffff;background-image:url("data:image/svg+xml;charset=utf8,%3Csvg width='162' height='20' viewBox='0 0 162 20' fill='none' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M83.0561 15.2457C86.675 15.2457 89.4722 12.5154 89.4722 9.14749C89.4722 5.99211 86.8443 4.06563 85.1038 4.06563C82.6801 4.06563 80.7373 5.76407 80.4605 8.28551C80.4092 8.75244 80.0387 9.14403 79.5686 9.14069C78.7871 9.13509 77.6507 9.12841 76.9314 9.13092C76.6217 9.13199 76.3658 8.88106 76.381 8.57196C76.4895 6.38513 77.2218 4.3404 78.618 2.76974C80.1695 1.02445 82.4289 0 85.1038 0C89.5979 0 93.8406 4.07791 93.8406 9.14749C93.8406 14.7608 89.1832 19.3113 83.1517 19.3113C78.8502 19.3113 74.5179 16.5041 73.0053 12.5795C72.9999 12.565 72.9986 12.5492 
73.0015 12.534C73.0218 12.4179 73.0617 12.3118 73.1011 12.2074C73.1583 12.0555 73.2143 11.907 73.2062 11.7359L73.18 11.1892C73.174 11.0569 73.2075 10.9258 73.2764 10.8127C73.3452 10.6995 73.4463 10.6094 73.5666 10.554L73.7852 10.4523C73.9077 10.3957 74.0148 10.3105 74.0976 10.204C74.1803 10.0974 74.2363 9.97252 74.2608 9.83983C74.3341 9.43894 74.6865 9.14749 75.0979 9.14749C75.7404 9.14749 76.299 9.57412 76.5088 10.1806C77.5188 13.1 79.1245 15.2457 83.0561 15.2457Z' fill='white'/%3E%3Cpath d='M155.758 6.91365C155.028 6.91365 154.804 6.47916 154.804 5.98857C154.804 5.46997 154.986 5.06348 155.758 5.06348C156.53 5.06348 156.712 5.46997 156.712 5.98857C156.712 6.47905 156.516 6.91365 155.758 6.91365ZM142.441 12.9304V9.32833L141.415 9.32323V8.90392C141.415 8.44719 141.786 8.07758 142.244 8.07986L142.441 8.08095V6.55306L144.082 6.09057V8.08073H145.569V8.50416C145.569 8.61242 145.548 8.71961 145.506 8.81961C145.465 8.91961 145.404 9.01047 145.328 9.08699C145.251 9.16351 145.16 9.2242 145.06 9.26559C144.96 9.30698 144.853 9.32826 144.745 9.32822H144.082V12.7201C144.082 13.2423 144.378 13.4256 144.76 13.4887C145.209 13.5629 145.583 13.888 145.583 14.343V14.9626C144.029 14.9626 142.441 14.8942 142.441 12.9304Z' fill='white'/%3E%3Cpath d='M110.058 7.92554C108.417 7.88344 106.396 8.92062 106.396 11.5137C106.396 14.0646 108.417 15.0738 110.058 15.0318C111.742 15.0738 113.748 14.0646 113.748 11.5137C113.748 8.92062 111.742 7.88344 110.058 7.92554ZM110.07 13.7586C108.878 13.7586 108.032 12.8905 108.032 11.461C108.032 10.1013 108.878 9.20569 110.071 9.20569C111.263 9.20569 112.101 10.0995 112.101 11.459C112.101 12.8887 111.263 13.7586 110.07 13.7586Z' fill='white'/%3E%3Cpath d='M118.06 7.94098C119.491 7.94098 120.978 8.33337 120.978 11.1366V14.893H120.063C119.608 14.893 119.238 14.524 119.238 14.0689V10.9965C119.238 9.66506 118.747 9.16047 117.891 9.16047C117.414 9.16047 116.797 9.52486 116.502 9.81915V14.069C116.502 14.1773 116.481 14.2845 116.44 14.3845C116.398 14.4845 116.337 
14.5753 116.261 14.6519C116.184 14.7284 116.093 14.7891 115.993 14.8305C115.893 14.8719 115.786 14.8931 115.678 14.8931H114.847V8.10918H115.773C115.932 8.10914 116.087 8.16315 116.212 8.26242C116.337 8.36168 116.424 8.50033 116.46 8.65577C116.881 8.19328 117.428 7.94098 118.06 7.94098ZM122.854 8.09713C123.024 8.09708 123.19 8.1496 123.329 8.2475C123.468 8.34541 123.574 8.48391 123.631 8.64405L125.133 12.8486L126.635 8.64415C126.692 8.48402 126.798 8.34551 126.937 8.2476C127.076 8.1497 127.242 8.09718 127.412 8.09724H128.598L126.152 14.3567C126.091 14.5112 125.986 14.6439 125.849 14.7374C125.711 14.831 125.549 14.881 125.383 14.8809H124.333L121.668 8.09713H122.854Z' fill='white'/%3E%3Cpath d='M135.085 14.5514C134.566 14.7616 133.513 15.0416 132.418 15.0416C130.496 15.0416 129.024 13.9345 129.024 11.4396C129.024 9.19701 130.451 7.99792 132.191 7.99792C134.338 7.99792 135.254 9.4378 135.158 11.3979C135.139 11.8029 134.786 12.0983 134.38 12.0983H130.679C130.763 13.1916 131.562 13.7662 132.615 13.7662C133.028 13.7662 133.462 13.7452 133.983 13.6481C134.535 13.545 135.085 13.9375 135.085 14.4985V14.5514ZM133.673 10.949C133.785 9.87621 133.061 9.28752 132.191 9.28752C131.321 9.28752 130.734 9.93979 130.679 10.9489L133.673 10.949Z' fill='white'/%3E%3Cpath d='M137.345 8.11122C137.497 8.11118 137.645 8.16229 137.765 8.25635C137.884 8.35041 137.969 8.48197 138.005 8.62993C138.566 8.20932 139.268 7.94303 139.759 7.94303C139.801 7.94303 140.068 7.94303 140.489 7.99913V8.7265C140.489 9.11748 140.15 9.4147 139.759 9.4147C139.31 9.4147 138.651 9.5829 138.131 9.8773V14.8951H136.462V8.11112L137.345 8.11122ZM156.6 14.0508V8.09104H155.769C155.314 8.09104 154.944 8.45999 154.944 8.9151V14.8748H155.775C156.23 14.8748 156.6 14.5058 156.6 14.0508ZM158.857 12.9447V9.34254H157.749V8.91912C157.749 8.46401 158.118 8.09506 158.574 8.09506H158.857V6.56739L160.499 6.10479V8.09506H161.986V8.51848C161.986 8.97359 161.617 9.34254 161.161 9.34254H160.499V12.7345C160.499 13.2566 160.795 13.44 161.177 
13.503C161.626 13.5774 162 13.9024 162 14.3574V14.977C160.446 14.977 158.857 14.9086 158.857 12.9447ZM98.1929 10.1124C98.2033 6.94046 100.598 5.16809 102.895 5.16809C104.171 5.16809 105.342 5.44285 106.304 6.12953L105.914 6.6631C105.654 7.02011 105.16 7.16194 104.749 6.99949C104.169 6.7702 103.622 6.7218 103.215 6.7218C101.335 6.7218 99.9169 7.92849 99.9068 10.1123C99.9169 12.2959 101.335 13.5201 103.215 13.5201C103.622 13.5201 104.169 13.4717 104.749 13.2424C105.16 13.0799 105.654 13.2046 105.914 13.5615L106.304 14.0952C105.342 14.7819 104.171 15.0566 102.895 15.0566C100.598 15.0566 98.2033 13.2842 98.1929 10.1124ZM147.619 5.21768C148.074 5.21768 148.444 5.58663 148.444 6.04174V9.81968L151.82 5.58131C151.897 5.47733 151.997 5.39282 152.112 5.3346C152.227 5.27638 152.355 5.24607 152.484 5.24611H153.984L150.166 10.0615L153.984 14.8749H152.484C152.355 14.8749 152.227 14.8446 152.112 14.7864C151.997 14.7281 151.897 14.6436 151.82 14.5397L148.444 10.3025V14.0508C148.444 14.5059 148.074 14.8749 147.619 14.8749H146.746V5.21768H147.619Z' fill='white'/%3E%3Cpath d='M0.773438 6.5752H2.68066C3.56543 6.5752 4.2041 6.7041 4.59668 6.96191C4.99219 7.21973 5.18994 7.62695 5.18994 8.18359C5.18994 8.55859 5.09326 8.87061 4.8999 9.11963C4.70654 9.36865 4.42822 9.52539 4.06494 9.58984V9.63379C4.51611 9.71875 4.84717 9.88721 5.05811 10.1392C5.27197 10.3882 5.37891 10.7266 5.37891 11.1543C5.37891 11.7314 5.17676 12.1841 4.77246 12.5122C4.37109 12.8374 3.81152 13 3.09375 13H0.773438V6.5752ZM1.82373 9.22949H2.83447C3.27393 9.22949 3.59473 9.16064 3.79688 9.02295C3.99902 8.88232 4.1001 8.64502 4.1001 8.31104C4.1001 8.00928 3.99023 7.79102 3.77051 7.65625C3.55371 7.52148 3.20801 7.4541 2.7334 7.4541H1.82373V9.22949ZM1.82373 10.082V12.1167H2.93994C3.37939 12.1167 3.71045 12.0332 3.93311 11.8662C4.15869 11.6963 4.27148 11.4297 4.27148 11.0664C4.27148 10.7324 4.15723 10.4849 3.92871 10.3237C3.7002 10.1626 3.35303 10.082 2.88721 10.082H1.82373Z' fill='white'/%3E%3Cpath d='M13.011 
6.5752V10.7324C13.011 11.207 12.9084 11.623 12.7034 11.9805C12.5012 12.335 12.2068 12.6089 11.8201 12.8022C11.4363 12.9927 10.9763 13.0879 10.4402 13.0879C9.6433 13.0879 9.02368 12.877 8.5813 12.4551C8.13892 12.0332 7.91772 11.4531 7.91772 10.7148V6.5752H8.9724V10.6401C8.9724 11.1704 9.09546 11.5615 9.34155 11.8135C9.58765 12.0654 9.96557 12.1914 10.4753 12.1914C11.4656 12.1914 11.9607 11.6714 11.9607 10.6313V6.5752H13.011Z' fill='white'/%3E%3Cpath d='M15.9146 13V6.5752H16.9649V13H15.9146Z' fill='white'/%3E%3Cpath d='M19.9255 13V6.5752H20.9758V12.0991H23.696V13H19.9255Z' fill='white'/%3E%3Cpath d='M28.2828 13H27.2325V7.47607H25.3428V6.5752H30.1724V7.47607H28.2828V13Z' fill='white'/%3E%3Cpath d='M41.9472 13H40.8046L39.7148 9.16796C39.6679 9.00097 39.6093 8.76074 39.539 8.44727C39.4687 8.13086 39.4262 7.91113 39.4116 7.78809C39.3823 7.97559 39.3339 8.21875 39.2665 8.51758C39.2021 8.81641 39.1479 9.03905 39.1039 9.18554L38.0405 13H36.8979L36.0673 9.7832L35.2236 6.5752H36.2958L37.2143 10.3193C37.3578 10.9199 37.4604 11.4502 37.5219 11.9102C37.5541 11.6611 37.6025 11.3828 37.6669 11.0752C37.7314 10.7676 37.79 10.5186 37.8427 10.3281L38.8886 6.5752H39.9301L41.0024 10.3457C41.1049 10.6943 41.2133 11.2158 41.3276 11.9102C41.3715 11.4912 41.477 10.958 41.644 10.3105L42.558 6.5752H43.6215L41.9472 13Z' fill='white'/%3E%3Cpath d='M45.7957 13V6.5752H46.846V13H45.7957Z' fill='white'/%3E%3Cpath d='M52.0258 13H50.9755V7.47607H49.0859V6.5752H53.9155V7.47607H52.0258V13Z' fill='white'/%3E%3Cpath d='M61.2312 13H60.1765V10.104H57.2146V13H56.1643V6.5752H57.2146V9.20312H60.1765V6.5752H61.2312V13Z' fill='white'/%3E%3C/svg%3E");}@-webkit-keyframes formkit-bouncedelay-formkit-form-data-uid-96a310b6ce-{0%,80%,100%{-webkit-transform:scale(0);-ms-transform:scale(0);transform:scale(0);}40%{-webkit-transform:scale(1);-ms-transform:scale(1);transform:scale(1);}}@keyframes 
formkit-bouncedelay-formkit-form-data-uid-96a310b6ce-{0%,80%,100%{-webkit-transform:scale(0);-ms-transform:scale(0);transform:scale(0);}40%{-webkit-transform:scale(1);-ms-transform:scale(1);transform:scale(1);}}.formkit-form[data-uid="96a310b6ce"] blockquote{padding:10px 20px;margin:0 0 20px;border-left:5px solid #e1e1e1;}.formkit-form[data-uid="96a310b6ce"] .seva-custom-content{padding:15px;font-size:16px;color:#fff;mix-blend-mode:difference;}.formkit-form[data-uid="96a310b6ce"] .formkit-modal.guard{max-width:420px;width:100%;} .formkit-form[data-uid="96a310b6ce"]{max-width:700px;}.formkit-form[data-uid="96a310b6ce"] [data-style="clean"]{width:100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-fields{display:-webkit-box;display:-webkit-flex;display:-ms-flexbox;display:flex;-webkit-flex-wrap:wrap;-ms-flex-wrap:wrap;flex-wrap:wrap;margin:0 auto;}.formkit-form[data-uid="96a310b6ce"] .formkit-field,.formkit-form[data-uid="96a310b6ce"] .formkit-submit{margin:0 0 15px 0;-webkit-flex:1 0 100%;-ms-flex:1 0 100%;flex:1 0 100%;}.formkit-form[data-uid="96a310b6ce"] .formkit-powered-by-convertkit-container{margin:0;}.formkit-form[data-uid="96a310b6ce"] .formkit-submit{position:static;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] [data-style="clean"],.formkit-form[data-uid="96a310b6ce"][min-width~="800"] [data-style="clean"]{padding:10px;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"],.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"]{margin-left:-5px;margin-right:-5px;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-field,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] .formkit-field,.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-submit,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] 
.formkit-submit{margin:0 5px 15px 5px;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-field,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] .formkit-field{-webkit-flex:100 1 auto;-ms-flex:100 1 auto;flex:100 1 auto;}.formkit-form[data-uid="96a310b6ce"][min-width~="700"] .formkit-fields[data-stacked="false"] .formkit-submit,.formkit-form[data-uid="96a310b6ce"][min-width~="800"] .formkit-fields[data-stacked="false"] .formkit-submit{-webkit-flex:1 1 auto;-ms-flex:1 1 auto;flex:1 1 auto;} </style></form>
<hr>
<!-- Post comments -->
<script src="https://utteranc.es/client.js" repo="eugeneyan/eugeneyan-comments" issue-term="url" theme="github-light" crossorigin="anonymous" type="e8a9aab4af92c290f6da290e-text/javascript">
</script>
</div>
</div>
<footer class="footer">
<div class="footer-col-wrapper">
<div class="col-sm-3 footer-col">
<ul class="contact-list">
<!-- <li>
<img class="icon" src="/assets/bluesky.svg" loading="lazy" alt=""/>
<a rel="me" href="https://bsky.app/profile/eugeneyan.com" target="_blank" title="Bluesky">Bluesky</a>
</li> -->
<li>
<img class="icon" src="/assets/icon-twitter.svg" loading="lazy" alt=""/>
<a href="https://twitter.com/eugeneyan" target="_blank" title="Twitter">Twitter</a>
</li>
<li>
<img class="icon" src="/assets/icon-linkedin.svg" loading="lazy" alt=""/>
<a href="https://www.linkedin.com/in/eugeneyan/" target="_blank" title="LinkedIn">LinkedIn</a>
</li>
<!-- <li>
<img class="icon" src="/assets/icon-threads.svg" loading="lazy" alt=""/>
<a href="https://www.threads.net/@eugeneyan" target="_blank" title="Threads">Threads</a>
</li> -->
<li>
<img class="icon" src="/assets/icon-github.svg" loading="lazy" alt=""/>
<a href="https://github.com/eugeneyan/" target="_blank" title="GitHub">GitHub</a>
</li>
</ul>
</div>
<div class="col-sm-9 footer-col">
<p>I'm a Member of Technical Staff at Anthropic. I work to bridge the field and the frontier, and help build safe, reliable AI systems that scale. I've led ML/AI teams at Amazon, Alibaba, Lazada, and a Healthtech Series A, and write about LLMs, RecSys, and engineering at <a href="https://eugeneyan.com/" target="_blank">eugeneyan.com</a>.</p>
</div>
</div>
<p class="copyright">© Eugene Yan 2015 - 2026
• <a href="/site-feedback/">Feedback</a>
• <a href="/rss/">RSS</a>
</p>
</footer>
</div> <!-- /container -->
</div>
<script src="/cdn-cgi/scripts/7d0fa10a/cloudflare-static/rocket-loader.min.js" data-cf-settings="e8a9aab4af92c290f6da290e-|49" defer></script><script defer src="https://static.cloudflareinsights.com/beacon.min.js/v833ccba57c9e4d2798f2e76cebdd09a11778172276447" integrity="sha512-57MDmcccJXYtNnH+ZiBwzC4jb2rvgVCEokYN+L/nLlmO8rfYT/gIpW2A569iJ/3b+0UEasghjuZH/ma3wIs/EQ==" data-cf-beacon='{"version":"2024.11.0","token":"4ba4ab6acad14218941be7fa4aaad127","r":1,"server_timing":{"name":{"cfCacheStatus":true,"cfEdge":true,"cfExtPri":true,"cfL4":true,"cfOrigin":true,"cfSpeedBrain":true},"location_startswith":null}}' crossorigin="anonymous"></script>
</body>
<script type="e8a9aab4af92c290f6da290e-text/javascript">
// Email links (a.js-email) carry their address split across three data
// attributes (data-u, data-d, data-t) so the full address never appears in the
// page source. One delegated click listener joins them as u@d.t on demand.
document.addEventListener('click', function (event) {
  const emailLink = event.target.closest('a.js-email');
  if (emailLink) {
    // Suppress the anchor's own navigation; the mailto: URL below replaces it.
    event.preventDefault();
    const { u, d, t } = emailLink.dataset;
    window.location.href = `mailto:${u}@${d}.${t}`;
  }
});
// Click tracking: once the DOM is parsed, report clicks on same-origin links
// inside the main container as `clickedObjectIDs` events via `aa`.
document.addEventListener('DOMContentLoaded', function () {
  // Nothing is wired up unless `aa` exists as a function at this point.
  if (typeof aa !== 'function') return;

  // Path of this page; it only decides which event name is reported.
  const pagePath = '/writing/product-categorization-api-part-1-data-acquisition-and-formatting/';

  // Tag pages get their own name. Otherwise the number of non-empty path
  // segments separates site pages (0-1, e.g. '/', '/writing/', '/speaking/')
  // from posts (2 or more).
  let eventName = 'Tag Page Link Clicked';
  if (!pagePath.startsWith('/tag/')) {
    const depth = pagePath.split('/').filter(Boolean).length;
    eventName = depth > 1 ? 'Post Link Clicked' : 'Site Page Link Clicked';
  }

  const container = document.querySelector('div.container');
  if (!container) return;

  // Delegated listener: a single handler covers every link in the container.
  container.addEventListener('click', function (event) {
    const anchor = event.target.closest('a');
    if (!anchor) return;
    // Skip email links, anchors without an href, and links to other origins.
    if (anchor.classList.contains('js-email')) return;
    if (!anchor.href) return;
    if (anchor.origin !== window.location.origin) return;

    // The clicked link's path serves as the objectID.
    aa('clickedObjectIDs', {
      index: 'eugeneyan.com',
      eventName: eventName,
      objectIDs: [anchor.pathname]
    });
  });
});
// Track page read depth for conversion tracking.
// Sends one `convertedObjectIDs` event the first time the bottom edge of the
// viewport reaches at least 50% of the document height, then detaches itself so
// later scroll events no longer invoke the handler.
// NOTE(review): kept as a top-level binding in case another script reads it;
// not verifiable from this part of the file.
let hasTrackedPageRead = false;
window.addEventListener('scroll', function onReadDepthScroll() {
  if (hasTrackedPageRead) return; // Only track once per page view

  // Read depth: position of the viewport's bottom edge within the document, in percent.
  const scrollTop = window.pageYOffset || document.documentElement.scrollTop;
  const scrollHeight = document.documentElement.scrollHeight;
  const clientHeight = document.documentElement.clientHeight;
  const readPercentage = (scrollTop + clientHeight) / scrollHeight * 100;

  // Below the threshold, or `aa` not available yet: stay attached and re-check
  // on the next scroll event.
  if (readPercentage < 50 || typeof aa !== 'function') return;

  hasTrackedPageRead = true;
  // The event is one-shot, so stop listening instead of returning early on
  // every subsequent scroll for the rest of the page view.
  window.removeEventListener('scroll', onReadDepthScroll);

  const objectID = window.location.pathname; // Use current page path as objectID
  const pagePath = '/writing/product-categorization-api-part-1-data-acquisition-and-formatting/';

  // Pick the event name: tag pages first, then by the number of non-empty path
  // segments: 0-1 is a site page (e.g. '/', '/writing/'), 2 or more is a post.
  let eventName;
  if (pagePath.startsWith('/tag/')) {
    eventName = 'Tag Page Read 50%';
  } else {
    const pathSegments = pagePath.split('/').filter(Boolean);
    eventName = pathSegments.length <= 1 ? 'Site Page Read 50%' : 'Post Read 50%';
  }

  // Send the convertedObjectIDs event to Algolia
  aa('convertedObjectIDs', {
    index: 'eugeneyan.com',
    eventName: eventName,
    objectIDs: [objectID]
  });
});
</script>
</html>