<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <title>NLP on Blogs by Anil</title>
    <link>https://paudelanil9.com.np/tags/nlp/</link>
    <description>Recent content in NLP on Blogs by Anil</description>
    <generator>Hugo -- 0.138.0</generator>
    <language>en</language>
    <lastBuildDate>Tue, 21 Apr 2026 00:00:00 +0000</lastBuildDate>
    <atom:link href="https://paudelanil9.com.np/tags/nlp/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>The Roman Nepali Embedding Problem</title>
      <link>https://paudelanil9.com.np/posts/romannepali_embedding_1/</link>
      <pubDate>Tue, 21 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://paudelanil9.com.np/posts/romannepali_embedding_1/</guid>
      <description>
&lt;p&gt;&lt;strong&gt;I spelled the same Nepali word four different ways and asked four open-source embedding models whether the spellings meant the same thing. The model with the prettiest-looking cosine gap wasn&amp;rsquo;t the one that actually worked — and a twenty-line preprocessing script beat all four of them without touching a single weight.&lt;/strong&gt;&lt;/p&gt;
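&lt;p&gt;To make that concrete, here is a minimal sketch of the kind of normalizer the experiment points at. Both the rules and the example word are hypothetical stand-ins rather than the actual twenty-line script; the point is only that a handful of regex substitutions can collapse Roman Nepali spelling variants into one canonical form before any embedding model sees them.&lt;/p&gt;
&lt;pre&gt;&lt;code class=&#34;language-python&#34;&gt;import re

# Hypothetical rules for illustration only, not the actual script:
# each one collapses a common Roman Nepali spelling variation.
RULES = [
    (re.compile(r'aa+'), 'a'),      # long vowels: raamro becomes ramro
    (re.compile(r'ii+|ee+'), 'i'),  # meetho becomes mitho
    (re.compile(r'oo+|uu+'), 'u'),
    (re.compile(r'chh'), 'ch'),     # aspiration is spelled inconsistently
    (re.compile(r'[wv]'), 'b'),     # w, v and b are used interchangeably
]

def normalize(text):
    # Lowercase first, then apply each substitution over the whole string.
    text = text.lower()
    for pattern, replacement in RULES:
        text = pattern.sub(replacement, text)
    return text

# Four hypothetical spellings of the same word all collapse to one form.
print({normalize(w) for w in ['chha', 'cha', 'chaa', 'chhaa']})  # {'cha'}
&lt;/code&gt;&lt;/pre&gt;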
&lt;p&gt;This post is a small experiment with a strong conclusion: if you are shipping NLP for Nepali users today, the best thing you can do is not a bigger model — it&amp;rsquo;s a regex.&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
