<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>Forem: Aman Sachan</title>
    <description>The latest articles on Forem by Aman Sachan (@aman_sachan_126d19c4a2773).</description>
    <link>https://forem.com/aman_sachan_126d19c4a2773</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F3905077%2Fb9a51a6d-6ccb-4265-afe4-af43e57b0e81.jpg</url>
      <title>Forem: Aman Sachan</title>
      <link>https://forem.com/aman_sachan_126d19c4a2773</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://forem.com/feed/aman_sachan_126d19c4a2773"/>
    <language>en</language>
    <item>
      <title>GhostPilot: GPS-Denied Drone Navigation with Visual SLAM and Agentic AI</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 20:20:57 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/ghostpilot-gps-denied-drone-navigation-with-visual-slam-and-agentic-ai-416l</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/ghostpilot-gps-denied-drone-navigation-with-visual-slam-and-agentic-ai-416l</guid>
      <description>&lt;h2&gt;
  
  
  The Problem
&lt;/h2&gt;

&lt;p&gt;Drones are useless without GPS. GPS jamming in Ukraine took out 85% of some squadrons. Indoors, urban canyons, forests — GPS simply fails.&lt;/p&gt;

&lt;p&gt;Military solutions cost $50K+. Academic code is unmaintained. There's no real open-source answer for GPS-denied drone flight.&lt;/p&gt;

&lt;h2&gt;
  
  
  What I Built
&lt;/h2&gt;

&lt;p&gt;&lt;strong&gt;GhostPilot&lt;/strong&gt; — an open-source visual-inertial SLAM + agentic AI navigation stack that lets any drone fly without GPS.&lt;/p&gt;

&lt;h2&gt;
  
  
  Key Features
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Visual-Inertial SLAM&lt;/strong&gt;: Camera + IMU fusion for 6DOF pose estimation — no GPS needed&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Agentic Mission Planner&lt;/strong&gt;: Natural language commands → executable navigation goals

&lt;ul&gt;
&lt;li&gt;"Fly to the third floor, check each room for occupants, land at the helipad"&lt;/li&gt;
&lt;li&gt;"Inspect the roof, avoid personnel, resume path at waypoint B"&lt;/li&gt;
&lt;/ul&gt;


&lt;/li&gt;

&lt;li&gt;

&lt;strong&gt;Nav2 Integration&lt;/strong&gt;: Industry-standard path planning + obstacle avoidance&lt;/li&gt;

&lt;li&gt;

&lt;strong&gt;Edge-Native&lt;/strong&gt;: Runs on Jetson Orin / Raspberry Pi 5, no cloud dependency&lt;/li&gt;

&lt;/ul&gt;

&lt;h2&gt;
  
  
  Quick Start
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;git clone https://github.com/amsach/GhostPilot.git
&lt;span class="nb"&gt;cd &lt;/span&gt;GhostPilot
./scripts/setup_jetson.sh
ros2 launch ghostpilot_gazebo indoor_warehouse.launch.py
ros2 run ghostpilot_agent mission_parser_node
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Architecture
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Package&lt;/th&gt;
&lt;th&gt;Description&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;ghostpilot_core&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;VINS-Mono SLAM + Nav2 integration&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;ghostpilot_agent&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;LLM-based mission parser + executor&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;code&gt;ghostpilot_gazebo&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;Gazebo simulation world + models&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h2&gt;
  
  
  Hardware
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Compute&lt;/strong&gt;: NVIDIA Jetson Orin AGX or Raspberry Pi 5&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Camera&lt;/strong&gt;: Intel RealSense D435i (stereo + IMU)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Frame&lt;/strong&gt;: Any MAVLink-capable quadcopter&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Comparison
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Feature&lt;/th&gt;
&lt;th&gt;GhostPilot&lt;/th&gt;
&lt;th&gt;Skydio&lt;/th&gt;
&lt;th&gt;Military&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Cost&lt;/td&gt;
&lt;td&gt;$0 (open-source)&lt;/td&gt;
&lt;td&gt;$5K+&lt;/td&gt;
&lt;td&gt;$50K+&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;GPS-denied&lt;/td&gt;
&lt;td&gt;✅ Native&lt;/td&gt;
&lt;td&gt;⚠️ Limited&lt;/td&gt;
&lt;td&gt;✅ Yes&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Agentic AI&lt;/td&gt;
&lt;td&gt;✅ Natural language&lt;/td&gt;
&lt;td&gt;❌ Waypoints&lt;/td&gt;
&lt;td&gt;❌ Pre-programmed&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;ROS2-native&lt;/td&gt;
&lt;td&gt;✅ Full&lt;/td&gt;
&lt;td&gt;❌ Closed&lt;/td&gt;
&lt;td&gt;❌ Proprietary&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h2&gt;
  
  
  Status
&lt;/h2&gt;

&lt;p&gt;Core SLAM + Nav2 bridge working in simulation. Agentic layer in progress. PRs welcome.&lt;/p&gt;

&lt;p&gt;GitHub: &lt;a href="https://github.com/amsach/GhostPilot" rel="noopener noreferrer"&gt;https://github.com/amsach/GhostPilot&lt;/a&gt;&lt;/p&gt;

</description>
      <category>robotics</category>
      <category>ros2</category>
      <category>drone</category>
      <category>ai</category>
    </item>
    <item>
      <title>KVQuant: Run 70B LLMs on 8GB RAM with 4-bit KV Cache Quantization</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 19:45:28 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-4-bit-kv-cache-quantization-2igk</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-4-bit-kv-cache-quantization-2igk</guid>
      <description>&lt;p&gt;I compressed the KV cache of LLMs by 4x! Here's how I did it with KVQuant.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The Problem&lt;/strong&gt;: LLMs need huge memory for key-value caches during inference.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The Solution&lt;/strong&gt;: 4-bit KV cache quantization that reduces memory 4x with &amp;lt;1% accuracy loss.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Results&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;GPT-2: 512MB → 128MB (4x reduction)&lt;/li&gt;
&lt;li&gt;LLaMA-7B: 8GB → 2GB&lt;/li&gt;
&lt;li&gt;LLaMA-70B: 280GB → 70GB&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Code: github.com/AmSach/kvquant&lt;/p&gt;

</description>
      <category>python</category>
      <category>llm</category>
      <category>quantization</category>
      <category>optimization</category>
    </item>
    <item>
      <title>BitForge: Run LLMs on Microcontrollers</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 19:45:24 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/bitforge-run-llms-on-microcontrollers-5520</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/bitforge-run-llms-on-microcontrollers-5520</guid>
      <description>&lt;p&gt;I got GPT-2 running on an Arduino! Here's the quantization pipeline.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Process&lt;/strong&gt;:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Q4_K_M quantization via llama.cpp&lt;/li&gt;
&lt;li&gt;Memory-mapped flash for weight storage&lt;/li&gt;
&lt;li&gt;Optimized matvec for ARM Cortex-M&lt;/li&gt;
&lt;li&gt;KV cache quantization&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Results&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Arduino Nano 33 BLE: 3 tokens/sec&lt;/li&gt;
&lt;li&gt;ESP32-S3: 15 tokens/sec&lt;/li&gt;
&lt;li&gt;Raspberry Pi Pico: 8 tokens/sec&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Code: github.com/AmSach/bitforge&lt;/p&gt;

&lt;p&gt;Hardware requirements: 512KB RAM, 2MB flash.&lt;/p&gt;

</description>
      <category>llm</category>
      <category>esp32</category>
      <category>iot</category>
      <category>python</category>
    </item>
    <item>
      <title>KVQuant: Run 70B LLMs on 8GB RAM with 4-bit KV Cache Quantization</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 19:39:58 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-4-bit-kv-cache-quantization-4m4k</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-4-bit-kv-cache-quantization-4m4k</guid>
      <description>&lt;p&gt;I compressed the KV cache of LLMs by 4x! Here's how I did it with KVQuant.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The Problem&lt;/strong&gt;: LLMs need huge memory for key-value caches during inference.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;The Solution&lt;/strong&gt;: 4-bit KV cache quantization that reduces memory 4x with &amp;lt;1% accuracy loss.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Results&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;GPT-2: 512MB → 128MB (4x reduction)&lt;/li&gt;
&lt;li&gt;LLaMA-7B: 8GB → 2GB&lt;/li&gt;
&lt;li&gt;LLaMA-70B: 280GB → 70GB&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Code: github.com/AmSach/kvquant&lt;/p&gt;

</description>
      <category>python</category>
      <category>llm</category>
      <category>quantization</category>
      <category>optimization</category>
    </item>
    <item>
      <title>BitForge: Run LLMs on Microcontrollers</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 19:39:54 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/bitforge-run-llms-on-microcontrollers-57ek</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/bitforge-run-llms-on-microcontrollers-57ek</guid>
      <description>&lt;p&gt;I got GPT-2 running on an Arduino! Here's the quantization pipeline.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;Process&lt;/strong&gt;:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;Q4_K_M quantization via llama.cpp&lt;/li&gt;
&lt;li&gt;Memory-mapped flash for weight storage&lt;/li&gt;
&lt;li&gt;Optimized matvec for ARM Cortex-M&lt;/li&gt;
&lt;li&gt;KV cache quantization&lt;/li&gt;
&lt;/ol&gt;

&lt;p&gt;&lt;strong&gt;Results&lt;/strong&gt;:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Arduino Nano 33 BLE: 3 tokens/sec&lt;/li&gt;
&lt;li&gt;ESP32-S3: 15 tokens/sec&lt;/li&gt;
&lt;li&gt;Raspberry Pi Pico: 8 tokens/sec&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Code: github.com/AmSach/bitforge&lt;/p&gt;

&lt;p&gt;Hardware requirements: 512KB RAM, 2MB flash.&lt;/p&gt;

</description>
      <category>llm</category>
      <category>esp32</category>
      <category>iot</category>
      <category>python</category>
    </item>
    <item>
      <title>VoxelNav: Real-time Semantic Voxel Mapping for ROS2 at 100ms</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 19:29:32 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/voxelnav-real-time-semantic-voxel-mapping-for-ros2-at-100ms-pd8</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/voxelnav-real-time-semantic-voxel-mapping-for-ros2-at-100ms-pd8</guid>
      <description>&lt;p&gt;I built VoxelNav because existing SLAM solutions are either too slow, too expensive, or too dumb.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Problem
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;OpenGS-SLAM&lt;/strong&gt;: 30s latency, needs A100 GPU&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;OctoMap&lt;/strong&gt;: 500ms, no semantics&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;RTABMap&lt;/strong&gt;: 1s, desktop only&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Autonomous robots need real-time semantic understanding.&lt;/p&gt;

&lt;h2&gt;
  
  
  What VoxelNav Does
&lt;/h2&gt;

&lt;p&gt;ROS2-native node that converts LiDAR/RGB-D data into semantic voxel grids:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;100ms end-to-end latency&lt;/strong&gt; on Jetson Nano&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Works with any ROS2-compatible sensors&lt;/strong&gt;&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Nav2-compatible costmap output&lt;/strong&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Quick Start
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="nb"&gt;cd&lt;/span&gt; ~/ros2_ws/src
git clone https://github.com/AmSach/voxelnav.git
colcon build &lt;span class="nt"&gt;--packages-select&lt;/span&gt; voxelnav
ros2 launch voxelnav voxelnav.launch.py
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Benchmarks (Jetson Nano)
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Mode&lt;/th&gt;
&lt;th&gt;Latency&lt;/th&gt;
&lt;th&gt;Memory&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Geometry only&lt;/td&gt;
&lt;td&gt;30ms&lt;/td&gt;
&lt;td&gt;50MB&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Full semantic&lt;/td&gt;
&lt;td&gt;100ms&lt;/td&gt;
&lt;td&gt;150MB&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;GitHub: &lt;a href="https://github.com/AmSach/voxelnav" rel="noopener noreferrer"&gt;https://github.com/AmSach/voxelnav&lt;/a&gt;&lt;/p&gt;

</description>
      <category>ros2</category>
      <category>robotics</category>
      <category>mapping</category>
    </item>
    <item>
      <title>KVQuant: Run 70B LLMs on 8GB RAM with KV Cache Quantization</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 19:29:26 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-kv-cache-quantization-3460</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-kv-cache-quantization-3460</guid>
      <description>&lt;p&gt;I built KVQuant because running large LLMs locally is a nightmare — not because of model weights, but because of the &lt;strong&gt;KV cache&lt;/strong&gt;.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Problem
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;Weights (4-bit)&lt;/th&gt;
&lt;th&gt;KV Cache (128K ctx)&lt;/th&gt;
&lt;th&gt;Total&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Llama-3-70B&lt;/td&gt;
&lt;td&gt;40GB&lt;/td&gt;
&lt;td&gt;256GB&lt;/td&gt;
&lt;td&gt;296GB&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;Existing quantization (llama.cpp, etc.) only compresses &lt;strong&gt;weights&lt;/strong&gt;. The KV cache still explodes your memory on long conversations.&lt;/p&gt;

&lt;h2&gt;
  
  
  What KVQuant Does
&lt;/h2&gt;

&lt;p&gt;Compresses the &lt;strong&gt;KV cache&lt;/strong&gt; with adaptive quantization based on token importance:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Token Position&lt;/th&gt;
&lt;th&gt;Bits&lt;/th&gt;
&lt;th&gt;Reason&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Recent (0-256)&lt;/td&gt;
&lt;td&gt;4-bit&lt;/td&gt;
&lt;td&gt;Attention often attends here&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Mid (256-1024)&lt;/td&gt;
&lt;td&gt;3-bit&lt;/td&gt;
&lt;td&gt;Medium importance&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Old (1024+)&lt;/td&gt;
&lt;td&gt;2-bit&lt;/td&gt;
&lt;td&gt;Distant context&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h2&gt;
  
  
  Features
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;4-6x KV cache compression&lt;/strong&gt; with &amp;lt;1% perplexity increase&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Drop-in&lt;/strong&gt; — single pip install, no model recompilation
&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Real-time&lt;/strong&gt; — adds &amp;lt;5ms latency per token&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Cross-platform&lt;/strong&gt; — CUDA, MPS (Apple Silicon), CPU&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Quick Start
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;transformers&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;AutoModelForCausalLM&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;AutoTokenizer&lt;/span&gt;
&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;kvquant&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;KVQuant&lt;/span&gt;

&lt;span class="n"&gt;model&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;AutoModelForCausalLM&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_pretrained&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;meta-llama/Llama-3-8B&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;tokenizer&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;AutoTokenizer&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_pretrained&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;meta-llama/Llama-3-8B&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="k"&gt;with&lt;/span&gt; &lt;span class="nc"&gt;KVQuant&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;target_memory_gb&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mf"&gt;4.0&lt;/span&gt;&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="n"&gt;inputs&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nf"&gt;tokenizer&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Hello, how are you?&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;return_tensors&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;pt&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="n"&gt;outputs&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;generate&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="o"&gt;**&lt;/span&gt;&lt;span class="n"&gt;inputs&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;max_new_tokens&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;100&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Benchmarks
&lt;/h2&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;Context&lt;/th&gt;
&lt;th&gt;Original KV&lt;/th&gt;
&lt;th&gt;Compressed KV&lt;/th&gt;
&lt;th&gt;Ratio&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Llama-3-8B&lt;/td&gt;
&lt;td&gt;128K&lt;/td&gt;
&lt;td&gt;32GB&lt;/td&gt;
&lt;td&gt;8GB&lt;/td&gt;
&lt;td&gt;4x&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;p&gt;GitHub: &lt;a href="https://github.com/AmSach/kvquant" rel="noopener noreferrer"&gt;https://github.com/AmSach/kvquant&lt;/a&gt;&lt;/p&gt;

</description>
      <category>python</category>
      <category>llm</category>
      <category>quantization</category>
    </item>
    <item>
      <title>BTCRouter: Real-Time Bitcoin Fee Estimation Without a Full Node</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 15:30:42 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/btcrouter-real-time-bitcoin-fee-estimation-without-a-full-node-2a0h</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/btcrouter-real-time-bitcoin-fee-estimation-without-a-full-node-2a0h</guid>
      <description>&lt;p&gt;I built &lt;strong&gt;BTCRouter&lt;/strong&gt; because most Bitcoin wallets estimate fees with a simple multiplier — "slow / medium / fast" — and it's often catastrophically wrong. When the mempool is empty, you overpay. When it's full, you underpay and wait 3 days.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Problem with Current Fee Estimation
&lt;/h2&gt;

&lt;p&gt;Full Bitcoin Core nodes solve this properly, but require:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;150GB+ storage&lt;/li&gt;
&lt;li&gt;All-day initial sync&lt;/li&gt;
&lt;li&gt;Constant upkeep&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Too heavy for embedded devices, mobile apps, or quick scripting.&lt;/p&gt;

&lt;p&gt;&lt;strong&gt;BTCRouter&lt;/strong&gt; uses Blockstream's Electrum API instead — no node required, works from any machine.&lt;/p&gt;

&lt;h2&gt;
  
  
  What BTCRouter Does
&lt;/h2&gt;

&lt;p&gt;A Python library for real-time Bitcoin intelligence:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Fee Estimation&lt;/strong&gt; — 4 tiers (economy/normal/fast/instant) from live mempool data&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Congestion Analysis&lt;/strong&gt; — score 0–100 of on-chain demand with actionable recommendations&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;UTXO Optimization&lt;/strong&gt; — greedy selection minimizing inputs + fees&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;RBF Simulation&lt;/strong&gt; — model Replace-By-Fee scenarios&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Privacy Scoring&lt;/strong&gt; — grade your UTXO set based on address reuse and amount patterns&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  How Fee Estimation Works
&lt;/h2&gt;

&lt;p&gt;Blockstream returns percentile fee data at different block targets:&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Tier&lt;/th&gt;
&lt;th&gt;Block Target&lt;/th&gt;
&lt;th&gt;Typical Wait&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Economy&lt;/td&gt;
&lt;td&gt;24 blocks (~4 hrs)&lt;/td&gt;
&lt;td&gt;~30 min&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Normal&lt;/td&gt;
&lt;td&gt;6 blocks (~1 hr)&lt;/td&gt;
&lt;td&gt;~1 hour&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Fast&lt;/td&gt;
&lt;td&gt;3 blocks (~30 min)&lt;/td&gt;
&lt;td&gt;~10 min&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Instant&lt;/td&gt;
&lt;td&gt;1 block&lt;/td&gt;
&lt;td&gt;next block&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h2&gt;
  
  
  UTXO Selection Algorithm
&lt;/h2&gt;

&lt;p&gt;Uses &lt;strong&gt;greedy selection by value&lt;/strong&gt; — sort UTXOs descending, pick largest until total covers target + fee. Minimizes input count → smaller transaction → lower fees.&lt;/p&gt;

&lt;h2&gt;
  
  
  Privacy Scoring
&lt;/h2&gt;

&lt;p&gt;Your UTXO set leaks privacy in subtle ways:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;Address reuse&lt;/strong&gt; — deduct 10pts per duplicate&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Round amounts&lt;/strong&gt; — many UTXOs at exact multiples of 100k sats (likely exchange batches)&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;One dominant UTXO&lt;/strong&gt; — &amp;gt;90% of value in single UTXO (easy to correlate)&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Quick Start
&lt;/h2&gt;

&lt;p&gt;See the README in the GitHub repository below for installation and usage examples.&lt;/p&gt;

&lt;h2&gt;
  
  
  When to Use BTCRouter
&lt;/h2&gt;

&lt;ul&gt;
&lt;li&gt;Embedded Bitcoin projects (Raspberry Pi, microcontrollers)&lt;/li&gt;
&lt;li&gt;Mobile wallets that can't run a full node&lt;/li&gt;
&lt;li&gt;Trading bots needing accurate fee estimation for batching&lt;/li&gt;
&lt;li&gt;Lightning node operators managing commitment transaction fees&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  Single File, Zero Dependencies
&lt;/h2&gt;

&lt;p&gt;Drop btcrouter.py into any project. No node, no 150GB download. Just live Bitcoin intelligence.&lt;/p&gt;

&lt;p&gt;GitHub: &lt;a href="https://github.com/AmSach/btc-router" rel="noopener noreferrer"&gt;https://github.com/AmSach/btc-router&lt;/a&gt;&lt;/p&gt;

</description>
      <category>python</category>
      <category>bitcoin</category>
      <category>cryptocurrency</category>
      <category>opensource</category>
    </item>
    <item>
      <title>QueryFS - SQL Query Your Filesystem</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 11:59:33 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/queryfs-sql-query-your-filesystem-43pi</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/queryfs-sql-query-your-filesystem-43pi</guid>
      <description>&lt;h2&gt;
  
  
  QueryFS
&lt;/h2&gt;

&lt;p&gt;Query your files with SQL. No database needed.&lt;/p&gt;

&lt;h3&gt;
  
  
  What It Does
&lt;/h3&gt;

&lt;p&gt;Mount your filesystem as a queryable database. Run SQL queries against files.&lt;/p&gt;

&lt;h3&gt;
  
  
  Features
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;SELECT * FROM /path LIMIT 10&lt;/li&gt;
&lt;li&gt;WHERE clauses: size &amp;gt; 1MB, name LIKE '%.py'&lt;/li&gt;
&lt;li&gt;Output formats: json, csv, table&lt;/li&gt;
&lt;li&gt;Zero dependencies&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  GitHub
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://github.com/AmSach/queryfs" rel="noopener noreferrer"&gt;https://github.com/AmSach/queryfs&lt;/a&gt;&lt;/p&gt;

&lt;h3&gt;
  
  
  Install
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;pip &lt;span class="nb"&gt;install &lt;/span&gt;queryfs
queryfs query &lt;span class="s2"&gt;"SELECT * FROM ~/Documents LIMIT 10"&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



</description>
      <category>python</category>
      <category>cli</category>
      <category>tools</category>
      <category>opensource</category>
    </item>
    <item>
      <title>SoundForge - Clone Any Voice in 10 Seconds, Export to C/WASM/ESP32</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 11:41:44 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/soundforge-clone-any-voice-in-10-seconds-export-to-cwasmesp32-2li5</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/soundforge-clone-any-voice-in-10-seconds-export-to-cwasmesp32-2li5</guid>
      <description>&lt;h2&gt;
  
  
  SoundForge
&lt;/h2&gt;

&lt;p&gt;Voice cloning toolkit that generates portable models you own forever, not rent.&lt;/p&gt;

&lt;h3&gt;
  
  
  What It Does
&lt;/h3&gt;

&lt;p&gt;Clone any voice in 10 seconds. Export to browser (WASM), ESP32, or standalone C code. No cloud API required after training.&lt;/p&gt;

&lt;h3&gt;
  
  
  Key Features
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;10-second cloning&lt;/strong&gt; - minimal audio input needed&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Portable exports&lt;/strong&gt; - C, WASM, ONNX formats&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Zero cloud dependency&lt;/strong&gt; - inference runs locally&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Cross-platform&lt;/strong&gt; - browser, ESP32, Arduino&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  How to Use
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Clone a voice&lt;/span&gt;
soundforge clone voice_sample.wav

&lt;span class="c"&gt;# Export to ESP32&lt;/span&gt;
soundforge &lt;span class="nb"&gt;export&lt;/span&gt; &lt;span class="nt"&gt;--target&lt;/span&gt; esp32 &lt;span class="nt"&gt;--output&lt;/span&gt; voice_model.c
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  GitHub
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://github.com/AmSach/soundforge" rel="noopener noreferrer"&gt;https://github.com/AmSach/soundforge&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Built for devs who want voice AI without API bills.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>audio</category>
      <category>embedded</category>
      <category>webassembly</category>
    </item>
    <item>
      <title>VoxelNav - Real-time 3D Semantic Mapping for ROS2 Robots</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 11:41:38 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/voxelnav-real-time-3d-semantic-mapping-for-ros2-robots-48o2</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/voxelnav-real-time-3d-semantic-mapping-for-ros2-robots-48o2</guid>
      <description>&lt;h2&gt;
  
  
  VoxelNav
&lt;/h2&gt;

&lt;p&gt;Real-time 3D semantic voxel mapping for ROS2 robots.&lt;/p&gt;

&lt;h3&gt;
  
  
  What It Does
&lt;/h3&gt;

&lt;p&gt;Takes LiDAR scans + camera feeds and turns them into labeled 3D voxel maps. Knows what is floor, wall, person, furniture, door - then feeds that to Nav2 for smart navigation.&lt;/p&gt;

&lt;h3&gt;
  
  
  Key Features
&lt;/h3&gt;

&lt;ul&gt;
&lt;li&gt;
&lt;strong&gt;O(1) voxel hashing&lt;/strong&gt; - constant-time lookup regardless of map size&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;MobileNetV3 segmentation&lt;/strong&gt; - AI labeling of objects&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Nav2 costmap plugin&lt;/strong&gt; - direct integration with ROS2 navigation&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;100ms latency&lt;/strong&gt; - real-time on Jetson Nano&lt;/li&gt;
&lt;/ul&gt;

&lt;h3&gt;
  
  
  How to Use
&lt;/h3&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight shell"&gt;&lt;code&gt;&lt;span class="c"&gt;# Install&lt;/span&gt;
&lt;span class="nb"&gt;cd &lt;/span&gt;voxelnav &lt;span class="o"&gt;&amp;amp;&amp;amp;&lt;/span&gt; colcon build

&lt;span class="c"&gt;# Run&lt;/span&gt;
ros2 run voxelnav voxelnav_node
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  GitHub
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://github.com/AmSach/voxelnav" rel="noopener noreferrer"&gt;https://github.com/AmSach/voxelnav&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Built for ROS2 robots that need semantic maps without expensive hardware.&lt;/p&gt;

</description>
      <category>robotics</category>
      <category>ros</category>
      <category>cpp</category>
      <category>ai</category>
    </item>
    <item>
      <title>KVQuant: Run 70B LLMs on 8GB RAM with Real-Time KV Cache Compression</title>
      <dc:creator>Aman Sachan</dc:creator>
      <pubDate>Thu, 30 Apr 2026 11:35:54 +0000</pubDate>
      <link>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-real-time-kv-cache-compression-24p0</link>
      <guid>https://forem.com/aman_sachan_126d19c4a2773/kvquant-run-70b-llms-on-8gb-ram-with-real-time-kv-cache-compression-24p0</guid>
      <description>&lt;p&gt;I built KVQuant because I wanted to run 70B parameter models on my gaming laptop. The problem? Even with 4-bit quantization, a 128K context window needs 256GB RAM just for the KV cache.&lt;/p&gt;

&lt;h2&gt;
  
  
  The Problem
&lt;/h2&gt;

&lt;p&gt;When you run an LLM, the memory bottleneck is not the model weights - it is the KV cache.&lt;/p&gt;

&lt;div class="table-wrapper-paragraph"&gt;&lt;table&gt;
&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Model&lt;/th&gt;
&lt;th&gt;Weights (4-bit)&lt;/th&gt;
&lt;th&gt;KV Cache (128K ctx)&lt;/th&gt;
&lt;th&gt;Total&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;Llama-3-8B&lt;/td&gt;
&lt;td&gt;5GB&lt;/td&gt;
&lt;td&gt;64GB&lt;/td&gt;
&lt;td&gt;69GB&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Llama-3-70B&lt;/td&gt;
&lt;td&gt;40GB&lt;/td&gt;
&lt;td&gt;256GB&lt;/td&gt;
&lt;td&gt;296GB&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;&lt;/div&gt;

&lt;h2&gt;
  
  
  The Solution
&lt;/h2&gt;

&lt;p&gt;KVQuant compresses the KV cache in real-time using per-position adaptive quantization.&lt;/p&gt;

&lt;p&gt;Result: 4-6x compression with less than 1% perplexity increase.&lt;/p&gt;

&lt;h2&gt;
  
  
  Usage
&lt;/h2&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="kn"&gt;from&lt;/span&gt; &lt;span class="n"&gt;kvquant&lt;/span&gt; &lt;span class="kn"&gt;import&lt;/span&gt; &lt;span class="n"&gt;KVQuant&lt;/span&gt;
&lt;span class="n"&gt;compressor&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;KVQuant&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;target_memory_gb&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;8&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="n"&gt;model&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;compressor&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;wrap&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;model&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;GitHub: &lt;a href="https://github.com/AmSach/kvquant" rel="noopener noreferrer"&gt;https://github.com/AmSach/kvquant&lt;/a&gt;&lt;/p&gt;

</description>
      <category>python</category>
      <category>llm</category>
      <category>ai</category>
      <category>opensource</category>
    </item>
  </channel>
</rss>
