Transformers.js + WebGPU: Run a local LLM in your browser (Single Page)

No Python, no Node.js, no API keys, and no server costs. Just one HTML file. (And really small nostalgic models…)

How to run it:

  1. Create a new file on your computer named chat.html.

  2. Paste the code below into it.

  3. Open that file in Chrome or Edge (WebGPU support is best there).

  4. Wait for the model to download (it caches after the first run!) and start chatting.

You can use either model — comment out the line you don't want and keep the other:

const MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct";

or

const MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct";

or drop in another model ID from the Hugging Face Hub. (Are OpenAI's open-weight models on Hugging Face yet?)

Here’s some bad code…

One-Page Local Chat Example
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Transformers.js Browser Chat (MVP)</title>
  <style>
    :root{
      --bg:#0b1020; --card:#121a33cc; --card2:#0f1730cc;
      --text:#e7ecff; --muted:#a8b2d6; --stroke:#2a3563;
      --accent:#7c5cff; --good:#19c37d; --warn:#fbbf24; --bad:#ef4444;
      color-scheme: dark;
    }
    *{box-sizing:border-box}
    body{
      margin:0; font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Arial;
      background: radial-gradient(1200px 600px at 20% 0%, #1a2a6b55, transparent 60%),
                  radial-gradient(900px 600px at 90% 20%, #7c5cff44, transparent 55%),
                  linear-gradient(180deg, var(--bg), #070b16);
      color:var(--text);
      min-height:100vh;
    }
    header{
      position:sticky; top:0; z-index:5;
      backdrop-filter: blur(10px);
      background: #060a16aa;
      border-bottom:1px solid #ffffff12;
    }
    .wrap{max-width:980px; margin:0 auto; padding:14px 16px;}
    .top{
      display:flex; gap:14px; align-items:center; justify-content:space-between;
    }
    .brand{display:flex; gap:12px; align-items:center;}
    .logo{
      width:34px; height:34px; border-radius:12px;
      background: radial-gradient(circle at 30% 30%, #ffffffcc, #ffffff00 45%),
                  linear-gradient(135deg, #7c5cff, #19c37d);
      box-shadow: 0 10px 30px #7c5cff33;
    }
    h1{margin:0; font-size:14px; letter-spacing:.02em}
    .sub{font-size:12px; color:var(--muted); margin-top:2px}
    .pill{
      border:1px solid #ffffff1a; background:#0c1227aa;
      padding:8px 10px; border-radius:999px;
      display:flex; gap:10px; align-items:center; white-space:nowrap;
      font-size:12px; color:var(--muted);
    }
    .dot{
      width:8px; height:8px; border-radius:99px; background:var(--warn);
      box-shadow:0 0 0 4px #fbbf2418;
    }
    .dot.ready{background:var(--good); box-shadow:0 0 0 4px #19c37d18;}
    .dot.err{background:var(--bad); box-shadow:0 0 0 4px #ef444418;}

    main .wrap{padding:16px}
    .grid{display:grid; grid-template-columns: 1fr; gap:12px;}
    .panel{
      border:1px solid #ffffff14;
      background: linear-gradient(180deg, var(--card), #0b1020aa);
      border-radius:16px;
      box-shadow: 0 20px 60px #00000055;
      overflow:hidden;
    }
    .panelHead{
      padding:12px 14px; border-bottom:1px solid #ffffff12;
      display:flex; gap:10px; align-items:center; justify-content:space-between;
      background: linear-gradient(180deg, #121a33cc, #101733aa);
    }
    .meta{
      display:flex; gap:10px; align-items:center; flex-wrap:wrap;
      font-size:12px; color:var(--muted);
    }
    code{font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; font-size:11px; color:#cbd5ff}
    .btn{
      appearance:none; border:1px solid #ffffff1a;
      background:#0c1227aa; color:var(--text);
      padding:8px 10px; border-radius:12px;
      cursor:pointer; font-size:12px;
      transition: transform .05s ease, background .15s ease, border-color .15s ease;
    }
    .btn:hover{background:#111a39bb; border-color:#ffffff2a}
    .btn:active{transform: translateY(1px)}
    .btn:disabled{opacity:.55; cursor:not-allowed}
    .chat{
      height:60vh; overflow:auto; padding:14px;
    }
    .msg{display:flex; gap:10px; margin:10px 0;}
    .role{
      width:78px; flex:0 0 auto;
      font-size:11px; letter-spacing:.08em; text-transform:uppercase;
      color:#9aa6d6;
      padding-top:2px;
    }
    .bubble{
      max-width: 86ch;
      border:1px solid #ffffff14;
      background: #0c1227aa;
      padding:10px 12px;
      border-radius: 14px;
      white-space: pre-wrap;
      line-height:1.35;
    }
    .user .bubble{
      background: linear-gradient(180deg, #7c5cff1a, #7c5cff0a);
      border-color:#7c5cff33;
    }
    .assistant .bubble{
      background: linear-gradient(180deg, #19c37d1a, #19c37d0a);
      border-color:#19c37d33;
    }
    .system .bubble{
      background: linear-gradient(180deg, #ffffff12, #ffffff06);
      border-color:#ffffff1a;
      font-style: italic;
      color:#cfd7ff;
    }

    .composer{
      display:flex; gap:10px; padding:12px; border-top:1px solid #ffffff12;
      background: linear-gradient(180deg, #0f1733aa, #0b1020aa);
    }
    textarea{
      flex:1;
      min-height: 52px;
      max-height: 160px;
      resize: vertical;
      border-radius: 14px;
      border:1px solid #ffffff18;
      background:#070b16aa;
      color:var(--text);
      padding:10px 12px;
      font: inherit;
      outline:none;
    }
    textarea:focus{border-color:#7c5cff55; box-shadow: 0 0 0 4px #7c5cff18}
    .side{
      display:flex; flex-direction:column; gap:10px;
    }

    /* Loading overlay */
    .overlay{
      position:fixed; inset:0; z-index:50;
      display:none;
      align-items:center; justify-content:center;
      background: radial-gradient(800px 400px at 50% 30%, #7c5cff22, transparent 65%),
                  rgba(0,0,0,.55);
      backdrop-filter: blur(10px);
      padding:18px;
    }
    .overlay.show{display:flex}
    .modal{
      width:min(640px, 100%);
      border-radius:18px;
      border:1px solid #ffffff18;
      background: linear-gradient(180deg, #0f1733ee, #070b16ee);
      box-shadow: 0 30px 120px #000000aa;
      overflow:hidden;
    }
    .modalTop{padding:14px 14px 10px; display:flex; gap:12px; align-items:center; justify-content:space-between;}
    .big{display:flex; gap:12px; align-items:center;}
    .ring{
      width:38px; height:38px; border-radius:999px;
      background: conic-gradient(from 90deg, var(--accent) var(--p, 0%), #ffffff12 0);
      display:grid; place-items:center;
      box-shadow: 0 0 0 6px #7c5cff15;
      transition: background .15s ease;
    }
    .ring::after{
      content:"";
      width:28px; height:28px; border-radius:999px;
      background:#0b1020;
      border:1px solid #ffffff12;
    }
    .modalBody{padding:0 14px 14px;}
    .title{font-weight:700; font-size:13px}
    .line{font-size:12px; color:var(--muted); margin-top:2px}
    .bar{
      margin-top:12px;
      height:10px; border-radius:999px;
      background:#ffffff10;
      border:1px solid #ffffff12;
      overflow:hidden;
    }
    .fill{
      height:100%;
      width: var(--w, 0%);
      background: linear-gradient(90deg, #7c5cff, #19c37d);
      transition: width .12s ease;
    }
    .smallrow{margin-top:10px; display:flex; gap:10px; align-items:center; justify-content:space-between; flex-wrap:wrap;}
    .kbd{font-size:11px; color:#cbd5ff; border:1px solid #ffffff18; background:#0c1227aa; padding:6px 8px; border-radius:10px;}
    .muted{color:var(--muted); font-size:12px}

    /* typing dots */
    .typing{
      display:inline-flex; gap:5px; align-items:center;
    }
    .typing span{
      width:6px; height:6px; border-radius:999px;
      background:#cbd5ffcc;
      animation: bounce 1.1s infinite ease-in-out;
      opacity:.7;
    }
    .typing span:nth-child(2){animation-delay:.15s}
    .typing span:nth-child(3){animation-delay:.3s}
    @keyframes bounce{
      0%, 80%, 100% { transform: translateY(0); opacity:.55; }
      40% { transform: translateY(-4px); opacity:1; }
    }
  </style>
</head>
<body>
<header>
  <div class="wrap">
    <div class="top">
      <div class="brand">
        <div class="logo"></div>
        <div>
          <h1>Browser Chat (Transformers.js)</h1>
          <div class="sub">Single-file MVP · local inference · cached after first download</div>
        </div>
      </div>
      <div class="pill" title="Runtime status">
        <div id="led" class="dot"></div>
        <div id="status">Idle</div>
      </div>
    </div>
  </div>
</header>

<main>
  <div class="wrap">
    <div class="grid">
      <section class="panel">
        <div class="panelHead">
          <div class="meta">
            Model: <code id="modelId"></code>
            <span>·</span>
            Device: <code id="deviceInfo"></code>
          </div>
          <div style="display:flex; gap:10px; align-items:center;">
            <button class="btn" id="preload">Preload model</button>
            <button class="btn" id="reset">Reset</button>
          </div>
        </div>

        <div id="chat" class="chat" aria-live="polite"></div>

        <div class="composer">
          <textarea id="input" placeholder="Enter to send · Shift+Enter newline"></textarea>
          <div class="side">
            <button class="btn" id="send" disabled>Send</button>
          </div>
        </div>
      </section>
    </div>
  </div>
</main>

<!-- Loading overlay -->
<div id="overlay" class="overlay" aria-hidden="true">
  <div class="modal">
    <div class="modalTop">
      <div class="big">
        <div id="ring" class="ring" style="--p:0%"></div>
        <div>
          <div class="title" id="ovTitle">Loading model…</div>
          <div class="line" id="ovLine">Preparing downloads</div>
        </div>
      </div>
      <div class="kbd" id="ovPct">0%</div>
    </div>
    <div class="modalBody">
      <div class="bar"><div id="barFill" class="fill" style="--w:0%"></div></div>
      <div class="smallrow">
        <div class="muted" id="ovFile">—</div>
        <div class="muted" id="ovBytes">—</div>
      </div>
      <div class="smallrow">
        <div class="muted">Tip: first load can be ~100–200MB depending on model files.</div>
        <div class="muted">Cached after first load.</div>
      </div>
    </div>
  </div>
</div>

<script type="module">
import { pipeline, env } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";

  // Skip local checks for demo purposes
  env.allowLocalModels = false;
  
  // CHANGED: 360M is much smarter than 135M but still fast in browser
  //const MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct";

 const MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct"; 


  const el = {
    led: document.getElementById("led"),
    status: document.getElementById("status"),
    modelId: document.getElementById("modelId"),
    deviceInfo: document.getElementById("deviceInfo"),
    chat: document.getElementById("chat"),
    input: document.getElementById("input"),
    send: document.getElementById("send"),
    reset: document.getElementById("reset"),
    preload: document.getElementById("preload"),
    overlay: document.getElementById("overlay"),
    ring: document.getElementById("ring"),
    ovTitle: document.getElementById("ovTitle"),
    ovLine: document.getElementById("ovLine"),
    ovPct: document.getElementById("ovPct"),
    ovFile: document.getElementById("ovFile"),
    ovBytes: document.getElementById("ovBytes"),
    barFill: document.getElementById("barFill"),
  };

  el.modelId.textContent = MODEL;
  el.deviceInfo.textContent = (navigator.gpu ? "webgpu available" : "cpu/wasm");
  
  // CHANGED: Stronger system prompt to keep it in character
  const SYSTEM = { role: "system", content: "You are a helpful and practical AI assistant named Smol." };
  
  let history = [];
  let generator = null;
  let loadingPromise = null;
  let busy = false;

  // ---- UI helpers
  const fmtBytes = (n) => {
    if (!Number.isFinite(n) || n <= 0) return "0 B";
    const u = ["B","KB","MB","GB"];
    let i = 0, v = n;
    while (v >= 1024 && i < u.length - 1) { v /= 1024; i++; }
    return `${v.toFixed(v >= 10 || i === 0 ? 0 : 1)} ${u[i]}`;
  };

  function setLED(mode){
    el.led.classList.remove("ready","err");
    if (mode === "ready") el.led.classList.add("ready");
    if (mode === "err") el.led.classList.add("err");
  }
  function setStatus(t){ el.status.textContent = t; }

  function showOverlay(on){
    el.overlay.classList.toggle("show", !!on);
    el.overlay.setAttribute("aria-hidden", on ? "false" : "true");
  }

  function setOverlayProgress(pct, line, file, loaded, total){
    const p = Math.max(0, Math.min(100, Math.round(pct)));
    el.ring.style.setProperty("--p", `${p}%`);
    el.barFill.style.setProperty("--w", `${p}%`);
    el.ovPct.textContent = `${p}%`;
    if (line) el.ovLine.textContent = line;
    if (file) el.ovFile.textContent = file;
    if (Number.isFinite(loaded) && Number.isFinite(total) && total > 0) {
      el.ovBytes.textContent = `${fmtBytes(loaded)} / ${fmtBytes(total)}`;
    } else {
      el.ovBytes.textContent = "—";
    }
  }

  function scrollBottom(){ el.chat.scrollTop = el.chat.scrollHeight; }

  function addMsg(role, content){
    const row = document.createElement("div");
    row.className = `msg ${role}`;
    
    const who = document.createElement("div");
    who.className = "role";
    who.textContent = role;

    const bubble = document.createElement("div");
    bubble.className = "bubble";
    if (content instanceof Node) bubble.appendChild(content);
    else bubble.textContent = content;

    row.appendChild(who);
    row.appendChild(bubble);
    el.chat.appendChild(row);
    scrollBottom();
    return bubble;
  }

  function typingNode(){
    const s = document.createElement("span");
    s.className = "typing";
    s.innerHTML = "<span></span><span></span><span></span>";
    return s;
  }

  function resetChat(){
    history = [];
    el.chat.innerHTML = "";
    addMsg("system", "Local in-browser chat. Click “Preload model” or just send a message (first run downloads + caches).");
  }

  function getMessages(){
    const MAX_TURNS = 6;
    const trimmed = history.slice(-MAX_TURNS * 2);
    return [SYSTEM, ...trimmed];
  }

  // ---- Progress aggregation
  const fileMap = new Map(); 
  function overallPct(){
    let loaded = 0, total = 0;
    for (const v of fileMap.values()){
      if (Number.isFinite(v.total) && v.total > 0){
        loaded += (v.loaded || 0);
        total += v.total;
      }
    }
    if (total <= 0) return 0;
    return (loaded / total) * 100;
  }

  function progressCallback(info){
    const st = info?.status;
    const file = info?.file || "";
    
    if (st === "initiate") {
      el.ovTitle.textContent = "Downloading model…";
      setOverlayProgress(overallPct(), `Starting ${file || "download"}`, file || "—");
      showOverlay(true);
      setStatus("Downloading…");
      setLED();
    } else if (st === "download") {
      el.ovTitle.textContent = "Downloading model…";
      setOverlayProgress(overallPct(), `Downloading ${file || "files"}`, file || "—");
      showOverlay(true);
      setStatus("Downloading…");
      setLED();
    } else if (st === "progress") {
      if (file) fileMap.set(file, { loaded: info.loaded, total: info.total });
      const pct = overallPct();
      setOverlayProgress(pct, `Downloading ${file || "files"}`, file || "—", info.loaded, info.total);
      showOverlay(true);
      setStatus("Downloading…");
      setLED();
    } else if (st === "done") {
      if (file && fileMap.has(file)) {
        const v = fileMap.get(file);
        fileMap.set(file, { loaded: v.total ?? v.loaded, total: v.total ?? v.loaded });
      }
      const pct = overallPct();
      setOverlayProgress(pct, `Finishing ${file || "files"}`, file || "—");
      showOverlay(true);
      setStatus("Finalizing…");
      setLED();
    } else if (st === "ready") {
      setOverlayProgress(100, "Ready", info.model || "—");
      setTimeout(() => showOverlay(false), 250);
      setStatus("Ready");
      setLED("ready");
    }
  }

  async function ensureModel(){
    if (generator) return generator;
    if (loadingPromise) return loadingPromise;

    fileMap.clear();
    showOverlay(true);
    el.ovTitle.textContent = "Loading model…";
    setOverlayProgress(0, "Preparing…", "—");

    loadingPromise = (async () => {
      // Force loading correct tokenizer and model settings
      const gen = await pipeline("text-generation", MODEL, { 
        progress_callback: progressCallback,
        dtype: "q8", // optional: forces quantization if available for lighter load
      });
      return gen;
    })();

    try{
      generator = await loadingPromise;
      setStatus("Ready");
      setLED("ready");
      return generator;
    } catch (e){
      console.error(e);
      setStatus("Load error");
      setLED("err");
      showOverlay(false);
      throw e;
    } finally{
      loadingPromise = null;
    }
  }

  function setBusy(on){
    busy = !!on;
    el.send.disabled = busy || !el.input.value.trim();
    el.reset.disabled = busy;
    el.preload.disabled = busy || !!generator || !!loadingPromise;
  }

  async function sendMessage(raw){
    const text = (raw ?? "").trim();
    if (!text || busy) return;
    
    setBusy(true);
    addMsg("user", text);
    history.push({ role: "user", content: text });
    
    const bubble = addMsg("assistant", typingNode());
    setStatus("Generating…");
    setLED();
    
    try{
      const gen = await ensureModel();
      const msgs = getMessages();

      // Standard ChatML format construction
 const prompt = gen.tokenizer.apply_chat_template(msgs, {
  tokenize: false,
  add_generation_prompt: true,
});


      // Generate

      const out = await gen(prompt, { 
          max_new_tokens: 250,
          temperature: 0.5,   // Higher temp prevents looping/boring answers
         // do_sample: true,
         // top_p: 0.95,
         // repetition_penalty: 1.15, // CRITICAL: stops it from repeating itself
          return_full_text: false
      });
/*
      const out = await gen(prompt, {
  max_new_tokens: 180,
  do_sample: false,
  return_full_text: false,
});
 */ 

      let reply = out[0].generated_text;
      
      // Cleanup cleanup
      if (reply.includes("<|im_end|>")) reply = reply.split("<|im_end|>")[0];
      if (reply.includes("<|im_start|>")) reply = reply.split("<|im_start|>")[0];
      
      const safe = (reply.trim().length > 0) ? reply.trim() : "...";
      
      bubble.textContent = safe;
      history.push({ role: "assistant", content: safe });
      
      setStatus("Ready");
      setLED("ready");
    } catch (e){
      console.error(e);
      bubble.textContent = "Error: " + (e?.message ?? String(e));
      setStatus("Error");
      setLED("err");
    } finally{
      setBusy(false);
      el.input.focus();
    }
  }

  // ---- events
  el.input.addEventListener("input", () => {
    el.send.disabled = busy || !el.input.value.trim();
  });
  el.input.addEventListener("keydown", (e) => {
    if (e.key === "Enter" && !e.shiftKey){
      e.preventDefault();
      const t = el.input.value;
      el.input.value = "";
      el.send.disabled = true;
      sendMessage(t);
    }
  });
  el.send.addEventListener("click", () => {
    const t = el.input.value;
    el.input.value = "";
    el.send.disabled = true;
    sendMessage(t);
  });
  el.preload.addEventListener("click", async () => {
    if (busy) return;
    setBusy(true);
    try{
      await ensureModel();
    } finally{
      setBusy(false);
      el.input.focus();
    }
  });
  el.reset.addEventListener("click", () => {
    if (busy) return;
    resetChat();
    el.input.focus();
    el.send.disabled = !el.input.value.trim();
  });

  // init
  resetChat();
  setStatus("Idle");
  setLED();
  setBusy(false);
  el.input.focus();
</script>
</body>
</html>

One-Page Local Chat TINY Example
 <!doctype html>

<html lang="en">

<head>

  <meta charset="utf-8" />

  <meta name="viewport" content="width=device-width, initial-scale=1" />

  <title>Transformers.js Browser Chat</title>

  <style>

    :root{ --bg:#0b1020; --text:#e7ecff; --accent:#7c5cff; font-family: sans-serif; }

    body{ background: var(--bg); color: var(--text); margin: 0; display: flex; justify-content: center; min-height: 100vh; }

    .container{ width: 100%; max-width: 800px; padding: 20px; display: flex; flex-direction: column; gap: 15px; }

    .chat-box{ flex: 1; border: 1px solid #ffffff22; border-radius: 12px; padding: 15px; overflow-y: auto; height: 60vh; background: #121a33; }

    .msg{ padding: 10px; margin: 5px 0; border-radius: 8px; max-width: 85%; }

    .user{ background: #7c5cff33; align-self: flex-end; margin-left: auto; }

    .assistant{ background: #19c37d33; }

    .controls{ display: flex; gap: 10px; }

    input{ flex: 1; padding: 12px; border-radius: 8px; border: 1px solid #ffffff22; background: #00000033; color: white; }

    button{ padding: 12px 20px; background: var(--accent); border: none; border-radius: 8px; color: white; cursor: pointer; }

    button:disabled{ opacity: 0.5; }

    #status{ font-size: 12px; color: #8899ac; text-align: center; }

  </style>

</head>

<body>

<div class="container">

  <div style="display:flex; justify-content:space-between; align-items:center;">

    <h2>Browser Chat</h2>

    <div id="status">Ready to load</div>

  </div>

  <div id="chat" class="chat-box"></div>

  <div class="controls">

    <input id="input" placeholder="Type a message..." disabled />

    <button id="send" disabled>Send</button>

  </div>

</div>




<script type="module">

import { pipeline, env } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0";




// Disable local checks for simple browser usage

env.allowLocalModels = false;

const MODEL = "HuggingFaceTB/SmolLM2-360M-Instruct";
// const MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct"; 




const el = {

  chat: document.getElementById("chat"),

  input: document.getElementById("input"),

  send: document.getElementById("send"),

  status: document.getElementById("status")

};




let generator = null;

let messages = [

  { role: "system", content: "You are a helpful AI assistant named Smol." }

];




async function loadModel() {

  el.status.textContent = "Downloading model (this happens once)...";

  // Load pipeline

  generator = await pipeline("text-generation", MODEL, {

    dtype: "q8",

    progress_callback: (x) => {

      if(x.status === "progress") el.status.textContent = `Downloading: ${Math.round(x.loaded/1024/1024)}MB`;

    }

  });

  el.status.textContent = "Model Ready!";

  el.input.disabled = false;

  el.send.disabled = false;

  el.input.focus();

}




// Start loading immediately

loadModel();




async function generate() {

  const text = el.input.value.trim();

  if(!text) return;

  

  // User UI

  el.input.value = "";

  el.input.disabled = true;

  el.send.disabled = true;

  appendMsg("user", text);

  messages.push({ role: "user", content: text });




  // Prep prompt

  const prompt = messages.map(m => `<|im_start|>${m.role}\n${m.content}<|im_end|>`).join("\n") + "\n<|im_start|>assistant\n";




  el.status.textContent = "Thinking...";

  

  try {

    const output = await generator(prompt, {

      max_new_tokens: 200,

      temperature: 0.6,

      do_sample: true,

      return_full_text: false

    });

    

    let response = output[0].generated_text.split("<|im_end|>")[0];

    appendMsg("assistant", response);

    messages.push({ role: "assistant", content: response });

    el.status.textContent = "Ready";

  } catch (err) {

    el.status.textContent = "Error: " + err.message;

  }

  

  el.input.disabled = false;

  el.send.disabled = false;

  el.input.focus();

}




function appendMsg(role, text) {

  const div = document.createElement("div");

  div.className = `msg ${role}`;

  div.textContent = text;

  el.chat.appendChild(div);

  el.chat.scrollTop = el.chat.scrollHeight;

}




el.send.onclick = generate;

el.input.onkeydown = (e) => { if(e.key === "Enter") generate(); };

</script>

</body>

</html>

1 Like