[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"report-2026-04-23":3,"4at8swmwqC":551,"WvLtZYzTh9":566,"H2pcntlk7J":576,"SvATWLUdav":586,"JeWGsIwFh9":596,"18ihVeuFgE":739,"J0RVbwLzTz":750,"qfbzz5M5O8":776,"6g8Oom7jl4":787,"b1rakXSDIu":814,"IvlJ17t2jg":935,"Fjz2S1fGEX":994,"Wz1JOdQL9u":1019,"qedSN56VyR":1040,"F0SHC2MiUl":1050,"e9UY1x6diK":1060,"b4WOx5GAW2":1070,"BL905P4g5U":1080,"eHTonKLMzg":1090,"grrJa42eUb":1100,"A5tQ04yHDj":1225,"WRL1c3zYAJ":1236,"WTys9vsfL3":1252,"VMesQx7pL5":1283,"P7WIFZhS7g":1342,"AGfMdl1NsK":1465,"9Iknr1uEg8":1600,"Qq7G8Hc4WU":1644,"LbW9pSxX8K":1669,"oG3T3X6YCj":1694,"Lu7iDHFAoK":1704,"o45hhuS3ID":1714,"Gr9ZBBr1rV":1724,"ODp2msFbQd":1734,"Oj5zUS870J":1744,"3E70OYve62":1754,"KYSazwtjPH":1764,"hxdUF005g9":1885,"xse7dNPEIa":1896,"zzWaBMbzOk":1922,"appqWLdClY":1948,"yOIX1HB6dA":1974,"fABFak6Wwv":2099,"Qn0hdKHMg2":2195,"CboknLWnGg":2220,"fo8AD8wECx":2241,"C5Hxw2SZ7I":2251,"c23WabPbeF":2261,"F7rKR8T1TA":2271,"t7tCzRpxe6":2386,"wThRleTXV5":2416,"MExpIeNORX":2438,"F6zfd6frRg":2501,"z3REnE4Go9":2538,"yEOnvbasYF":2554,"PqsSEmu4Kd":2607,"2gd7CeOfqn":2631,"AGbdXuMvAm":2647,"ZCZfGe7CsY":2709,"3E956awhmD":2719,"RyBYZywdmb":2729,"podbNty6nd":2758,"c3AuuZZJxo":2809,"I2K3XECUXU":2819,"zclzVsmRMq":2829,"6rSwTPcnD0":2852,"AcZqoPhw9w":2879,"b9XaDRxamQ":2889,"qdNvb52FzG":2899,"x8OJsDJH7l":2925,"Ylb1UKmuMQ":2983,"lqoK65KHtp":3037,"RNmYZwv9MO":3053,"9XlhGbV4Fl":3077,"5d7x5Ywm3Y":3146,"iX676pbmXa":3156,"Ya5R0WbHch":3166,"72tbosR3Fn":3244,"JKlvpk2Kdt":3284,"Dt65qGs1YL":3300,"Hu7WaX1pFW":3385,"hLKcpHa0zm":3395,"U1970vUwCP":4157},{"report":4,"adjacent":548},{"version":5,"date":6,"title":7,"sources":8,"hook":15,"deepDives":16,"quickBites":252,"communityOverview":534,"dailyActions":535,"outro":547},"20260216.0","2026-04-23","AI 趨勢日報：2026-04-23",[9,10,11,12,13,14],"alibaba","anthropic","community","github","google","openai","Qwen3.6-27B 以本地 27B 模型打敗雲端 397B 旗艦，Agent 
基礎設施競賽與定價戰全面開打。",[17,108,178],{"category":18,"source":9,"title":19,"subtitle":20,"publishDate":6,"tier1Source":21,"supplementSources":24,"tldr":45,"context":57,"mechanics":58,"benchmark":59,"useCases":60,"engineerLens":70,"businessLens":71,"devilsAdvocate":72,"community":75,"hypeScore":95,"hypeMax":96,"adoptionAdvice":97,"actionItems":98},"tech","Qwen3.6-27B 開源釋出：27B 密集模型打敗 397B 旗艦","雲端限縮推動本地 AI 部署浪潮，開源模型競爭格局進入新階段",{"name":22,"url":23},"Simon Willison — Qwen3.6-27B: Flagship-Level Coding in a 27B Dense Model","https://simonwillison.net/2026/Apr/22/qwen36-27b/",[25,29,33,37,41],{"name":26,"url":27,"detail":28},"Hacker News 討論：Qwen3.6-27B","https://news.ycombinator.com/item?id=47863217","社群硬體實測數據，含 M5 Pro、RTX 5090、R9700 等配置的 tokens/s 實測與量化策略討論",{"name":30,"url":31,"detail":32},"Efficienist — Qwen3.6-27B Delivers Flagship Coding in 27B Dense Model","https://efficienist.com/qwen3-6-27b-is-a-27b-dense-model-that-outperforms-a-397b-on-coding-benchmarks/","技術規格與四項主要編程基準數據彙整",{"name":34,"url":35,"detail":36},"Qwen/Qwen3.6-27B(Hugging Face)","https://huggingface.co/Qwen/Qwen3.6-27B","官方模型頁面，含完整架構參數、授權資訊與 GGUF 量化版下載",{"name":38,"url":39,"detail":40},"Reddit r/LocalLLaMA — Qwen 3.6 27B is out","https://redlib.perennialte.ch/r/LocalLLaMA/comments/1ssl1xh/qwen_36_27b_is_out/","社群第一手反應，記錄雲端 API 漲價對本地部署的催化效應 (reddit-1ssl1xh)",{"name":42,"url":43,"detail":44},"Reddit r/LocalLLaMA — Qwen3.6-35B becomes competitive with cloud models","https://redlib.perennialte.ch/r/LocalLLaMA/comments/1ssilc3/qwen3635b_becomes_competitive_with_cloud_models/","系統性 agentic 實驗，記錄 MoE 姊妹款搭配 Agent 框架後超越雲端 API 的具體數據 (reddit-1ssilc3)",{"tagline":46,"points":47},"27B 打敗 397B：效能密度革命，本地部署進入旗艦時代",[48,51,54],{"label":49,"text":50},"技術","Q4 量化版僅 16.8GB，在四項編程基準全面超越前代 807GB 旗艦，SWE-bench Verified 達 77.2%，效能密度提升 14 倍",{"label":52,"text":53},"成本","Apache 2.0 授權，消費級 GPU 可達 25+ tokens/s，雲端 API 漲價之際本地部署優勢倍增，Unsloth 最低 18GB 記憶體可運行",{"label":55,"text":56},"落地","單一 checkpoint 支援雙模推理，MoE 姊妹款搭配 Agent 框架後已在 agentic 
任務上正面競爭主流雲端 API","#### 27B 模型規格與效能定位\n\nAlibaba Qwen 團隊於 2026 年 4 月 22 日正式發布 Qwen3.6-27B，定位為「27B 級別旗艦編程模型」。完整模型 55.6GB，Q4_K_M GGUF 量化版僅約 16.8GB，可在單張高端消費級 GPU 上流暢運行，大幅降低本地部署門檻。\n\n同期推出 MoE 姊妹款 Qwen3.6-35B-A3B，總參數 35B 但僅有 3B 激活，兩款並行提供差異化使用場景，滿足不同硬體環境的需求。採用 Apache 2.0 授權，支援商業使用與微調，無任何商業限制。\n\n在四項主要編程基準上，Qwen3.6-27B 全面超越前代旗艦 Qwen3.5-397B-A17B（體積 807GB，是新模型的 14 倍）：SWE-bench Verified 77.2 對 76.2、SWE-bench Pro 53.5 對 50.9、Terminal-Bench 2.0 59.3 對 52.5、SkillsBench 48.2 對 30.0。\n\n> **名詞解釋**\n> SWE-bench Verified：評估 AI 模型解決真實 GitHub issue 能力的標準化基準，涵蓋從修復 bug 到實作新功能的軟體工程任務。\n\n架構上採用 64 層、隱層維度 5120，引入 Gated DeltaNet + Gated Attention 混合塊設計，原生支援 262,144 tokens 上下文視窗，透過 YaRN 技術可擴展至約 100 萬 tokens。整合視覺編碼器，支援文字、圖像、影片多模態輸入。\n\n> **名詞解釋**\n> YaRN(Yet another RoPE extensioN method) ：透過調整 RoPE 旋轉位置編碼參數擴展大語言模型上下文視窗的技術，可在不重新訓練的前提下顯著提升上下文長度上限。\n\n#### 雲端服務收緊成為本地部署催化劑\n\nAnthropic Claude Opus 近期收緊存取限制並調漲定價，無意間為開源本地模型創造了絕佳時機。Reddit r/LocalLLaMA 社群的 u/_raydeStar 精準點出這個「完美風暴」：雲端漲價與強力本地模型同步出現，驅使越來越多開發者認真評估自建方案。\n\nHN 社群的真實硬體實測進一步驗證了本地部署的可行性：M5 Pro(128GB RAM) 以 Q4_K_M 量化達 25.57 tokens/s；RTX 5090(32GB) 以 Q6_K 量化可達 30+ tokens/s；R9700(32GB) 以 Q8 量化約 20 tokens/s。Simon Willison 親身驗證後指出：「本地模型雖尚未達到頂尖商業模型水準，但進步速度極快。」\n\nUnsloth AI 透過 Dynamic GGUFs 技術將最低門檻壓到 18GB 記憶體，進一步拉低硬體需求。社群亦有人提示：只要主機板具備兩個全頻寬 PCIe 插槽，可將模型分拆在兩張 16GB GPU 上運行，成本遠低於單張 RTX 5090 或 R9700。\n\n#### 搭配 Agent 框架的實戰表現\n\nReddit r/LocalLLaMA 社群出現了一份具里程碑意義的研究，完整記錄 Qwen3.6-35B-A3B 在搭配合適 Agent 框架後，已能在 agentic 任務上正面超越多款主流雲端 API，並提供了系統性的跨模型比較數據——這是開源模型競爭格局的重要轉折點。\n\n> **名詞解釋**\n> Agentic 任務：讓 AI 模型自主規劃、分解並執行多步驟任務（如自動修 bug、生成並測試程式碼），而非只進行單輪問答的應用場景。\n\nMoE 架構的關鍵優勢在於推理時僅激活 3B 參數，大幅降低計算開銷，同時維持 35B 總參數帶來的能力廣度。搭配 Agent 框架後，模型的長程推理和工具呼叫效率得到充分發揮，在複雜編程任務中展現出遠超靜態基準測試的實戰能力。\n\n量化策略選擇對 MoE 模型的影響尤為顯著。HN 社群工程師建議優先選用 Q8 或 Q6_UD 量化版，並強調在激活參數極少的 MoE 架構中不進行 KV cache 量化的重要性——即便 KL 散度下降看似微小，對最終推理品質的影響仍然實質可感。\n\n#### 開源模型競爭格局與社群展望\n\nQwen3.6-27B 的發布標誌著開源模型在「效能密度」上達到新高點：以前代旗艦 1/14 的體積，在四項編程基準全面勝出，打破了「更大模型 = 更強效能」的慣性認知。\n\nBluesky 社群的 timkellogg.me 對小模型與 Opus 4.5 
的比較感到震驚。多位開發者在完成前端設計與 agentic 基準測試後，表示效能提升遠超預期，Qwen 3.6 27B 相較前代幾乎是跨代躍升。\n\nReddit r/LocalLLaMA 已有研究者針對 MoE 姊妹款的 agentic 表現進行系統性實驗，完整的跨模型比較數據正在引發廣泛討論與跟進研究。未來競爭的核心問題將不再是「開源模型能不能用」，而是「在哪些任務上開源模型已經比商業 API 更划算」。","Qwen3.6-27B 在架構上的核心突破，是以極致的效能密度顛覆了「更大模型 = 更強能力」的慣性認知。在 27B 參數的前提下，它在四項主要編程基準全面超越體積達 807GB 的前代旗艦，背後有三個關鍵機制在發揮作用。\n\n#### 機制 1：Gated DeltaNet + Gated Attention 混合架構\n\n傳統 Transformer 的注意力機制隨序列長度呈二次方成本增長，而 Qwen3.6-27B 引入 Gated DeltaNet（線性注意力變體）與 Gated Attention 的混合設計，在長上下文處理上實現成本與效能的更優平衡。64 層架構搭配 5120 的隱層維度，提供足夠的表達能力以捕捉複雜程式邏輯結構。\n\n> **名詞解釋**\n> Gated DeltaNet：一種線性注意力機制的變體，透過門控機制動態調整記憶更新量，在保持線性時間複雜度的同時提升序列建模能力。\n\n#### 機制 2：YaRN 長上下文擴展\n\n原生支援 262,144 tokens 上下文視窗，透過 YaRN 技術可擴展至約 100 萬 tokens。對於需要分析大型程式碼庫或處理長文件的編程任務，這個能力格外關鍵——完整的上下文視窗讓模型可一次性「看見」整個專案架構，而非分批處理。\n\n#### 機制 3：單一 Checkpoint 雙模推理\n\nQwen3.6-27B 採用單一 checkpoint 同時支援「thinking 模式」與「非思考模式」兩種推理路徑。thinking 模式讓模型展開長程推理鏈以應對複雜任務，非思考模式在速度優先的場景下快速作答，省去維護兩個獨立模型的成本。\n\n> **白話比喻**\n> 想象一位工程師有兩種工作節奏：遇到難題時打開草稿本慢慢推導（thinking 模式），回答日常問題時則直接開口作答（非思考模式）。Qwen3.6-27B 就是這樣一位「同一個人，兩種節奏」的助手——切換模式不需要換人，只需切換一個開關。","#### 主要編程基準 (Qwen3.6-27B vs Qwen3.5-397B-A17B)\n\n- SWE-bench Verified：77.2 vs 76.2(+1.0)\n- SWE-bench Pro：53.5 vs 50.9(+2.6)\n- Terminal-Bench 2.0：59.3 vs 52.5(+6.8)\n- SkillsBench：48.2 vs 30.0(+18.2)\n\n#### 本地推理速度（社群硬體實測）\n\n- M5 Pro（128GB RAM，Q4_K_M 量化）：25.57 tokens/s（Simon Willison 驗證）\n- RTX 5090（32GB，Q6_K 量化）：30+ tokens/s\n- R9700（32GB，Q8 量化）：約 20 tokens/s",{"recommended":61,"avoid":66},[62,63,64,65],"需要本地部署且有隱私合規要求的企業編程助手場景","搭配 Agent 框架的自動化程式碼審查與修復 pipeline","大型程式碼庫分析（原生 262K tokens 上下文，無需分批處理）","受雲端 API 漲價或配額限制影響的高頻編程任務",[67,68,69],"需要超長上下文（>100 萬 tokens）且對 YaRN 延伸品質有嚴格要求的場景","視覺多模態為核心業務的場景（mmproj 視覺編碼器目前有 llama.cpp 載入相容性問題）","對推理 token 消耗極度敏感的場景（thinking 模式可能在簡單問題上燃燒 2000+ tokens）","#### 環境需求\n\n完整 F16 精度需 55.6GB VRAM（單張 A100-80G 或雙張 RTX 4090）；Q4_K_M 量化版約 16.8GB，可在單張 RTX 4090 或 RX 7900 XTX 上運行；Q8 量化約 30GB，適合具備全頻寬雙 PCIe 插槽的雙 16GB GPU 配置。建議 Python 3.10+、transformers >= 4.51.0。\n\n#### 最小 
PoC\n\n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = \"Qwen/Qwen3.6-27B\"\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n    model_name,\n    torch_dtype=\"auto\",\n    device_map=\"auto\"\n)\n\nmessages = [{\"role\": \"user\", \"content\": \"撰寫一個 binary search tree 的插入函式\"}]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True,\n    enable_thinking=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\noutputs = model.generate(**model_inputs, max_new_tokens=8192)\nprint(tokenizer.decode(outputs[0][len(model_inputs.input_ids[0]):]))\n```\n\n#### 驗測規劃\n\n建議以 SWE-bench Verified 公開測試集取樣 20 題進行本地基準驗測，比對官方公布的 77.2% 通過率。量化版本 (Q4 vs Q8) 的品質差異可透過 SkillsBench 子集快速量化，SkillsBench 的 48.2 vs 30.0 差距顯著，是最敏感的退化偵測器。\n\n#### 常見陷阱\n\n- thinking 模式在簡單問題上可能消耗 2000+ tokens，生產環境建議設定 max_tokens 上限或依問題複雜度動態切換模式\n- 視覺多模態的 mmproj 視覺編碼器在 llama.cpp 後端目前有載入相容性問題（社群 Day 1 實測回報），純文字任務可先略去視覺組件\n- Q4 量化版在開啟 KV cache 量化時品質下降明顯，MoE 模型尤甚——建議關閉 KV cache 量化\n\n#### 上線檢核清單\n\n- 觀測：tokens/s 推理速度、VRAM 峰值用量、thinking token 比例\n- 成本：量化等級 (Q4/Q6/Q8) 的品質-速度折衷點確認、多 GPU 分片方案的頻寬瓶頸評估\n- 風險：上下文超過 262K tokens 時 YaRN 延伸的品質穩定性、視覺模態功能的生產就緒狀態","#### 競爭版圖\n\n- **直接競品**：Mistral Devstral 2（MoE 架構，編程特化）、DeepSeek-Coder-V2（開源編程模型）、CodeLlama 70B（Meta 開源）\n- **間接競品**：Claude Opus API（商業雲端）、GPT-4o（OpenAI 雲端）、GitHub Copilot（整合式編程助手）\n\n#### 護城河類型\n\n- **工程護城河**：以 27B 超越 397B 的效能密度突破，以及 Gated DeltaNet + Gated Attention 混合架構帶來的長上下文優勢，形成短期技術壁壘\n- **生態護城河**：Apache 2.0 授權吸引企業在 Qwen 架構上進行微調和二次開發，積累的生態適配和調優經驗形成切換成本\n\n#### 定價策略\n\nApache 2.0「零授權費」策略，本質上以生態擴張換取未來雲端 API 流量。Alibaba Cloud 透過開源版本建立開發者黏性，引導有規模化需求的客戶轉向其商業 Qwen API。\n\n#### 企業導入阻力\n\n- 27B 參數仍需專業 GPU 硬體，中小型企業本地部署的硬體採購與維運成本不可低估\n- 缺乏企業級 SLA 支援、SOC 2 及 ISO 27001 等合規認證，大型企業採購流程有顧慮\n\n#### 第二序影響\n\n- 雲端 API 供應商（尤其 Anthropic）的高端定價壓力持續上升，可能加速雲端服務降價或功能差異化\n- 開源編程模型生態加速成熟，企業 AI 工具鏈「自建比採購便宜」的轉折點提前到來\n\n#### 
判決：值得立即啟動 PoC（效能已達門檻，視覺與生態仍待驗證）\n\n效能已達雲端替代的嚴肅競爭者水準，但視覺模態問題、thinking token 消耗和企業支援缺口意味著生產環境風險仍在。建議有明確編程自動化場景的團隊立即啟動 PoC，同時保留雲端 fallback，待生態系穩定後再評估全面切換。",[73,74],"Q4_K_M 量化版在 thinking 模式下存在 token 過度消耗問題（社群實測：回應簡單問候即燃燒 2000+ tokens），高頻推理場景的實際成本需重新評估","SWE-bench 等基準成績優異，但難以完全反映真實生產環境的程式碼品質；視覺多模態功能目前有 mmproj 載入問題，標榜的多模態能力尚待實際驗證",[76,80,84,87,91],{"platform":77,"user":78,"quote":79},"Reddit r/LocalLLaMA","u/_raydeStar(Reddit r/LocalLLaMA)","與此同時 Opus 收緊限制並調漲定價——這對本地部署來說是一場完美風暴。",{"platform":81,"user":82,"quote":83},"Hacker News","syntaxing(HN)","Q8 或 Q6_UD，且不要進行 KV cache 量化。我敢說這在激活參數極少的 MoE 模型上影響更顯著，儘管 KL 散度下降看似微不足道。",{"platform":77,"user":85,"quote":86},"u/ljubobratovicrelja(Reddit r/LocalLLaMA)","過去幾週我自己也在思考這個課題，而你做了我大部分想做的事，非常感謝！驚人的發現，乾杯！",{"platform":88,"user":89,"quote":90},"Bluesky","timkellogg.me（Bluesky，56 upvotes）","Qwen 3.6 27B（密集架構）——兄弟們，他們把這個小模型拿去跟 Opus 4.5 比，而且表現相當不錯，讓我震驚了。",{"platform":92,"user":93,"quote":94},"X","@kylehessling1(X)","我完全震驚了。Qwen 3.6 27B 相較於 Qwen 3.5 27B 簡直是躍升到 Qwen 4 的等級。我跑了完整的前端設計測試和 agentic 基準，全部由它自己生成。結論：比我預期的好太多了，我完全震驚。",4,5,"值得一試",[99,102,105],{"type":100,"text":101},"Try","下載 Q4_K_M GGUF 量化版 (16.8GB) ，在本地 Ollama 或 llama.cpp 跑一輪實際編程任務，直接比對與慣用雲端 API 的品質和速度差異",{"type":103,"text":104},"Build","以 Qwen3.6-35B-A3B（MoE 版）為基座，搭配 LangGraph 或 AutoGen 框架，建構 agentic 編程 pipeline，記錄真實 SWE-bench 式任務的完成率",{"type":106,"text":107},"Watch","追蹤 Unsloth Dynamic GGUFs 更新進度、視覺多模態 mmproj 修復狀態，以及 Reddit r/LocalLLaMA 社群的 MoE agentic 系統性評測跟進報告",{"category":18,"source":14,"title":109,"subtitle":110,"publishDate":6,"tier1Source":111,"supplementSources":114,"tldr":131,"context":140,"mechanics":141,"benchmark":142,"useCases":143,"engineerLens":154,"businessLens":155,"devilsAdvocate":156,"community":160,"hypeScore":95,"hypeMax":96,"adoptionAdvice":170,"actionItems":171},"OpenAI 推出 ChatGPT Workspace Agent，Codex 驅動企業級自動化","從個人助理到跨部門數位員工，Workspace Agent 重新定義 ChatGPT 
的企業角色",{"name":112,"url":113},"OpenAI","https://openai.com/index/introducing-workspace-agents-in-chatgpt/",[115,119,123,127],{"name":116,"url":117,"detail":118},"9to5Mac","https://9to5mac.com/2026/04/22/openai-updates-chatgpt-with-codex-powered-workspace-agents-for-teams/","Codex 驅動 Workspace Agent 的功能細節與企業使用場景報導",{"name":120,"url":121,"detail":122},"The Decoder","https://the-decoder.com/openai-launches-workspace-agents-that-turn-chatgpt-from-a-chatbot-into-a-team-automation-platform/","ChatGPT 從聊天機器人轉型為團隊自動化平台的深度分析",{"name":124,"url":125,"detail":126},"Prism News","https://www.prismnews.com/news/openai-launches-cloud-workspace-agents-for-chatgpt-business","企業雲端 agent 競爭策略定位分析",{"name":128,"url":129,"detail":130},"OpenAI Developers — Codex Pricing","https://developers.openai.com/codex/pricing","Codex 模型 credit 計費費率官方文件",{"tagline":132,"points":133},"ChatGPT 進化為跨部門數位員工——睡覺時它還在工作",[134,136,138],{"label":49,"text":135},"Workspace Agent 底層由 Codex 引擎驅動，具備持久記憶、跨 session 學習、排程執行與 MCP 伺服器整合，實現真正的雲端非同步自動化工作流程。",{"label":52,"text":137},"免費試用期至 2026 年 5 月 6 日，之後採 credit-based 計費；GPT-5.4 輸出費率為 375 credits/1M tokens，長期帳單難以預測，需審慎評估。",{"label":55,"text":139},"目前限定 Business、Enterprise、Edu 方案；Enterprise 預設停用需管理員開啟；EKM 帳號暫不支援，全面部署預計需 3-6 個月安全審查準備期。","#### 章節一：Workspace Agent 功能與運作機制\n\nOpenAI 於 2026 年 4 月 22 日正式發布 ChatGPT Workspace Agents，定位為「GPTs 的進化版本」，核心目標是讓組織內的自動化 agent 能跨工具、跨團隊執行複雜的多步驟工作流程。\n\nWorkspace Agent 具備持久記憶 (persistent memory) ，可跨 session 累積學習，效能隨使用時間持續提升。支援整合的工具涵蓋檔案系統、程式碼執行環境、連線 App、排程觸發器及自訂 MCP 伺服器，並可在使用者不在線的情況下持續排程執行。\n\n典型用例包含四類：Software Reviewer（政策合規檢查＋自動建立 IT ticket）、Product Feedback Router（多頻道監控＋週報彙整）、Weekly Metrics Reporter（自動蒐集並分發資料）、Lead Scoring（銷售線索評分與外展自動化）。\n\n值得注意的是，現有 GPT 可透過即將推出的轉換工具直接升級為 Workspace Agent，大幅降低既有使用者的遷移門檻。\n\n#### 章節二：Codex 引擎的雲端自動化架構\n\nWorkspace Agent 底層由 OpenAI 的 Codex 引擎驅動，以 GPT-5.3-Codex 模型作為核心推論引擎，同時支援 GPT-5.4 與 GPT-5.4-mini 模型執行 agent 任務。\n\n雲端原生架構是這次設計的關鍵突破——agent 在雲端持續運行，不依賴使用者本地裝置或在線狀態，實現真正的非同步自動化。\n\n計費方面，免費試用期至 2026 年 5 月 6 
日截止，之後改為 credit-based 計費模式。GPT-5.4 費率為輸入 62.5 credits/1M tokens、輸出 375 credits/1M tokens；GPT-5.3-Codex 則為輸入 43.75 credits/1M tokens、輸出 350 credits/1M tokens。\n\nBusiness 方案提供較大虛擬機以加速雲端任務，Enterprise 方案享有優先請求處理待遇。這套階梯式架構讓企業可依任務複雜度與預算選擇適合的模型版本。\n\n#### 章節三：企業安全與權限管理設計\n\nWorkspace Agent 採用 RBAC 機制，管理員可精細控制誰能建立 agent、誰可使用哪些工具，並提供稽核日誌 (Audit Logs) 與集中管理介面。\n\n> **名詞解釋**\n> RBAC（Role-Based Access Control，角色型存取控制）：依使用者角色（如管理員、一般員工）決定系統存取權限的機制，而非針對個別使用者逐一設定，是企業 IT 治理的主流授權模型。\n\n為降低企業未預期的暴露風險，Enterprise 工作區預設為停用狀態，需管理員主動開啟；使用 EKM(Enterprise Key Management) 的帳號目前暫不支援此功能。\n\nagent 可同時存取行事曆、SharePoint 文件、網路資料與內部系統，若權限設定不當，可能導致敏感資訊誤導向或非預期操作。\n\n平台雖內建 Prompt Injection 攻擊防禦與 Compliance API 監控，但企業導入前仍需審慎規劃存取控制邊界，不可將安全責任全數委由平台承擔。\n\n#### 章節四：對企業 AI 協作生態的影響\n\nWorkspace Agent 的發布標誌著 ChatGPT 在企業場景的定位轉型——從「個人助理」升級為「跨部門數位員工」。「build once， use together， improve over time」的共用模型設計，讓各部門無需重複建置 agent，直接共享持續優化的工作流程。\n\nOpenAI 在 Slack 平台已深度布局：170 個以上的 Connect 頻道、逾 500 萬條訊息歷史（自 2018 年起）。Workspace Agent 可直接嵌入現有溝通流程，而非要求企業改變工作習慣，這是相較於競品的關鍵差異化優勢。\n\n競爭格局面臨直接衝擊。Microsoft Copilot 與 Google Workspace AI 長期深耕企業工作流程，OpenAI 此次直接切入同一領域，使三方正面交鋒更加激烈。\n\n這只是 OpenAI 2026 年 4 月密集發布節奏的一部分，同期還有 ChatGPT Images 2、Codex Chronicle 與 Mac 版 Codex 大更新，顯示 OpenAI 正在加速企業市場的整體布局。","Workspace Agent 的核心架構突破在於三個層面的技術整合——持久記憶、雲端非同步執行、以及跨工具協作能力。這三者的組合讓 agent 從「對話工具」進化為「可持續運作的數位同事」。\n\n#### 機制 1：持久記憶與跨 Session 學習\n\n傳統 ChatGPT 每次對話都從空白狀態開始，而 Workspace Agent 具備持久記憶機制，能跨 session 累積組織特定的知識——包含業務規則、偏好回應格式、常見例外處理方式。\n\n隨著使用時間增長，agent 的輸出品質持續提升，形成良性的學習飛輪。既有 GPT 可透過即將推出的轉換工具直接升級，使組織在 GPTs 時代已沉澱的設定不至於白費。\n\n#### 機制 2：雲端非同步執行引擎\n\n底層由 Codex 引擎（GPT-5.3-Codex 模型）驅動，採用雲端原生架構。agent 在雲端伺服器持續運行，支援排程觸發，不需使用者在線即可執行任務。\n\n這與傳統 RPA 工具「需要本地機器保持開機」的模式截然不同。典型場景如：每週一早上自動彙整銷售數據並寄送報告，或在夜間持續監控多個 Slack 頻道的客訴訊號。\n\n> **名詞解釋**\n> RPA（Robotic Process Automation，機器人流程自動化）：透過軟體機器人模擬人類操作介面（如滑鼠點擊、鍵盤輸入）自動化重複性工作任務的技術，傳統上依賴本地機器保持運行狀態。\n\n#### 機制 3：跨工具整合與 MCP 擴充\n\nWorkspace Agent 可整合的工具類型涵蓋：\n\n- 檔案系統（SharePoint、Google Drive）\n- 程式碼執行環境\n- 連線 App（Slack、行事曆、CRM）\n- 
排程觸發器\n- 自訂 MCP(Model Context Protocol) 伺服器\n\nMCP 協定的支援尤為關鍵——企業可將內部系統（ERP、資料庫、內部 API）包裝為 MCP 伺服器，讓 agent 以標準化方式存取，無需 OpenAI 官方提供個別整合。\n\n> **白話比喻**\n> 想像 Workspace Agent 是一位永遠在線的資深員工：你只需要培訓他一次（建立 agent、設定工具權限），之後他會記住所有你教過的事，在你睡覺時繼續工作，而且越做越上手。MCP 伺服器就像公司的門禁卡——讓他只能進入被授權的系統，不會亂闖。","目前為 Research Preview 階段，OpenAI 尚未發布 Workspace Agent 的正式效能基準測試數據。\n\n#### 計費費率參考\n\n以下為可用於成本估算的官方 credit 費率：\n\n- GPT-5.4：輸入 62.5 credits/1M tokens、輸出 375 credits/1M tokens\n- GPT-5.3-Codex：輸入 43.75 credits/1M tokens、輸出 350 credits/1M tokens\n- GPT-5.4-mini：費率較低（詳見 OpenAI 開發者定價頁面）\n\n實際任務成本需依工作流程的輸入輸出比例估算，排程 agent 的長期費用難以預測，上線前建議進行壓力測試並設定用量上限告警。",{"recommended":144,"avoid":149},[145,146,147,148],"跨 Slack 頻道的客訴監控與週報彙整——低風險、高頻率、輸出可人工驗證","銷售線索評分與初步外展訊息草稿生成——可與人工審核流程並行執行","軟體 PR 政策合規初步審查——降低工程師重複性審查負擔","內部資料報告的定期自動蒐集與分發——替代 Excel macro 或 Zapier 工作流程",[150,151,152,153],"財務系統直接操作或付款授權——自動決策風險過高，需保留人工審核節點","EKM 帳號環境——官方明確表示暫不支援，功能無法啟用","需要即時響應（低延遲）的場景——雲端排程架構不適合毫秒級回應需求","含高度敏感個資的流程——GDPR/PDPA 合規審查完成前不宜貿然部署","#### 環境需求\n\n目前僅限 Business、Enterprise、Edu 及 Teachers 方案使用；Enterprise 工作區需管理員主動開啟 Workspace Agent 功能。使用 EKM 的帳號暫不支援，需等待後續更新公告。\n\n建議先在非生產環境（獨立測試工作區）驗證 agent 行為與權限邊界，確認無誤後再推廣至正式部署，避免敏感工具在測試期間被誤觸。\n\n#### 最小 PoC\n\n以「自動彙整 Slack 頻道週報」為最低風險起點：\n\n```text\n1. 在 ChatGPT 建立新 Workspace Agent（命名與用途描述）\n2. 連線目標 Slack 工作區（需管理員授權 OAuth 範圍）\n3. 指定監控頻道：如 #product-feedback、#customer-support\n4. 設定排程：每週五 17:00 觸發\n5. 定義輸出格式：markdown 摘要 + 優先議題列表\n6. 手動觸發一次，驗證輸出品質與頻道存取正確性\n7. 
確認無誤後啟用自動排程\n```\n\n#### 驗測規劃\n\n驗測重點應涵蓋三個面向：\n\n- 功能正確性：agent 能否正確識別並執行指定任務，輸出格式是否符合預期範本\n- 權限邊界：agent 是否只存取被授權的資料範圍，不跨越 RBAC 設定的邊界\n- 排程可靠性：定時觸發是否穩定，失敗時是否觸發通知機制（郵件告警或 Slack 推播）\n\n#### 常見陷阱\n\n- 過度授權：初期為求便利給予過大存取範圍，導致 agent 可接觸不應碰觸的敏感資料\n- Prompt Injection 風險殘留：平台雖有內建防護，但外部輸入（如使用者上傳文件、第三方 webhook 內容）仍可能含有惡意指令\n- Credit 成本估算失準：排程 agent 的 token 消耗難以預測，上線前務必設定用量上限與超限告警\n- MCP 伺服器認證疏漏：若自訂 MCP 伺服器未妥善實作 token 驗證，可能成為未授權存取的入口\n\n#### 上線檢核清單\n\n- 觀測：Credit 月用量監控儀表板、任務執行成功率追蹤、輸出品質抽查頻率（前兩週建議每日抽查）\n- 成本：預估月用量 × credits 費率，設定超限告警閾值，向管理層報告預算基準\n- 風險：RBAC 權限矩陣審查（最小權限原則）、稽核日誌保留期限確認（建議 90 天以上）、財務或 HR 等敏感工具是否設有額外人工審批節點","#### 競爭版圖\n\n- **直接競品**：Microsoft Copilot（深度整合 M365 生態，已大規模商業部署）、Google Gemini for Workspace（Google Workspace 原生整合）\n- **間接競品**：Zapier AI、Make.com（工作流程自動化平台）、UiPath（傳統 RPA 轉型 AI）、Slack AI（平台原生 AI 功能，直接競爭 Slack 整合場景）\n\n#### 護城河類型\n\n- **工程護城河**：Codex 模型對多步驟推理與程式碼生成的特化能力；雲端非同步執行架構與持久記憶系統，短期難以快速複製\n- **生態護城河**：OpenAI 在 Slack 平台 170 個以上 Connect 頻道、500 萬條訊息歷史的深度整合；GPTs 龐大既有用戶基礎可直接升級，大幅降低遷移阻力\n\n#### 定價策略\n\n採用 credit-based 計費模式，免費試用期至 2026 年 5 月 6 日。這種結構將 agent 用量綁定於現有 ChatGPT 訂閱，降低企業導入的決策摩擦——不需要額外預算審批新工具，只需在現有方案內增加用量。\n\n長期來看，credit 費率的不透明性（月帳單難以預測）可能成為企業財務部門的顧慮點，這恰好是 Microsoft Copilot 固定月費模式的相對優勢。\n\n#### 企業導入阻力\n\n- 安全合規審查週期長：agent 跨系統存取的權限設計，須通過 IT 與法務部門評估，一般需 1-3 個月\n- EKM 帳號不支援：排除高安全性需求客戶（金融、政府），縮小短期可觸及市場\n- 角色分工轉變：從「使用 ChatGPT」到「設計並維護 Workspace Agent」需要新技能組合，培訓成本不可忽視\n- 既有工具替換摩擦：已部署 Copilot 或 Google Workspace AI 的組織，替換需要遷移成本與重新培訓\n\n#### 第二序影響\n\n- 企業 IT 採購格局重塑：Workspace Agent 若獲大規模採用，可能取代多個單點 SaaS 工具，衝擊中小型 B2B SaaS 生態\n- 新職位出現：「AI Agent 管理員」可能成為企業 IT 部門的新標準角色，負責 agent 設計、維護與稽核\n- 監管壓力上升：agent 自動決策行為將促使監管機構關注 AI 在企業流程中的責任歸屬，預計帶動合規工具需求\n\n#### 判決：護城河正在成形，企業應啟動沙盒評估（全面部署需 3-6 個月準備）\n\nWorkspace Agent 的技術整合深度與生態布局已超越一般 AI 助手定位。對 OpenAI 既有企業訂閱用戶而言，沙盒 PoC 的機會成本極低，值得立即啟動。\n\n但 EKM 限制、credit 成本不確定性與安全審查需求，使全面部署至少需要 3-6 個月準備期。Microsoft 與 Google 的反制動作值得同步追蹤。",[157,158,159],"Workspace Agent 高度依賴 OpenAI 雲端服務，一旦服務中斷或 OpenAI 
調整政策（如封禁特定地區、提高費率），組織的關鍵業務流程將面臨單點失敗風險，自主可控程度遠低於自建方案","Credit-based 計費模式在 agent 大規模採用後，月帳單可能遠超傳統 SaaS 訂閱，尤其排程 agent 的 token 消耗難以預測，財務部門難以做出可靠的年度預算規劃","「持久記憶累積組織知識」是雙刃劍——錯誤的訓練資料或被操縱的輸入一旦進入記憶體，影響可能長期殘留且難以清除，比一般軟體 bug 更難溯源修復",[161,164,167],{"platform":92,"user":162,"quote":163},"levie(Box CEO)","這可能是軟體走向無頭化 (headless) 迄今最重要的消息，將把知識型工作 agent 帶給大眾。新版 ChatGPT agent 可存取你想要使用的任何工具與資料，並具備完整的程式編寫與工具呼叫能力。",{"platform":81,"user":165,"quote":166},"newtwilly(HN)","這就是我使用 Cursor 的原因。我的公司有付費購買，不過我也可以改用 Claude Code 或更頻繁使用 Codex，因為我同時持有 ChatGPT 企業帳號。或許透過合適的終端機軟體可以解決，但我喜歡用 GUI 介面查看正在運行的 agent 並瀏覽所有對話；另外，它支援在同一工具中使用多個模型供應商，讓我可以在 OpenAI 與 Anthropic 之間靈活切換。",{"platform":92,"user":168,"quote":169},"@thsottiaux（X 用戶）","Workspace agent 的能力令人驚艷。底層由 Codex 驅動，與我們在這裡開源的實作相同。","先觀望",[172,174,176],{"type":100,"text":173},"在 Business 或 Enterprise ChatGPT 帳號建立第一個 Workspace Agent，選擇低風險任務（如 Slack 頻道週報彙整）驗證基本功能與排程穩定性。",{"type":103,"text":175},"設計整合 Slack 與檔案系統的自動化工作流程原型，測試自訂 MCP 伺服器擴充能力，並建立 RBAC 權限矩陣草案供 IT 安全審查。",{"type":106,"text":177},"追蹤 2026 年 5 月 6 日後的 credit 計費實際成本、EKM 支援時程公告，以及 Microsoft Copilot 與 Google Workspace AI 的競品反應動態。",{"category":18,"source":13,"title":179,"subtitle":180,"publishDate":6,"tier1Source":181,"supplementSources":184,"tldr":201,"context":210,"mechanics":211,"benchmark":212,"useCases":213,"engineerLens":223,"businessLens":224,"devilsAdvocate":225,"community":229,"hypeScore":95,"hypeMax":96,"adoptionAdvice":170,"actionItems":245},"Google 發表第八代 TPU 雙晶片架構，為 Agent 推論時代量身打造","TPU 8t 與 TPU 8i 分工訓練與推論，Virgo Network 突破百萬晶片線性擴展",{"name":182,"url":183},"Google Blog","https://blog.google/innovation-and-ai/infrastructure-and-cloud/google-cloud/tpus-8t-8i-cloud-next/",[185,189,193,197],{"name":186,"url":187,"detail":188},"Google Blog：第八代 TPU 詳細介紹","https://blog.google/innovation-and-ai/infrastructure-and-cloud/google-cloud/eighth-generation-tpu-agentic-era/","官方第八代 TPU 深度技術說明，涵蓋 Virgo Network 與 Boardfly 拓撲設計細節",{"name":190,"url":191,"detail":192},"Hacker News 
討論串","https://news.ycombinator.com/item?id=47862497","社群對 TPU 8t/8i 製造商披露 (Broadcom + MediaTek) 與模型行為一致性問題的討論",{"name":194,"url":195,"detail":196},"TechCrunch：Google 發表新 AI 晶片對抗 Nvidia","https://techcrunch.com/2026/04/22/google-cloud-next-new-tpu-ai-chips-compete-with-nvidia/","Patrick Moorhead 分析師歷史警示與市場格局分析",{"name":198,"url":199,"detail":200},"The Decoder：Google Cloud Next '26 報導","https://the-decoder.com/google-unveils-8th-gen-tpus-agent-platform-and-workspace-ai-layer-at-cloud-next-26/","NVIDIA Rubin 對比分析與 Google 雙軌策略解析",{"tagline":202,"points":203},"訓練與推論正式分家，Google 用兩款晶片押注 Agentic 時代",[204,206,208],{"label":49,"text":205},"TPU 8t 單 superpod 達 121 ExaFlops，可擴展至 100 萬顆晶片；TPU 8i 配備 384 MB 片上 SRAM，讓 KV cache 完全駐留片上，專為多輪對話 Agent 的低延遲推論設計。",{"label":52,"text":207},"HN 社群估算 Google 內部 TPU 訓練成本可能比 Nvidia GPU 低一個數量級，但外部定價尚未公布，企業採購的實際成本效益待 2026 年底 GA 後觀察。",{"label":55,"text":209},"兩款晶片 2026 年底 GA 前仍在 Preview 階段；PyTorch 生態遷移成本高，Gemini 模型版本行為不一致問題才是 Agentic pipeline 能否規模化的真正門檻。","#### 章節一：TPU v8 雙晶片分工架構解析\n\n2026 年 4 月，Google 在 Cloud Next '26 正式發表第八代 TPU，做出了標誌性的架構決策：將「一代一款」的單一路線，正式拆分為 TPU 8t（訓練）與 TPU 8i（推論）兩款專用晶片。\n\nGoogle 官方部落格的定位開宗明義：「兩款專用晶片，為 Agentic 時代提供動力。」這並非行銷語言，而是對底層工作負載特性的直接回應——訓練晶片追求橫向擴展的算力天花板，推論晶片追求單 token 延遲的極致壓縮。\n\nTPU 8t 單 superpod 算力達 121 ExaFlops，可整合 9,600 顆晶片，透過 Virgo Network 可將單一邏輯叢集擴展至 100 萬顆晶片，Goodput（有效計算時間佔比）超過 97%，interchip 頻寬為前代 2 倍，儲存存取速度為前代 10 倍。\n\nTPU 8i 則配備 288 GB HBM、384 MB 片上 SRAM（前代 3 倍），引入 Boardfly 拓撲架構與 Collectives Acceleration Engine，最高可降低 5 倍 MoE 集體通訊延遲。\n\n兩款晶片均採用 Google 自研 Arm 架構 Axion CPU，製造商分別為 Broadcom(8t) 與 MediaTek(8i) ，預計 2026 年底 GA。\n\n#### 章節二：Agent 工作負載的硬體特化設計\n\nAgent 推論與傳統批次推論的根本差異，在於 KV cache 的存取模式——多輪對話、工具呼叫、上下文保留，使記憶體容量與延遲成為決定用戶體驗的關鍵瓶頸。\n\nTPU 8i 的 384 MB 片上 SRAM 設計，讓 KV cache 可完全保存於片上，避免頻繁存取 HBM 造成的延遲抖動。Google 官方明確表示：「TPU 8i 專為讓 AI Agent 能以極快速度完成多步驟工作流程而設計。」\n\n> **名詞解釋**\n> KV cache(Key-Value Cache) ：大型語言模型推論時，將每輪對話的注意力機制中間結果快取起來，避免重複計算，是影響長對話延遲的核心資料結構。\n\nManaged Lustre 儲存系統可將資料直接送入加速器記憶體，消除 Agent 
長上下文場景的 I/O 瓶頸。對比 NVIDIA NVLink 域上限（最多 576 顆晶片），Google 的橫向擴展架構在大規模 Agent 服務上具有結構性優勢。\n\n#### 章節三：社群技術評析與未解疑問\n\nHN 社群對這波發表的態度分歧明顯。技術派關注 Broadcom + MediaTek 製造商組合的供應鏈含義，以及 97% Goodput 在大規模訓練中的實際意義。\n\n懷疑派則指向更核心的問題：硬體性能提升，是否真的會反映在 Gemini 模型的體驗改善，還是只降低 Google 自身的訓練成本？社群用戶 overfeed 點出實務痛點：新模型行為與前代不一致，導致 pipeline 依賴特定版本，代際升級帶來維運負擔。\n\nTechCrunch 引述分析師 Patrick Moorhead 的歷史警示：Google 早在 2016 年就聲稱 TPU 優於 Nvidia GPU，但 Nvidia 市值至今已達 5 兆美元。custom silicon 的技術優勢能否轉化為市場份額，仍是未解之問。\n\n#### 章節四：Google 自研晶片的長期戰略定位\n\nGoogle 此次明確表示不打算取代 Nvidia——2026 年稍晚仍將引入 Nvidia Vera Rubin 晶片，並與 Nvidia 在 Falcon 網路軟體上合作。這是「補充而非替代」的雙軌策略：自研 TPU 服務 Google 內部訓練與 Cloud 特定客戶，Nvidia GPU 服務更廣泛生態。\n\nThe Decoder 指出：NVIDIA Rubin 單晶片峰值算力與記憶體頻寬仍高於 TPU 8t，但 NVLink 的域規模天花板（576 顆）限制了超大規模訓練的可能性。Google 的 Virgo Network 百萬晶片線性擴展，在超大規模預訓練場景具有結構性優勢。\n\nHN 社群估算，Google 內部使用 TPU 訓練模型的成本可能比 Nvidia GPU 低「一個數量級」——這個成本差距，或許才是 Google 垂直整合策略的真正護城河，而非晶片本身的效能數字。","Google 此次雙晶片架構並非簡單的產品線拆分，而是對訓練與推論兩種工作負載在記憶體存取模式、通訊拓撲、延遲敏感度上根本差異的硬體回應。\n\n#### 機制 1：TPU 8t 的超大規模訓練設計\n\nTPU 8t 透過 Virgo Network 實現橫向線性擴展，單一邏輯叢集可達 100 萬顆晶片，Goodput 超過 97%。interchip 頻寬為前代 2 倍，儲存存取速度為前代 10 倍，確保大規模訓練的有效算力利用率。\n\n> **名詞解釋**\n> Goodput（有效計算時間佔比）：在分散式訓練中，扣除通訊等待、記憶體搬移、設備故障等非計算時間後，真正用於模型訓練計算的時間比例。97% 意味著僅 3% 時間用於開銷，是業界極高水準。\n\n#### 機制 2：TPU 8i 的低延遲推論優化\n\nTPU 8i 配備 384 MB 片上 SRAM（前代 3 倍），讓 KV cache 完全駐留片上，消除 HBM 存取造成的延遲抖動。Boardfly 拓撲架構將網路直徑縮減 50%，Collectives Acceleration Engine 專門加速 MoE 模型集體通訊，最高降低 5 倍延遲。\n\n> **名詞解釋**\n> MoE(Mixture-of-Experts) ：一種稀疏神經網路架構，每次推論只激活模型中少數「專家」子網路，可在不增加推論成本的情況下大幅擴大模型總參數量。集體通訊是 MoE 的主要延遲來源之一。\n\n#### 機制 3：Virgo Network 的橫向擴展能力\n\nVirgo Network 是 TPU 8t 突破 NVLink 域規模上限的核心技術。NVIDIA NVLink 最大支援 576 顆晶片組成一個域，而 Virgo Network 可線性擴展至 100 萬顆晶片，在超大規模預訓練場景下提供結構性成本優勢。\n\n> **白話比喻**\n> 把 TPU 8t 想成高速公路系統：Virgo Network 是讓幾百萬輛車都能不塞車的多層立交系統；Goodput 97% 就像道路實際使用效率 97%，幾乎沒有空駛浪費。TPU 8i 則更像特種快遞：專跑短距離、高頻次的任務，每一包都要求最快到達。","#### 算力指標\n\n- TPU 8t 單 superpod：121 ExaFlops\n- TPU 8t superpod 晶片數：9,600 顆\n- TPU 8t 可擴展至：100 萬顆晶片 (Virgo Network)\n- Goodput：> 97%\n- 
interchip 頻寬：前代 2 倍\n- 儲存存取速度：前代 10 倍\n\n#### TPU 8i 記憶體規格\n\n- HBM：288 GB\n- 片上 SRAM：384 MB（前代 3 倍）\n- 網路直徑縮減：50%（Boardfly 拓撲）\n- MoE 集體通訊延遲降低：最高 5 倍\n\n#### 效率對比\n\n- TPU 8t 性能／瓦特：較 Ironwood 提升 2 倍 (124% performance per watt)\n- TPU 8i 性能／瓦特：較前代提升 117%\n- NVIDIA NVLink 域上限：576 顆晶片（對比 Virgo Network 的 100 萬）",{"recommended":214,"avoid":219},[215,216,217,218],"超大規模基礎模型預訓練（TPU 8t + Virgo Network 百萬晶片擴展）","多輪對話 Agent 服務低延遲推論（TPU 8i 片上 SRAM KV cache）","MoE 架構模型部署（Collectives Acceleration Engine 優化集體通訊）","Google Cloud 原生 Gemini 應用場景與 Vertex AI 整合",[220,221,222],"需要 CUDA 生態相容的既有 PyTorch 工作負載（遷移成本高）","2026 年底 GA 前的高可用生產部署（Preview 階段 SLA 保障有限）","需要跨雲多廠商策略的企業架構（TPU 只能在 Google Cloud 上使用）","#### 環境需求\n\nTPU 8t 與 TPU 8i 均透過 Google Cloud TPU v8 服務存取，需要 Google Cloud 帳戶與 Cloud TPU API 授權。兩款晶片預計 2026 年底 GA，目前仍在 Preview 階段，需申請早期存取資格。建議使用 JAX 框架或 TensorFlow 2.x + XLA 後端；Python 3.10+ 環境。\n\n#### 最小 PoC\n\n```python\nimport jax\nimport jax.numpy as jnp\nimport time\n\n# 確認 TPU 8i 設備可見\ndevices = jax.devices('tpu')\nprint(f\"Available TPU devices: {len(devices)}\")\n\n# 測試片上 SRAM KV cache 的低延遲推論\n@jax.jit\ndef inference_step(model_params, input_ids, kv_cache):\n    return model.forward(input_ids, kv_cache=kv_cache)\n\n# 量測 TTFT（Time to First Token）基準\nstart = time.perf_counter()\nresult = inference_step(params, test_input, cache)\nttft = time.perf_counter() - start\nprint(f\"TTFT: {ttft * 1000:.2f}ms\")\n```\n\n#### 驗測規劃\n\n測試 TPU 8i 的 Agent 低延遲性能時，重點量測多輪對話場景的 P99 延遲，比較啟用片上 SRAM KV cache 前後的差異。建議設計長上下文 (32K+ tokens) 多輪對話場景，分別在 TPU 8i 與 A100/H100 上量測 TTFT 與 TPOT(Time Per Output Token) ，並記錄 MoE 架構模型下的集體通訊開銷。\n\n#### 常見陷阱\n\n- TPU 8t 的 Virgo Network 擴展需要正確的 JAX pjit 並行策略配置，錯誤的 mesh 設定會顯著降低 Goodput\n- TPU 8i 的片上 SRAM KV cache 優勢只在模型能完整配置於片上時才能發揮，過大的模型反而增加 HBM 碎片化風險\n- MediaTek 製造的 TPU 8i 與 Broadcom 製造的 TPU 8t 使用不同驅動版本，混合部署時需注意版本相容性\n- Preview 階段的 API 介面可能在 GA 時變動，避免在此階段硬編碼特定 TPU 拓撲參數\n\n#### 上線檢核清單\n\n- 觀測：TTFT P50/P99、KV cache 命中率、片上 SRAM 使用率、Goodput 百分比\n- 成本：比較 TPU 8i 與 Nvidia H100 的每百萬 
token 成本；Agent 多步驟工作流程總費用\n- 風險：2026 年底 GA 前的 SLA 保障範圍、Gemini 模型版本行為一致性、JAX 生態系鎖定風險","#### 競爭版圖\n\n- **直接競品**：NVIDIA H100/H200、Blackwell B200、Vera Rubin（預計 2026 年稍晚）\n- **間接競品**：AWS Trainium/Inferentia、Microsoft Azure Maia、Meta MTIA\n\n#### 護城河類型\n\n- **工程護城河**：Virgo Network 的百萬晶片線性擴展能力，NVIDIA NVLink 最多 576 顆晶片的架構上限難以短期突破；TPU 8i 的片上 SRAM KV cache 設計是針對 Agent 工作負載的特化優勢\n- **生態護城河**：TPU 只能在 Google Cloud 上使用，與 Gemini 模型、Vertex AI、Google Workspace 深度整合，形成「用 Google 模型就用 Google 晶片」的生態鎖定\n\n#### 定價策略\n\nGoogle 官方尚未公布 TPU 8t/8i 的具體定價。HN 社群估算，Google 內部使用 TPU 訓練模型的成本可能比 Nvidia GPU 低「一個數量級」，但這是內部成本，外部客戶能否享受同等折扣仍不明確。\n\nGoogle 明確表示 2026 年稍晚仍將引入 Nvidia Vera Rubin，表明定價策略不試圖在所有場景取代 Nvidia，而是針對特定高價值客戶提供 TPU 選項。\n\n#### 企業導入阻力\n\n- JAX/TensorFlow 生態鎖定：大多數企業 ML 工作負載建立在 PyTorch 上，遷移成本高\n- Gemini 模型行為一致性問題：新模型版本行為不穩定，pipeline 依賴特定版本，代際升級帶來維運負擔\n- 2026 年底 GA 前的早期存取限制：企業採購決策週期與 GA 時程存在落差\n\n#### 第二序影響\n\n- 若 Google 成功將訓練成本降低一個數量級，Gemini 定價空間將大幅擴大，可能引發 OpenAI/Anthropic 的降價壓力\n- MediaTek 進入 AI 推論晶片代工市場，可能影響台積電在 AI 晶片代工市場的份額分配\n- Boardfly + Collectives Acceleration Engine 的 MoE 優化，暗示 Google 對 MoE 架構 Agent 模型的長期押注\n\n#### 判決：結構性成本優勢存在，生態鎖定是雙面刃（採購前評估遷移成本）\n\nTPU 8t/8i 的技術突破是真實的——百萬晶片擴展能力與 KV cache 片上化是競爭對手短期難以複製的設計。然而 Patrick Moorhead 的歷史警示值得銘記：2016 年 Google 已做出類似宣告，但 Nvidia 市值至今達 5 兆美元。技術領先能否轉化為企業採購選擇，仍取決於定價透明度與生態相容性。",[226,227,228],"Google 的 TPU 已有 10 年歷史，每一代都宣稱比 Nvidia 更好，但從未真正撼動 Nvidia 在企業市場的地位——這次的雙晶片策略有何不同？","Goodput 97% 是在 Google 內部最佳化工作負載下量測的，外部客戶在實際訓練任務中能否複製此數字，仍存在很大不確定性。","雙晶片策略讓客戶需要同時管理兩個不同架構的晶片環境，對中小型 AI 團隊而言，這可能增加而非降低基礎設施複雜度。",[230,233,236,239,242],{"platform":81,"user":231,"quote":232},"overfeed（HN 用戶）","一致性問題——新模型在每個任務上的行為與前代不同。因此你最終建立的 pipeline 會依賴特定的行為模式。如果這是決定性因素，那自架才是唯一解。由於硬體溢價，所有第三方託管的模型都會被棄用，為更新、更好、更高效的模型騰出空間。",{"platform":81,"user":234,"quote":235},"momojo（HN 用戶）","我也在想同樣的問題。也許隨著 Gemma4 和 intelligence density 的發展方向，他們預測不再需要 NPU？",{"platform":81,"user":237,"quote":238},"scottyah（HN 用戶）","我們在哪些方面還沒有 AGI？",{"platform":92,"user":240,"quote":241},"@PatrickMoorhead（Moor Insights 
& Strategy 首席分析師）","Google Cloud Next 週三在拉斯維加斯開幕，主題演講題為「The agentic cloud」。Google Cloud 在 2025 年第四季以三大雲端廠商中最快的速度成長。現在 Thomas Kurian 需要證明 Google 能將 TPU 和模型動能轉化為持久的企業優勢。",{"platform":92,"user":243,"quote":244},"@wallstengine（X 市場新聞帳號）","Google 在 Google Cloud Next 發表 TPU 8t 與 TPU 8i：TPU 8t 為訓練前沿模型而生；TPU 8i 為推論、低延遲 Agentic AI 工作負載及更複雜的推理任務而設計；8t 每瓦性能提升 124%，8i 提升 117%（對比前代）。",[246,248,250],{"type":100,"text":247},"前往 Google Cloud TPU v8 Preview 頁面申請早期存取資格，測試 TPU 8i 在長對話 Agent 場景的 TTFT 基準，與 H100 做對照實驗量測延遲差異。",{"type":103,"text":249},"評估 JAX + TPU 8i 作為下一代 Agent 服務推論後端；若工作負載以 MoE 架構為主，Collectives Acceleration Engine 的 5 倍延遲降低值得重點驗測。",{"type":106,"text":251},"關注 2026 年底 TPU 8t/8i GA 定價公告，以及 Gemini 模型版本行為一致性的改善進展——後者才是 Agentic pipeline 能否規模化的真正決定因素。",[253,281,300,336,366,401,427,464,496],{"category":254,"source":12,"title":255,"publishDate":6,"tier1Source":256,"supplementSources":259,"coreInfo":266,"engineerView":267,"businessView":268,"viewALabel":269,"viewBLabel":270,"bench":271,"communityQuotes":272,"verdict":279,"impact":280},"ecosystem","Vercel 開源 Agent Skills 工具，一行指令賦予 AI 代理標準化能力",{"name":257,"url":258},"GitHub - vercel-labs/skills","https://github.com/vercel-labs/skills",[260,263],{"name":261,"url":262},"Introducing skills - Vercel Changelog","https://vercel.com/changelog/introducing-skills-the-open-agent-skills-ecosystem",{"name":264,"url":265},"Skills v1.1.1 - Vercel Changelog","https://vercel.com/changelog/skills-v1-1-1-interactive-discovery-open-source-release-and-agent-support","#### 一行指令，讓 AI 代理學會新技能\n\nVercel Labs 推出開源工具 `skills`，讓開發者透過 `npx skills add \u003Cpackage>` 一行指令，為 AI 代理安裝「技能包」——封裝好的指令集，描述特定任務的執行方式，例如「按照團隊規範生成 PR」或「撰寫 release notes」。\n\n> **名詞解釋**\n> 技能包 (skill package) ：封裝特定任務執行邏輯的指令集合，AI 代理讀取後即可按規範執行，概念類似 npm 套件之於 Node.js 生態。\n\n#### 生態規模與技術細節\n\n截至 2026 年 4 月，專案已累積 15,500+ Stars，支援 45+ agent 平台，涵蓋 Claude Code、Cursor、GitHub Copilot、Gemini 等主流工具。\n\n核心命令：\n\n- `npx skills add`：安裝技能包\n- `npx skills find`：互動探索目錄\n- `npx skills update`：更新已安裝技能\n- `npx 
skills remove`：移除技能\n\n安裝支援 GitHub shorthand、完整 URL、本地路徑，預設 symlink 方式讓更新立即生效。","`skills` 相容 45+ agent 平台，遷移成本極低——只需 `npx skills add owner/repo` 即可將團隊規範固化為可分享的技能包。\n\n本地路徑安裝讓私有技能不必公開發布，symlink 預設使更新立即生效，無需重新安裝。技能包本質是純文字指令集，無執行時相依，與 CLAUDE.md 等 agent-specific 設定互補，適合在多工具混用環境下標準化協作規範。","`skills` 試圖建立 AI 代理能力的「npm 生態」——若成功，將使 agent 工作流最佳實踐跨組織、跨工具標準化。\n\n15,500+ Stars 的早期採用速度顯示需求明確；Vercel 主導的開源策略搭配 skills.sh 技能目錄平台，有望形成中心化的技能分發節點，進一步鞏固 Vercel 在 AI 開發基礎設施的市場地位。","開發者視角","生態影響","",[273,276],{"platform":88,"user":274,"quote":275},"github-trending-js.bsky.social","🚀 飆升！🚀（200+ 顆新 Star）\n\n📦 vercel-labs / skills\n⭐ 15,188(+317)\n🗒 TypeScript\n\n開放代理技能工具 — npx skills",{"platform":92,"user":277,"quote":278},"@ashutosh887_","在 @vercel 找到這個實用的 repo：agent-skills——vercel-deploy 讓你直接叫 Claude 把應用部署到 Vercel，不需登入或繁瑣設定，馬上拿到 preview 連結。react-best-practices 則包含一系列 React 效能技巧，包括 memo、lazy loading 等。","追","AI 代理能力分發的標準化基礎設施，已支援 45+ 平台，直接影響所有使用 coding agent 的開發者工作流。",{"category":254,"source":11,"title":282,"publishDate":6,"tier1Source":283,"supplementSources":286,"coreInfo":294,"engineerView":295,"businessView":296,"viewALabel":269,"viewBLabel":270,"bench":271,"communityQuotes":297,"verdict":298,"impact":299},"Stanley For X：全球首款 AI 內容總監，自動化社群媒體經營",{"name":284,"url":285},"Product Hunt","https://www.producthunt.com/posts/stanley-for-x",[287,290],{"name":288,"url":289},"Stanley X 官方網站","https://x.getstanley.ai/welcome",{"name":291,"url":292,"detail":293},"Stan 平台 Stanley LinkedIn 版新聞稿","https://www.prnewswire.com/news-releases/stan-the-creator-platform-powering-80-000-active-users-launches-stanley-an-ai-head-of-content-for-linkedin-302716013.html","提供 Stan 平台背景數據與定價參考","#### AI 代筆作者上線：語音匹配 + 全自動 X 內容生成\n\nStanley For X 於 2026 年 4 月 22 日在 Product Hunt 上線，定位為「全球首款 AI 內容總監」，專為 X（前 Twitter）平台打造。功能模組涵蓋 Niche 研究、即時趨勢分析、推文與主題串撰寫、贈品活動規劃，以及內容發布一致性追蹤。\n\n> **白話比喻**\n> 想像你雇了一位懂你說話風格的文案顧問，他不只幫你寫推文，還替你分析趨勢、規劃內容主題串——這就是 Stanley 試圖做到的事。\n\n核心差異在於語音匹配 (Voice Matching) ：從用戶既有發文歷史學習寫作風格，而非套用通用模板。\n\n> 
**名詞解釋**\n> Voice Matching：AI 分析歷史發文，提取語氣與詞彙習慣，讓生成內容聽起來像本人而非機器人。\n\n#### 背景：Stan 平台與 10 天開發紀錄\n\n開發者 Vitalii Dodonov（Stan 共同創辦人）使用同一框架，3 個月內將 X 帳號從 0 成長至 9,600+ 追蹤者。Stan 平台服務超過 60,000 名創作者，ARR 達 $30M。工具由 Vitalii 與知名代筆作者 Pascio 共同開發，從零到上線僅花 10 天。","技術棧由 **Claude(Anthropic)**、**Composio**、**Cloudflare** 三層組成：LLM 作為推理核心，Composio 提供工具呼叫與多平台 API 整合層，Cloudflare 處理邊緣部署與即時同步。\n\n支援 Web、iMessage、SMS、Telegram 多端存取，顯示 AI 工作流程工具正往即時通訊深度整合。10 天開發週期驗證了 Claude + Composio 的快速原型可行性，但 Voice Matching 品質與高併發穩定性仍待大規模驗證。","Stan 平台 $30M ARR 的既有創作者用戶池，讓 Stanley 規避冷啟動問題，直接切入已付費的高價值族群。\n\n相較於通用 AI 寫作工具，Stanley 嵌入了真實代筆作者 (Pascio) 的專業框架，試圖建立差異化護城河。定價採協商制，LinkedIn 版約 $149／月，若 X 版落在相近區間，月均 ROI 需對應帶來明確的粉絲成長或商業轉換才划算。",[],"觀望","AI 社群媒體內容自動化工具進入創作者平台生態，以代筆作者框架差異化競爭；對 X 重度創作者有直接參考價值，整體效果仍需獨立驗證。",{"category":18,"source":14,"title":301,"publishDate":6,"tier1Source":302,"supplementSources":305,"coreInfo":314,"engineerView":315,"businessView":316,"viewALabel":317,"viewBLabel":318,"bench":271,"communityQuotes":319,"verdict":279,"impact":335},"ChatGPT 免費開放美國醫療專業人員，臨床輔助邁入新階段",{"name":303,"url":304},"OpenAI Blog","https://openai.com/index/making-chatgpt-better-for-clinicians",[306,310],{"name":307,"url":308,"detail":309},"Healthcare Dive","https://www.healthcaredive.com/news/openai-launches-chatgpt-health/809094/","醫療產業角度報導",{"name":311,"url":312,"detail":313},"Endpoints News","https://endpoints.news/openais-head-of-health-lays-out-the-ai-giants-healthcare-ambitions/","OpenAI 醫療事業負責人專訪","#### 臨床輔助三大場景\n\nChatGPT for Clinicians 向已驗證身份的美國執業醫師、執業護理師及藥劑師**免費開放**，涵蓋三大應用場景：臨床照護輔助、醫療文件撰寫，以及醫學研究查詢。\n\n系統可合成醫學證據，引用百萬篇同儕審閱研究，附帶期刊名稱與發表日期等可追溯來源。企業版另支援 HIPAA 合規運作，提供資料隔離儲存、客戶自管加密金鑰及稽核日誌，且輸入內容不用於模型訓練。\n\n> **名詞解釋**\n> HealthBench：OpenAI 開發的醫療 AI 評估框架，依據臨床標準評核模型回應的安全性、清晰度與個別情境適當性。\n\n#### 兩年訓練成果\n\nOpenAI 歷時兩年與來自 60 個國家、涵蓋數十個專科的 260 多位醫師合作，累計提供超過 60 萬次評分回饋，涵蓋 30 個健康領域。所有輸出均以 HealthBench 框架評估，評核重點含安全性、清晰度與適當轉介建議。","技術亮點在於**可追溯引用**：每則回應附帶期刊名稱與發表日期，而非僅輸出結論，這對臨床決策的可信賴性至關重要。\n\n企業整合若需對接 EHR 系統，應關注 HIPAA 合規架構三項基礎：資料隔離儲存、客戶自管加密金鑰 
(CMEK) 、稽核日誌。HealthBench 作為醫療 AI 評估方法論，也是自行開發臨床輔助系統時值得參考的基準框架。","免費開放是 OpenAI 的醫療市場卡位策略——先讓個人從業者養成依賴習慣，再以企業版 ChatGPT for Healthcare 承接機構採購。\n\nOpenAI 以兩年訓練、260 位醫師、60 萬次回饋為技術背書，試圖在「可信賴性」上建立差異化。醫療 AI 監管門檻高、院方採購周期長，個人從業者直接可及的策略是繞過機構採購瓶頸的關鍵一步。","工程師視角","商業視角",[320,323,326,329,332],{"platform":92,"user":321,"quote":322},"@TinglongDai（Johns Hopkins 教授，醫療 AI 研究者）","你會信任就診時使用 ChatGPT 的醫生嗎？今日，我們在《npj Digital Medicine》發表研究，這是首項針對執業臨床醫師的調查，了解他們如何看待同儕在醫療決策中使用生成式 AI。調查數據令人震驚。",{"platform":92,"user":324,"quote":325},"@Jonas_Vollmer（AI 安全研究者）","急救中心的醫生朋友說：大多數醫生每天都在用 ChatGPT。他們習慣性地把完整的匿名患者病歷（含 X 光片等）貼進個人帳號，目前的採用程度幾乎毫無阻力。",{"platform":88,"user":327,"quote":328},"brittanyepage.bsky.social(8 upvotes)","《JAMA Psychiatry》發表新論文，為治療師提供篩查患者 AI 使用情況的實用指引。透過常規評估，臨床醫師能識別患者如何使用這些工具，並監控潛在危害。",{"platform":88,"user":330,"quote":331},"fintwitter.bsky.social(1 upvote)","OpenAI 推出 ChatGPT for Clinicians，向通過驗證的美國醫師、執業護理師、醫師助理及藥劑師免費提供。",{"platform":88,"user":333,"quote":334},"roxsross.bsky.social(1 upvote)","ChatGPT 現已對美國醫師免費開放。","AI 臨床輔助從企業授權正式走向個人從業者直接可及，醫療 AI 市場競爭格局重組加速。",{"category":18,"source":14,"title":337,"publishDate":6,"tier1Source":338,"supplementSources":341,"coreInfo":351,"engineerView":352,"businessView":353,"viewALabel":317,"viewBLabel":318,"bench":354,"communityQuotes":355,"verdict":279,"impact":365},"OpenAI 開源 Privacy Filter，專攻個資偵測與去識別化",{"name":339,"url":340},"Introducing OpenAI Privacy Filter","https://openai.com/index/introducing-openai-privacy-filter",[342,345,348],{"name":343,"url":344},"openai/privacy-filter - Hugging Face","https://huggingface.co/openai/privacy-filter",{"name":346,"url":347},"openai/privacy-filter - GitHub","https://github.com/openai/privacy-filter",{"name":349,"url":350},"VentureBeat 報導","https://venturebeat.com/data/openai-launches-privacy-filter-an-open-source-on-device-data-sanitization-model-that-removes-personal-information-from-enterprise-datasets","#### 工具定位\n\nOpenAI 以 Apache 2.0 授權開源 Privacy Filter，專為文字 PII 偵測與遮蔽設計。架構採 sparse MoE 設計，1.5B 總參數、50M 
激活參數，可在筆電或瀏覽器 (WebGPU) 本地運行，F1 分數達 96%，128K tokens 上下文可處理完整法律文件。\n\n> **名詞解釋**\n> Sparse MoE（稀疏混合專家）：模型雖有大量參數，每次推論只激活少數子網路，達到高容量、低運算的效果。\n\n#### 整合與限制\n\n支援 8 種 PII 類別（姓名、信箱、電話、地址、URL、日期、帳號、密碼），可透過 `pip install` 部署，CLI 工具 `opf` 提供遮蔽、評估、微調三種模式，也支援 Hugging Face Transformers pipeline 與 Transformers.js（含 WebGPU）。\n\n官方聲明這是「遮蔽輔助工具，並非合規或安全保證」，非英語文字與混合格式文件的效能有已知限制，建議在生產部署前進行領域評估。","本地部署最大優勢在於資料不出境——敏感文字可在 CI/CD pipeline 或資料預處理階段直接過濾，無需送至第三方 API。支援領域微調與精確率／召回率調整，可針對醫療、金融等特定場景優化。128K 上下文免去長文件分段拼接的工程複雜度。需注意非英語語料和不常見姓名的漏偵問題，生產前須做領域評估，建議作為多層次隱私架構的一環。","對需符合 GDPR、CCPA 等資料保護法規的企業，本地運行意味著敏感資料不必送至第三方 API，直接降低資料主權風險與合規疑慮。Apache 2.0 授權無商業限制，部署成本極低。官方明確說明此工具不等同合規保證，企業仍需搭配法律審查與多層次隱私架構，不可視為唯一的合規手段。","#### 效能基準\n\n- F1 分數：96%\n- 激活參數：50M（總參數 1.5B）\n- 上下文視窗：128K tokens",[356,359,362],{"platform":88,"user":357,"quote":358},"sungkim.bsky.social(34 upvotes)","OpenAI 發布了一個開放權重模型，用於偵測和遮蔽個人識別資訊 (PII) 。OpenAI Privacy Filter。",{"platform":88,"user":360,"quote":361},"nic221.bsky.social(8 upvotes)","介紹 OpenAI Privacy Filter。#AI #OpenAI #privacy",{"platform":88,"user":363,"quote":364},"timkellogg.me(7 upvotes)","OpenAI Privacy Filter：一個 1.5B-A50M 的純編碼器 transformer，用於標記私人資訊，小巧快速到可在瀏覽器中運行。","本地可執行的 Apache 2.0 開源 PII 過濾工具，直接降低資料外送風險，有資料治理需求的企業可立即評估導入。",{"category":254,"source":10,"title":367,"publishDate":6,"tier1Source":368,"supplementSources":370,"coreInfo":379,"engineerView":380,"businessView":381,"viewALabel":382,"viewBLabel":270,"bench":383,"communityQuotes":384,"verdict":298,"impact":400},"Anthropic 暗示 Pro 與 Max 方案已不敷 Claude 工作負載需求",{"name":120,"url":369},"https://the-decoder.com/anthropic-manager-hints-that-pro-and-max-plans-are-outgrown-by-todays-claude-workloads/",[371,375],{"name":372,"url":373,"detail":374},"The Register","https://www.theregister.com/2026/04/22/anthropic_removes_claude_code_pro/","原始事件報導與企業用戶反彈",{"name":376,"url":377,"detail":378},"Simon Willison's Weblog","https://simonwillison.net/2026/Apr/22/claude-code-confusion/","各平台文件不一致問題分析","#### 訂閱結構的裂縫\n\n2026 年 4 月 21-22 
日，Anthropic 悄悄將 Claude Code 從 $20／月的 Pro 訂閱定價頁面移除，改以「✗」標示。用戶強烈反彈後數日內即恢復。Anthropic 成長負責人 Amol Avasare 說明，這屬於「針對約 2% 新用戶的小規模 A/B 測試」，並非正式下架。\n\n> **名詞解釋**\n> A/B 測試：同時向不同用戶群展示不同版本，比較兩組反應以輔助決策的實驗方法。\n\n事件期間，各平台文件出現嚴重不一致：定價頁已移除、Claude Code 產品頁仍列於 Pro、客服文件改為僅提及 Max，甚至 claude.ai 聊天機器人仍告知用戶 Pro 包含 Claude Code。\n\n#### 算力壓力浮出檯面\n\nAvasare 坦言，Max 方案在 Claude Code 與 Cowork 問世前即已推出，「從未針對長期執行的 agent 工作流重新設計」。算力壓力有數據佐證：Anthropic API 90 天可用率僅 98.95%，低於雲端服務 99.99% 業界標準；GPU 租用成本同期上漲 48%。\n\nOpenAI 暫停部分 Sora 服務、GitHub Copilot 暫停新 Pro 方案報名，顯示業界普遍面臨算力瓶頸。","對重度 Claude Code 用戶，算力瓶頸直接衝擊開發穩定性：API 可用率不達業界標準、高峰期每週用量上限不定期收緊，長期 agent 工作流的可靠性存疑。若訂閱改制，建議評估切換至 API 直連（按量計費）或測試 OpenAI Codex、Gemini Code 等替代方案，避免單一供應商鎖定。","此事件揭示 AI 訂閱平台正面臨「用量通膨」困境——每位訂閱者的使用深度遠超原始設計。Anthropic 極可能推出介於 Max 20x（$200／月）與 API 之間的新付費層級。企業採購時應納入算力供應風險，要求明確 SLA 保障，並建立多供應商備援策略以應對潛在漲價。","開發者視角（API／整合／遷移）","#### 算力指標\n\n- Anthropic API 90 天可用率：98.95%（業界標準：99.99%）\n- GPU 租用成本漲幅：+48%(Ornn Compute Price Index)",[385,388,391,394,397],{"platform":88,"user":386,"quote":387},"Ed Zitron（edzitron.com，322 upvotes）","Anthropic 似乎已從其 $20／月的 Pro 訂閱定價頁面移除 Claude Code。有 $20 方案的用戶能確認嗎？",{"platform":88,"user":389,"quote":390},"Ed Zitron（edzitron.com，260 upvotes）","Anthropic 已撤回所有變更，並稱從 Pro 移除 Claude Code 是「針對 2% 新 prosumer 用戶的小規模測試」。但這並不能充分解釋為何支援文件和網站也同步修改。我相信後續還有更多變動。",{"platform":81,"user":392,"quote":393},"vessenes（HN 用戶）","Anthropic 持續拓展定價層級，讓競爭差異化空間變大，這是好事。但我今天 Opus 的 API 費用高達 $250，已把 openclaw 指向 Codex——隨著 Anthropic 不斷拉開定價範圍，我希望 OpenAI 也能跟進。",{"platform":92,"user":395,"quote":396},"@koltregaskes（X 用戶）","在不斷輪迴中，我正在把 Claude 降回 Pro（而非 Max），並升級到 ChatGPT Pro（$200 方案）。我在 Claude Code 上沒有獲得足夠的價值——Max 方案的 token 消耗得太快，而 Codex 不僅看起來更有效率，還能做更多。",{"platform":81,"user":398,"quote":399},"hannahstrawbrry（HN 用戶）","Claude Code 頁面仍顯示它包含在 Pro/Max 方案中。","Anthropic 
訂閱架構面臨根本性重設計，企業與重度開發者應提前評估多供應商策略以應對潛在改制衝擊。",{"category":402,"source":13,"title":403,"publishDate":6,"tier1Source":404,"supplementSources":407,"coreInfo":412,"engineerView":413,"businessView":414,"viewALabel":415,"viewBLabel":416,"bench":417,"communityQuotes":418,"verdict":425,"impact":426},"funding","Google 加碼投資 Thinking Machines Lab，Mira Murati 團隊獲數十億美元雲端合約",{"name":405,"url":406},"TechCrunch","https://techcrunch.com/2026/04/22/exclusive-google-deepens-thinking-machines-lab-ties-with-new-multi-billion-dollar-deal/",[408],{"name":409,"url":410,"detail":411},"Google Cloud Press Corner","https://www.googlecloudpresscorner.com/2026-04-22-Thinking-Machines-Expands-Use-of-Google-Cloud-AI-Hypercomputer","官方新聞稿","#### 合約背景：本月第三大前沿 AI 實驗室協議\n\n2026 年 4 月 22 日，Google Cloud 與 Thinking Machines Lab 宣布簽署數十億美元雲端計算協議，於 Google Cloud Next '26（拉斯維加斯）正式公開。這是繼 Anthropic 與 Meta 之後，Google Cloud 本月第三個達成類似規模合作的前沿 AI 實驗室。協議採非獨家設計，Thinking Machines 可同時使用多家雲端服務商。\n\n#### 公司現況與挑戰\n\nThinking Machines 由前 OpenAI 首席技術官 Mira Murati 於 2025 年 2 月創辦，以 120 億美元估值完成 20 億美元種子輪融資，旗下首款產品「Tinker」可自動化建立客製化前沿 AI 模型。員工數超過 130 人，但 Meta 已挖角 7 名創始成員（含 Tinker 首席工程師 Joshua Gross），人才流失壓力不容忽視。","GB300 NVL72 已正式上線 Google Cloud，搭配 A4X Max 虛擬機，訓練與推理速度相較前代提升 2 倍。Jupiter 網路負責強化學習 (RL) 的權重傳輸，配套 GKE、Cloud Spanner 與 Cluster Director 組成完整 AI Hypercomputer 堆疊。有大規模 RL 訓練需求的工程師可優先評估此基礎設施組合。","Google Cloud 本月連簽 Anthropic、Meta、Thinking Machines 三家前沿 AI 實驗室，算力爭奪戰進入白熱化。Thinking Machines 擁有 Mira Murati 光環和 120 億美元估值，但人才流失與早期產品的雙重壓力是潛在風險；Google 此時加碼，反映市場對前 OpenAI 高階創辦團隊技術實力的高度期待。","技術實力評估","市場與投資觀點","#### 效能指標\n\n- A4X Max 虛擬機相比前一代 GPU：訓練與推理速度提升 **2 倍**（Google Cloud 官方數據）",[419,422],{"platform":88,"user":420,"quote":421},"techcrunch.com(10 likes)","Mira Murati 創辦的 Thinking Machines Lab 已與 Google Cloud 簽署數十億美元協議，採用 Nvidia 最新 GB300 晶片作為 AI 基礎設施——此為 TechCrunch 獨家報導。",{"platform":92,"user":423,"quote":424},"@jukan05","《The Information》2026 年 AI 預測：Google 將收購 Thinking Machines Lab；OpenAI 將於 9 月推出自動化 AI 研究助理但成效不如預期；某主要 AI 實驗室將推出月費 1,000 
美元的 AI 代理服務。","追整體趨勢","前沿 AI 實驗室算力合約快速向 Google Cloud 集中，雲端基礎設施競爭格局正在重塑，Thinking Machines Lab 也成為觀察後 OpenAI 時代人才去向與創業成敗的重要指標。",{"category":18,"source":14,"title":428,"publishDate":6,"tier1Source":429,"supplementSources":431,"coreInfo":441,"engineerView":442,"businessView":443,"viewALabel":444,"viewBLabel":445,"bench":446,"communityQuotes":447,"verdict":279,"impact":463},"OpenAI 以 WebSocket 加速 Agent 工作流，Codex 延遲大幅降低",{"name":112,"url":430},"https://openai.com/index/speeding-up-agentic-workflows-with-websockets",[432,435,438],{"name":433,"url":434},"OpenAI API Docs - WebSocket Mode","https://developers.openai.com/api/docs/guides/websocket-mode",{"name":436,"url":437},"OpenAI - Unrolling the Codex Agent Loop","https://openai.com/index/unrolling-the-codex-agent-loop/",{"name":439,"url":440},"OpenAI Developer Community","https://community.openai.com/t/websockets-for-responses-api/1374906","#### WebSocket 模式核心機制\n\nOpenAI Responses API 新增 WebSocket 模式，透過持久連線與「連線本地快取 (connection-scoped caching) 」，大幅降低 agentic 工作流的延遲。每條連線在記憶體中僅保留最近一次 response 狀態，後續每輪只需傳送新的 input items 與 `previous_response_id`，省去重傳完整對話歷史的冗餘開銷。\n\n> **名詞解釋**\n> connection-scoped caching：快取範圍僅限單一 WebSocket 連線存活期間，連線關閉後快取隨之釋放，無磁碟 I/O 依賴。\n\n#### 實測效能\n\n針對 20 次以上工具呼叫的 agentic 工作流（如 Codex agent loop），端對端執行速度可提升最高約 40%。連線限時 60 分鐘，超時後需重新連線，可接續已持久化的 response 或以壓縮上下文重起。支援 `store=false` 與 Zero Data Retention(ZDR) ，符合高安全合規需求。","整合 WebSocket 模式的關鍵在於改用事件驅動循環：每輪發送 `response.create` 事件，帶入 `previous_response_id` 及工具輸出。\n\n需注意若 `previous_response_id` 不在快取且 `store=false`，會返回 `previous_response_not_found` 錯誤，需在 agent loop 中加入重連 fallback 邏輯。連線 60 分鐘上限也需納入設計，建議搭配 warm 連線池管理。","對於依賴 Codex 或自建 coding agent 的企業，40% 的延遲降幅直接轉化為開發者體驗提升與 API 成本最佳化，每輪少傳 context token 可降低費用。\n\nZDR 支援讓金融、醫療等高合規場景可安心採用。vLLM v0.10.0 已初步跟進 Responses API，顯示此模式正成為 agent 工作流新標準，宜及早評估遷移路徑。","工程整合影響","商業應用觀點","#### 效能數據\n\n- 20+ 工具呼叫場景：端對端速度提升最高約 40%\n- 連線存活上限：60 分鐘",[448,451,454,457,460],{"platform":81,"user":449,"quote":450},"sasipi247（HN 用戶）","OpenAI 
Responses API 有 WebSocket 模式，可作為 SSE 的替代方案，效果極佳，在效能上感覺是一次飛躍。我過去一個月一直在此基礎上開發，在 workers 上保持 WebSocket session 熱啟動，並透過 NATS JetStream 做命令路由。這讓 main thread 的 sidecar thread 使用變得非常簡單，worker 會以類似方式對待它們。",{"platform":92,"user":452,"quote":453},"@EmbeddedLLM（X 用戶）","vLLM v0.10.0 剛發布，其最大亮點可能是一個隱藏功能：對 OpenAI /responses API 的初步支援。這聽起來像個小功能，但其實是巨大的市場訊號。業界正朝此方向發展，以建構下一代強大的 agents。",{"platform":81,"user":455,"quote":456},"simonw（HN 知名用戶）","我維護跨多個提供商的抽象層已有幾年。定義標準的最佳嘗試是 OpenAI harmony/responses，但尚未廣泛採用。舊版 OpenAI Chat Completions 更像是臨時標準——幾乎每個提供商都提供複製版，因缺乏正式規格而存在令人沮喪的差異。",{"platform":92,"user":458,"quote":459},"@athyuttamre（OpenAI API 設計師）","推出 Responses API：OpenAI API 的全新核心原語。這是兩年來設計 OpenAI API 心得的結晶，也是我們下一章建構 agents 的基礎。",{"platform":88,"user":461,"quote":462},"startuphub.bsky.social（Bluesky，1 like）","OpenAI 的 Responses API 現在利用 WebSockets，將 AI agent 延遲降低最高 40%，實現更快的模型推理並提升效能。","多輪工具呼叫場景可立即受益，40% 延遲降幅直接降低 API 成本並提升 agent 執行體驗。",{"category":402,"source":11,"title":465,"publishDate":6,"tier1Source":466,"supplementSources":468,"coreInfo":476,"engineerView":477,"businessView":478,"viewALabel":415,"viewBLabel":416,"bench":271,"communityQuotes":479,"verdict":298,"impact":495},"前 OpenAI 研究員 Jerry Tworek 創辦 Core Automation，目標打造最自動化 AI 實驗室",{"name":120,"url":467},"https://the-decoder.com/ex-openai-researcher-jerry-tworek-launches-core-automation-to-build-the-most-automated-ai-lab-in-the-world/",[469,472],{"name":120,"url":470,"detail":471},"https://the-decoder.com/two-startups-want-to-replace-how-ai-learns-one-just-raised-180m-another-is-seeking-up-to-1b/","Two startups want to replace how AI learns",{"name":473,"url":474,"detail":475},"AI Certs","https://www.aicerts.ai/news/ai-startup-funding-core-automation-seeks-1b-weeks-after-launch/","Core Automation Seeks $1B Weeks After Launch","#### 2026 年 1 月舊聞，近期社群討論再度升溫\n\nCore Automation 由前 OpenAI 副總裁 Jerry Tworek 於 2026 年 1 月創立，成立數週後即啟動種子輪募資，目標融資 5 億至 10 億美元。這一已成立逾兩個月的專案，近期因 Business Insider 深度報導及社群對其技術路線的廣泛討論而再度引發關注。\n\n#### 
技術主張：持續學習取代一次性預訓練\n\nTworek 認為當前大模型架構「從根本上就有缺陷」——模型透過海量資料一次性預訓練後，遇到新資訊便出現**災難性遺忘**，無法即時吸收新知識。\n\n> **名詞解釋**\n> 災難性遺忘 (Catastrophic Forgetting) ：神經網路在學習新任務時，往往會覆蓋並忘記舊任務的知識，是持續學習的核心技術挑戰。\n\n旗艦模型 **Ceres** 採用持續學習架構，可在生產環境中即時更新模型權重，無需完整重新訓練，並宣稱比主流大模型節省 **100 倍**訓練資料與算力。研究方向涵蓋新型學習演算法與超越 Transformer 的高效架構。","Ceres 的技術主張大膽，但面臨兩大未解難題：萬億參數規模的持續學習尚未驗證，以及持續演化模型的安全審計缺乏標準框架。目前無公開論文或可複現結果，「100 倍效率」的宣稱難以獨立核實。有興趣的工程師可持續追蹤其研究發表，但現階段尚不具備實際評估基礎。","種子輪即尋求 5 至 10 億美元，在 AI 創業史上規模空前。Tworek 在 OpenAI 七年的資歷與頂尖實驗室的招聘能力是強力背書，但公司成立不足一個月便進行億元募資，是典型的願景先行押注。投資者實際上是在賭「後 Transformer 時代」的到來時機，風險與潛在回報同樣極端。",[480,483,486,489,492],{"platform":92,"user":481,"quote":482},"AndrewCurran_（X 用戶）","Jerry Tworek 離開 OpenAI 後的動向備受外界關注。The Information 報導其新創公司 Core Automation 計畫訓練需要更少訓練資料的新型 AI 模型，並透過持續學習持續成長。他們正在尋求籌集 10 億美元。",{"platform":92,"user":484,"quote":485},"kimmonismus（X 用戶）","對此充滿期待：前 OpenAI 副總裁 Jerry Tworek 創立了 Core Automation，這家 AI 新創正籌集 10 億美元，目標重新思考 AI 模型的構建與訓練方式。公司希望破解持續學習難題——讓 AI 能即時從真實世界經驗中學習。",{"platform":88,"user":487,"quote":488},"techmeme.com（Bluesky，3 upvotes）","前 OpenAI 副總裁 Jerry Tworek 共同創辦的 Core Automation 正式啟動，目標打造「世界上最自動化的 AI 實驗室」，團隊來自 OpenAI、Anthropic 和 DeepMind（Business Insider 報導）。",{"platform":81,"user":490,"quote":491},"cjbarber（HN 用戶）","就目前而言，Cowork／Codex 系列針對非技術知識工作者的「專業代理」將是有史以來成長最快的產品類別之一，對眾多軟體企業極具顛覆性——就像新任副總裁加入公司後往往汰換部分軟體供應商一樣。",{"platform":81,"user":493,"quote":494},"_pdp_（HN 用戶）","AI 代理目前最大的問題在於使用場景仍在探索中。我們已向數百位客戶部署這類系統，挑戰在於：AI 代理在商業流程中往往被視為工作流程自動化工具，是既有框架的替代品，但它們能做的遠不止於此。","若持續學習路線成立，將從根本改變模型訓練典範；目前技術主張未經驗證，屬高風險早期押注。",{"category":497,"source":12,"title":498,"publishDate":6,"tier1Source":499,"supplementSources":502,"coreInfo":512,"engineerView":513,"businessView":514,"viewALabel":515,"viewBLabel":516,"bench":271,"communityQuotes":517,"verdict":298,"impact":533},"discourse","GitHub CLI 悄悄加入遙測功能，開發者社群反彈聲浪四起",{"name":500,"url":501},"GitHub Changelog","https://github.blog/changelog/2026-04-22-github-cli-opt-out-usage-telemetry/",[503,506,509],{"name":504,"url":505},"GitHub CLI 
Telemetry 官方說明頁","https://cli.github.com/telemetry",{"name":507,"url":508},"The Register 報導","https://www.theregister.com/2026/04/22/github_opts_all_cli_users",{"name":510,"url":511},"HN 討論串（403 分）","https://news.ycombinator.com/item?id=47862331","#### 悄悄上線的遙測功能\n\n2026 年 4 月 22 日，GitHub CLI v2.91.0 隨版本更新靜默啟用假名遙測功能，預設為 opt-out——使用者必須主動關閉才能停止資料傳送。官方說明以「agentic adoption 成長，需要了解功能實際使用狀況」為由，但未在事前發出顯著公告。\n\n> **名詞解釋**\n> 假名遙測 (pseudoanonymous telemetry) ：以隨機生成的設備 UUID 取代真實身份，但仍可追蹤同一裝置的跨指令行為模式。\n\n#### 實際收集什麼？\n\nCLI 會傳送指令名稱、使用的 flags、作業系統、CPU 架構、CLI 版本、時間戳記、設備 UUID，以及是否在 CI 環境執行、使用中的 AI agent 身份。關閉方式有三種：\n\n1. `export GH_TELEMETRY=false`\n2. `export DO_NOT_TRACK=true`\n3. `gh config set telemetry disabled`\n\n注意：獨立安裝的 extension 與 agent 可能自行收集資料，不受上述 opt-out 機制約束。","遙測本身不罕見，但 opt-out 而非 opt-in 的設計讓工程師警覺。社群已指出，`gh` 每條指令本就是 GitHub API 呼叫，伺服器端早已有完整 log，額外客戶端遙測的必要性存疑。\n\n建議立即執行 `gh config set telemetry disabled`，並在 CI/CD pipeline 環境變數中加入 `GH_TELEMETRY=false`，避免 agent 行為資料在自動化流程中被靜默收集。各 extension 是否自行蒐集資料需個別確認，opt-out 機制不一定涵蓋。","此次事件折射出更大趨勢：隨著 AI agent 深入開發工作流程，工具供應商對使用行為資料的渴求急速升溫。GitHub 的措辭 (agentic adoption) 暗示，遙測設計的主要目標是追蹤 AI agent 使用模式，而非傳統 CLI 操作。\n\n企業需評估開發工具的資料收集範圍是否符合內部安全政策，CI/CD 自動化場景中 repo 可見性與 owner 資訊均在收集列表，屬敏感資料，應及早確認並關閉，並追蹤 GitHub 是否在社群壓力下改為 opt-in 設計。","實務觀點","產業結構影響",[518,521,524,527,530],{"platform":81,"user":519,"quote":520},"user3939382（HN 用戶）","在未經你同意的情況下監視你也沒關係，這是為了你好。或者說是為了我好。大概是這樣吧？你的意思是結果可以證明手段正當嗎？把尊重當成一種功能特性如何——這個你根本不需要遙測就能確認。",{"platform":81,"user":522,"quote":523},"caymanjim（HN 用戶）","這類聲明所使用的措辭總讓我感到惱火。說「我們希望獲得可見性」，好吧，尚可接受。但說「我們需要」……你們根本不需要。",{"platform":81,"user":525,"quote":526},"brown9-2（HN 用戶）","讓人困惑的是，每一條 gh 指令本質上都只是他們 API 的封裝。",{"platform":88,"user":528,"quote":529},"kat cosgrove / kat.lol（Bluesky，81 upvotes）","GitHub 似乎在未明確告知的情況下，讓所有 CLI 使用者預設開啟遙測資料傳送，並將其用於產品決策。這種做法很偷偷摸摸，應設計為 opt-in 而非 opt-out。使用 `gh config set telemetry disabled` 停用。",{"platform":81,"user":531,"quote":532},"skydhash（HN 用戶）","感知資料更有價值，因為那是唯一能衡量使用者對軟體 UX 
挫敗感的方式。某個功能可能被大量使用，但對所有人而言都是痛苦的體驗。","GitHub CLI 預設開啟遙測蒐集 repo 資訊，開發者應立即關閉，企業需評估是否符合安全政策，並追蹤 GitHub 是否改為 opt-in 機制。","#### 段落 1：社群熱議排行\n\n今日最高熱度：Qwen3.6-27B 開源釋出席捲 Reddit r/LocalLLaMA，timkellogg.me（Bluesky，56 upvotes）直呼「他們把這個小模型拿去跟 Opus 4.5 比，表現相當不錯，讓我震驚了。」\n\n排名第二為 Anthropic Pro/Max 方案風波：Ed Zitron（Bluesky，322 upvotes）率先爆料 Claude Code 疑似從 $20／月方案移除，260 upvotes 的後續貼文繼續追問官方撤回的真實原因。\n\nGitHub CLI 遙測事件緊追其後，kat cosgrove（Bluesky，81 upvotes）公開呼籲「應設計為 opt-in 而非 opt-out」，HN 多則討論串持續延燒。\n\nOpenAI WebSocket Responses API（sasipi247，HN）和 ChatGPT 醫療人員免費開放（@Jonas_Vollmer，X）分列第四、五，均獲大量實戰回報。\n\n#### 段落 2：技術爭議與分歧\n\n本地部署派 vs. 雲端 API 派：u/_raydeStar(Reddit r/LocalLLaMA) 直指「Opus 收緊限制並調漲定價——這對本地部署來說是完美的風暴。」@kylehessling1(X) 以前端設計測試與 agentic 基準實測，驗證 Qwen3.6-27B 遠超預期。\n\n隱私權立場對立：GitHub CLI 遙測事件中，user3939382(HN) 諷刺「在未經你同意的情況下監視你……是為了你好」；caymanjim(HN) 對官方「我們需要可見性」措辭直言「你們根本不需要」。爭論焦點在於預設行為是否應以使用者知情為前提。\n\n#### 段落 3：實戰經驗（最高價值）\n\nsasipi247(HN) 實測 OpenAI Responses API WebSocket 模式逾一個月，描述「在 workers 上保持 WebSocket session 熱啟動，透過 NATS JetStream 做命令路由」，整體效能感受是「一次飛躍」。\n\nQwen3.6 量化版選擇方面，syntaxing(HN) 建議「Q8 或 Q6_UD，且不要進行 KV cache 量化」，指出 MoE 模型激活參數少時此差異更顯著。\n\n醫療 AI 快速滲透現實：@Jonas_Vollmer(X) 引述急救室醫生友人說法：「大多數醫生每天都在用 ChatGPT……習慣性地把完整的匿名患者病歷貼進個人帳號，目前的採用程度幾乎毫無阻力。」\n\n#### 段落 4：未解問題與社群預期\n\nAnthropicç 訂閱架構走向未明：@koltregaskes(X) 已從 Claude Max 切換至 ChatGPT Pro，vessenes(HN) 直指「今天 Opus 的 API 費用高達 $250，已把 openclaw 指向 Codex」，但官方仍未公告正式架構調整時程。\n\nGitHub 遙測機制是否改為 opt-in，以及 Core Automation 持續學習技術路線能否落地，均是社群關切但未獲回應的問題。overfeed(HN) 點出 Agentic pipeline 規模化的核心障礙：「新模型在每個任務上的行為與前代不同。如果這是決定性因素，那自架才是唯一解。」",[536,537,539,540,542,543,545],{"type":100,"text":101},{"type":100,"text":538},"在 Business 或 Enterprise ChatGPT 帳號建立第一個 Workspace Agent，選擇低風險任務（如 Slack 頻道週報彙整）驗證基本功能與排程穩定性",{"type":103,"text":104},{"type":103,"text":541},"設計整合 Slack 與檔案系統的自動化工作流程原型，測試自訂 MCP 伺服器擴充能力，並建立 RBAC 權限矩陣草案供 IT 安全審查",{"type":106,"text":107},{"type":106,"text":544},"追蹤 2026 年 5 月 6 日後 ChatGPT Workspace Agent 的 credit 計費實際成本、EKM 支援時程公告，以及 Microsoft Copilot 
與 Google Workspace AI 的競品反應動態",{"type":106,"text":546},"關注 2026 年底 TPU 8t/8i GA 定價公告，以及 Gemini 模型版本行為一致性的改善進展——後者才是 Agentic pipeline 能否規模化的真正決定因素","Qwen3.6-27B 的出現讓本地部署重回主場，OpenAI 以 WebSocket 降低 agent 延遲，兩件事都在壓縮雲端 API 的差異化空間。與此同時，Anthropic 定價調整與 GitHub 遙測爭議揭示另一條線：AI 工具的「便利性」正在與「可控性」形成越來越明顯的對立。開發者在挑選下一個工具鏈時，或許不應只問「夠不夠快」，而是「夠不夠透明」。",{"prev":549,"next":550},"2026-04-22","2026-04-24",{"data":552,"body":553,"excerpt":-1,"toc":563},{"title":271,"description":46},{"type":554,"children":555},"root",[556],{"type":557,"tag":558,"props":559,"children":560},"element","p",{},[561],{"type":562,"value":46},"text",{"title":271,"searchDepth":564,"depth":564,"links":565},2,[],{"data":567,"body":568,"excerpt":-1,"toc":574},{"title":271,"description":50},{"type":554,"children":569},[570],{"type":557,"tag":558,"props":571,"children":572},{},[573],{"type":562,"value":50},{"title":271,"searchDepth":564,"depth":564,"links":575},[],{"data":577,"body":578,"excerpt":-1,"toc":584},{"title":271,"description":53},{"type":554,"children":579},[580],{"type":557,"tag":558,"props":581,"children":582},{},[583],{"type":562,"value":53},{"title":271,"searchDepth":564,"depth":564,"links":585},[],{"data":587,"body":588,"excerpt":-1,"toc":594},{"title":271,"description":56},{"type":554,"children":589},[590],{"type":557,"tag":558,"props":591,"children":592},{},[593],{"type":562,"value":56},{"title":271,"searchDepth":564,"depth":564,"links":595},[],{"data":597,"body":598,"excerpt":-1,"toc":737},{"title":271,"description":271},{"type":554,"children":599},[600,607,612,617,622,641,646,661,666,671,676,681,687,692,707,712,717,722,727,732],{"type":557,"tag":601,"props":602,"children":604},"h4",{"id":603},"_27b-模型規格與效能定位",[605],{"type":562,"value":606},"27B 模型規格與效能定位",{"type":557,"tag":558,"props":608,"children":609},{},[610],{"type":562,"value":611},"Alibaba Qwen 團隊於 2026 年 4 月 22 日正式發布 Qwen3.6-27B，定位為「27B 級別旗艦編程模型」。完整模型 55.6GB，Q4_K_M GGUF 量化版僅約 16.8GB，可在單張高端消費級 GPU 
上流暢運行，大幅降低本地部署門檻。",{"type":557,"tag":558,"props":613,"children":614},{},[615],{"type":562,"value":616},"同期推出 MoE 姊妹款 Qwen3.6-35B-A3B，總參數 35B 但僅有 3B 激活，兩款並行提供差異化使用場景，滿足不同硬體環境的需求。採用 Apache 2.0 授權，支援商業使用與微調，無任何商業限制。",{"type":557,"tag":558,"props":618,"children":619},{},[620],{"type":562,"value":621},"在四項主要編程基準上，Qwen3.6-27B 全面超越前代旗艦 Qwen3.5-397B-A17B（體積 807GB，是新模型的 14 倍）：SWE-bench Verified 77.2 對 76.2、SWE-bench Pro 53.5 對 50.9、Terminal-Bench 2.0 59.3 對 52.5、SkillsBench 48.2 對 30.0。",{"type":557,"tag":623,"props":624,"children":625},"blockquote",{},[626],{"type":557,"tag":558,"props":627,"children":628},{},[629,635,639],{"type":557,"tag":630,"props":631,"children":632},"strong",{},[633],{"type":562,"value":634},"名詞解釋",{"type":557,"tag":636,"props":637,"children":638},"br",{},[],{"type":562,"value":640},"\nSWE-bench Verified：評估 AI 模型解決真實 GitHub issue 能力的標準化基準，涵蓋從修復 bug 到實作新功能的軟體工程任務。",{"type":557,"tag":558,"props":642,"children":643},{},[644],{"type":562,"value":645},"架構上採用 64 層、隱層維度 5120，引入 Gated DeltaNet + Gated Attention 混合塊設計，原生支援 262,144 tokens 上下文視窗，透過 YaRN 技術可擴展至約 100 萬 tokens。整合視覺編碼器，支援文字、圖像、影片多模態輸入。",{"type":557,"tag":623,"props":647,"children":648},{},[649],{"type":557,"tag":558,"props":650,"children":651},{},[652,656,659],{"type":557,"tag":630,"props":653,"children":654},{},[655],{"type":562,"value":634},{"type":557,"tag":636,"props":657,"children":658},{},[],{"type":562,"value":660},"\nYaRN(Yet another RoPE extensioN method) ：透過調整 RoPE 旋轉位置編碼參數擴展大語言模型上下文視窗的技術，可在不重新訓練的前提下顯著提升上下文長度上限。",{"type":557,"tag":601,"props":662,"children":664},{"id":663},"雲端服務收緊成為本地部署催化劑",[665],{"type":562,"value":663},{"type":557,"tag":558,"props":667,"children":668},{},[669],{"type":562,"value":670},"Anthropic Claude Opus 近期收緊存取限制並調漲定價，無意間為開源本地模型創造了絕佳時機。Reddit r/LocalLLaMA 社群的 u/_raydeStar 精準點出這個「完美風暴」：雲端漲價與強力本地模型同步出現，驅使越來越多開發者認真評估自建方案。",{"type":557,"tag":558,"props":672,"children":673},{},[674],{"type":562,"value":675},"HN 社群的真實硬體實測進一步驗證了本地部署的可行性：M5 Pro(128GB RAM) 以 Q4_K_M 量化達 25.57 
tokens/s；RTX 5090(32GB) 以 Q6_K 量化可達 30+ tokens/s；R9700(32GB) 以 Q8 量化約 20 tokens/s。Simon Willison 親身驗證後指出：「本地模型雖尚未達到頂尖商業模型水準，但進步速度極快。」",{"type":557,"tag":558,"props":677,"children":678},{},[679],{"type":562,"value":680},"Unsloth AI 透過 Dynamic GGUFs 技術將最低門檻壓到 18GB 記憶體，進一步拉低硬體需求。社群亦有人提示：只要主機板具備兩個全頻寬 PCIe 插槽，可將模型分拆在兩張 16GB GPU 上運行，成本遠低於單張 RTX 5090 或 R9700。",{"type":557,"tag":601,"props":682,"children":684},{"id":683},"搭配-agent-框架的實戰表現",[685],{"type":562,"value":686},"搭配 Agent 框架的實戰表現",{"type":557,"tag":558,"props":688,"children":689},{},[690],{"type":562,"value":691},"Reddit r/LocalLLaMA 社群出現了一份具里程碑意義的研究，完整記錄 Qwen3.6-35B-A3B 在搭配合適 Agent 框架後，已能在 agentic 任務上正面超越多款主流雲端 API，並提供了系統性的跨模型比較數據——這是開源模型競爭格局的重要轉折點。",{"type":557,"tag":623,"props":693,"children":694},{},[695],{"type":557,"tag":558,"props":696,"children":697},{},[698,702,705],{"type":557,"tag":630,"props":699,"children":700},{},[701],{"type":562,"value":634},{"type":557,"tag":636,"props":703,"children":704},{},[],{"type":562,"value":706},"\nAgentic 任務：讓 AI 模型自主規劃、分解並執行多步驟任務（如自動修 bug、生成並測試程式碼），而非只進行單輪問答的應用場景。",{"type":557,"tag":558,"props":708,"children":709},{},[710],{"type":562,"value":711},"MoE 架構的關鍵優勢在於推理時僅激活 3B 參數，大幅降低計算開銷，同時維持 35B 總參數帶來的能力廣度。搭配 Agent 框架後，模型的長程推理和工具呼叫效率得到充分發揮，在複雜編程任務中展現出遠超靜態基準測試的實戰能力。",{"type":557,"tag":558,"props":713,"children":714},{},[715],{"type":562,"value":716},"量化策略選擇對 MoE 模型的影響尤為顯著。HN 社群工程師建議優先選用 Q8 或 Q6_UD 量化版，並強調在激活參數極少的 MoE 架構中不進行 KV cache 量化的重要性——即便 KL 散度下降看似微小，對最終推理品質的影響仍然實質可感。",{"type":557,"tag":601,"props":718,"children":720},{"id":719},"開源模型競爭格局與社群展望",[721],{"type":562,"value":719},{"type":557,"tag":558,"props":723,"children":724},{},[725],{"type":562,"value":726},"Qwen3.6-27B 的發布標誌著開源模型在「效能密度」上達到新高點：以前代旗艦 1/14 的體積，在四項編程基準全面勝出，打破了「更大模型 = 更強效能」的慣性認知。",{"type":557,"tag":558,"props":728,"children":729},{},[730],{"type":562,"value":731},"Bluesky 社群的 timkellogg.me 對小模型與 Opus 4.5 的比較感到震驚。多位開發者在完成前端設計與 agentic 基準測試後，表示效能提升遠超預期，Qwen 3.6 27B 
相較前代幾乎是跨代躍升。",{"type":557,"tag":558,"props":733,"children":734},{},[735],{"type":562,"value":736},"Reddit r/LocalLLaMA 已有研究者針對 MoE 姊妹款的 agentic 表現進行系統性實驗，完整的跨模型比較數據正在引發廣泛討論與跟進研究。未來競爭的核心問題將不再是「開源模型能不能用」，而是「在哪些任務上開源模型已經比商業 API 更划算」。",{"title":271,"searchDepth":564,"depth":564,"links":738},[],{"data":740,"body":742,"excerpt":-1,"toc":748},{"title":271,"description":741},"Qwen3.6-27B 在架構上的核心突破，是以極致的效能密度顛覆了「更大模型 = 更強能力」的慣性認知。在 27B 參數的前提下，它在四項主要編程基準全面超越體積達 807GB 的前代旗艦，背後有三個關鍵機制在發揮作用。",{"type":554,"children":743},[744],{"type":557,"tag":558,"props":745,"children":746},{},[747],{"type":562,"value":741},{"title":271,"searchDepth":564,"depth":564,"links":749},[],{"data":751,"body":753,"excerpt":-1,"toc":774},{"title":271,"description":752},"傳統 Transformer 的注意力機制隨序列長度呈二次方成本增長，而 Qwen3.6-27B 引入 Gated DeltaNet（線性注意力變體）與 Gated Attention 的混合設計，在長上下文處理上實現成本與效能的更優平衡。64 層架構搭配 5120 的隱層維度，提供足夠的表達能力以捕捉複雜程式邏輯結構。",{"type":554,"children":754},[755,759],{"type":557,"tag":558,"props":756,"children":757},{},[758],{"type":562,"value":752},{"type":557,"tag":623,"props":760,"children":761},{},[762],{"type":557,"tag":558,"props":763,"children":764},{},[765,769,772],{"type":557,"tag":630,"props":766,"children":767},{},[768],{"type":562,"value":634},{"type":557,"tag":636,"props":770,"children":771},{},[],{"type":562,"value":773},"\nGated DeltaNet：一種線性注意力機制的變體，透過門控機制動態調整記憶更新量，在保持線性時間複雜度的同時提升序列建模能力。",{"title":271,"searchDepth":564,"depth":564,"links":775},[],{"data":777,"body":779,"excerpt":-1,"toc":785},{"title":271,"description":778},"原生支援 262,144 tokens 上下文視窗，透過 YaRN 技術可擴展至約 100 萬 tokens。對於需要分析大型程式碼庫或處理長文件的編程任務，這個能力格外關鍵——完整的上下文視窗讓模型可一次性「看見」整個專案架構，而非分批處理。",{"type":554,"children":780},[781],{"type":557,"tag":558,"props":782,"children":783},{},[784],{"type":562,"value":778},{"title":271,"searchDepth":564,"depth":564,"links":786},[],{"data":788,"body":790,"excerpt":-1,"toc":812},{"title":271,"description":789},"Qwen3.6-27B 採用單一 checkpoint 同時支援「thinking 模式」與「非思考模式」兩種推理路徑。thinking 
模式讓模型展開長程推理鏈以應對複雜任務，非思考模式在速度優先的場景下快速作答，省去維護兩個獨立模型的成本。",{"type":554,"children":791},[792,796],{"type":557,"tag":558,"props":793,"children":794},{},[795],{"type":562,"value":789},{"type":557,"tag":623,"props":797,"children":798},{},[799],{"type":557,"tag":558,"props":800,"children":801},{},[802,807,810],{"type":557,"tag":630,"props":803,"children":804},{},[805],{"type":562,"value":806},"白話比喻",{"type":557,"tag":636,"props":808,"children":809},{},[],{"type":562,"value":811},"\n想象一位工程師有兩種工作節奏：遇到難題時打開草稿本慢慢推導（thinking 模式），回答日常問題時則直接開口作答（非思考模式）。Qwen3.6-27B 就是這樣一位「同一個人，兩種節奏」的助手——切換模式不需要換人，只需切換一個開關。",{"title":271,"searchDepth":564,"depth":564,"links":813},[],{"data":815,"body":816,"excerpt":-1,"toc":933},{"title":271,"description":271},{"type":554,"children":817},[818,823,848,853,876,881,886,891,904,909,922,928],{"type":557,"tag":601,"props":819,"children":821},{"id":820},"競爭版圖",[822],{"type":562,"value":820},{"type":557,"tag":824,"props":825,"children":826},"ul",{},[827,838],{"type":557,"tag":828,"props":829,"children":830},"li",{},[831,836],{"type":557,"tag":630,"props":832,"children":833},{},[834],{"type":562,"value":835},"直接競品",{"type":562,"value":837},"：Mistral Devstral 2（MoE 架構，編程特化）、DeepSeek-Coder-V2（開源編程模型）、CodeLlama 70B（Meta 開源）",{"type":557,"tag":828,"props":839,"children":840},{},[841,846],{"type":557,"tag":630,"props":842,"children":843},{},[844],{"type":562,"value":845},"間接競品",{"type":562,"value":847},"：Claude Opus API（商業雲端）、GPT-4o（OpenAI 雲端）、GitHub Copilot（整合式編程助手）",{"type":557,"tag":601,"props":849,"children":851},{"id":850},"護城河類型",[852],{"type":562,"value":850},{"type":557,"tag":824,"props":854,"children":855},{},[856,866],{"type":557,"tag":828,"props":857,"children":858},{},[859,864],{"type":557,"tag":630,"props":860,"children":861},{},[862],{"type":562,"value":863},"工程護城河",{"type":562,"value":865},"：以 27B 超越 397B 的效能密度突破，以及 Gated DeltaNet + Gated Attention 
混合架構帶來的長上下文優勢，形成短期技術壁壘",{"type":557,"tag":828,"props":867,"children":868},{},[869,874],{"type":557,"tag":630,"props":870,"children":871},{},[872],{"type":562,"value":873},"生態護城河",{"type":562,"value":875},"：Apache 2.0 授權吸引企業在 Qwen 架構上進行微調和二次開發，積累的生態適配和調優經驗形成切換成本",{"type":557,"tag":601,"props":877,"children":879},{"id":878},"定價策略",[880],{"type":562,"value":878},{"type":557,"tag":558,"props":882,"children":883},{},[884],{"type":562,"value":885},"Apache 2.0「零授權費」策略，本質上以生態擴張換取未來雲端 API 流量。Alibaba Cloud 透過開源版本建立開發者黏性，引導有規模化需求的客戶轉向其商業 Qwen API。",{"type":557,"tag":601,"props":887,"children":889},{"id":888},"企業導入阻力",[890],{"type":562,"value":888},{"type":557,"tag":824,"props":892,"children":893},{},[894,899],{"type":557,"tag":828,"props":895,"children":896},{},[897],{"type":562,"value":898},"27B 參數仍需專業 GPU 硬體，中小型企業本地部署的硬體採購與維運成本不可低估",{"type":557,"tag":828,"props":900,"children":901},{},[902],{"type":562,"value":903},"缺乏企業級 SLA 支援、SOC 2 及 ISO 27001 等合規認證，大型企業採購流程有顧慮",{"type":557,"tag":601,"props":905,"children":907},{"id":906},"第二序影響",[908],{"type":562,"value":906},{"type":557,"tag":824,"props":910,"children":911},{},[912,917],{"type":557,"tag":828,"props":913,"children":914},{},[915],{"type":562,"value":916},"雲端 API 供應商（尤其 Anthropic）的高端定價壓力持續上升，可能加速雲端服務降價或功能差異化",{"type":557,"tag":828,"props":918,"children":919},{},[920],{"type":562,"value":921},"開源編程模型生態加速成熟，企業 AI 工具鏈「自建比採購便宜」的轉折點提前到來",{"type":557,"tag":601,"props":923,"children":925},{"id":924},"判決值得立即啟動-poc效能已達門檻視覺與生態仍待驗證",[926],{"type":562,"value":927},"判決：值得立即啟動 PoC（效能已達門檻，視覺與生態仍待驗證）",{"type":557,"tag":558,"props":929,"children":930},{},[931],{"type":562,"value":932},"效能已達雲端替代的嚴肅競爭者水準，但視覺模態問題、thinking token 消耗和企業支援缺口意味著生產環境風險仍在。建議有明確編程自動化場景的團隊立即啟動 PoC，同時保留雲端 
fallback，待生態系穩定後再評估全面切換。",{"title":271,"searchDepth":564,"depth":564,"links":934},[],{"data":936,"body":937,"excerpt":-1,"toc":992},{"title":271,"description":271},{"type":554,"children":938},[939,945,968,974],{"type":557,"tag":601,"props":940,"children":942},{"id":941},"主要編程基準-qwen36-27b-vs-qwen35-397b-a17b",[943],{"type":562,"value":944},"主要編程基準 (Qwen3.6-27B vs Qwen3.5-397B-A17B)",{"type":557,"tag":824,"props":946,"children":947},{},[948,953,958,963],{"type":557,"tag":828,"props":949,"children":950},{},[951],{"type":562,"value":952},"SWE-bench Verified：77.2 vs 76.2(+1.0)",{"type":557,"tag":828,"props":954,"children":955},{},[956],{"type":562,"value":957},"SWE-bench Pro：53.5 vs 50.9(+2.6)",{"type":557,"tag":828,"props":959,"children":960},{},[961],{"type":562,"value":962},"Terminal-Bench 2.0：59.3 vs 52.5(+6.8)",{"type":557,"tag":828,"props":964,"children":965},{},[966],{"type":562,"value":967},"SkillsBench：48.2 vs 30.0(+18.2)",{"type":557,"tag":601,"props":969,"children":971},{"id":970},"本地推理速度社群硬體實測",[972],{"type":562,"value":973},"本地推理速度（社群硬體實測）",{"type":557,"tag":824,"props":975,"children":976},{},[977,982,987],{"type":557,"tag":828,"props":978,"children":979},{},[980],{"type":562,"value":981},"M5 Pro（128GB RAM，Q4_K_M 量化）：25.57 tokens/s（Simon Willison 驗證）",{"type":557,"tag":828,"props":983,"children":984},{},[985],{"type":562,"value":986},"RTX 5090（32GB，Q6_K 量化）：30+ tokens/s",{"type":557,"tag":828,"props":988,"children":989},{},[990],{"type":562,"value":991},"R9700（32GB，Q8 量化）：約 20 
tokens/s",{"title":271,"searchDepth":564,"depth":564,"links":993},[],{"data":995,"body":996,"excerpt":-1,"toc":1017},{"title":271,"description":271},{"type":554,"children":997},[998],{"type":557,"tag":824,"props":999,"children":1000},{},[1001,1005,1009,1013],{"type":557,"tag":828,"props":1002,"children":1003},{},[1004],{"type":562,"value":62},{"type":557,"tag":828,"props":1006,"children":1007},{},[1008],{"type":562,"value":63},{"type":557,"tag":828,"props":1010,"children":1011},{},[1012],{"type":562,"value":64},{"type":557,"tag":828,"props":1014,"children":1015},{},[1016],{"type":562,"value":65},{"title":271,"searchDepth":564,"depth":564,"links":1018},[],{"data":1020,"body":1021,"excerpt":-1,"toc":1038},{"title":271,"description":271},{"type":554,"children":1022},[1023],{"type":557,"tag":824,"props":1024,"children":1025},{},[1026,1030,1034],{"type":557,"tag":828,"props":1027,"children":1028},{},[1029],{"type":562,"value":67},{"type":557,"tag":828,"props":1031,"children":1032},{},[1033],{"type":562,"value":68},{"type":557,"tag":828,"props":1035,"children":1036},{},[1037],{"type":562,"value":69},{"title":271,"searchDepth":564,"depth":564,"links":1039},[],{"data":1041,"body":1042,"excerpt":-1,"toc":1048},{"title":271,"description":73},{"type":554,"children":1043},[1044],{"type":557,"tag":558,"props":1045,"children":1046},{},[1047],{"type":562,"value":73},{"title":271,"searchDepth":564,"depth":564,"links":1049},[],{"data":1051,"body":1052,"excerpt":-1,"toc":1058},{"title":271,"description":74},{"type":554,"children":1053},[1054],{"type":557,"tag":558,"props":1055,"children":1056},{},[1057],{"type":562,"value":74},{"title":271,"searchDepth":564,"depth":564,"links":1059},[],{"data":1061,"body":1062,"excerpt":-1,"toc":1068},{"title":271,"description":132},{"type":554,"children":1063},[1064],{"type":557,"tag":558,"props":1065,"children":1066},{},[1067],{"type":562,"value":132},{"title":271,"searchDepth":564,"depth":564,"links":1069},[],{"data":1071,"body":1072,"excerpt":-1,
"toc":1078},{"title":271,"description":135},{"type":554,"children":1073},[1074],{"type":557,"tag":558,"props":1075,"children":1076},{},[1077],{"type":562,"value":135},{"title":271,"searchDepth":564,"depth":564,"links":1079},[],{"data":1081,"body":1082,"excerpt":-1,"toc":1088},{"title":271,"description":137},{"type":554,"children":1083},[1084],{"type":557,"tag":558,"props":1085,"children":1086},{},[1087],{"type":562,"value":137},{"title":271,"searchDepth":564,"depth":564,"links":1089},[],{"data":1091,"body":1092,"excerpt":-1,"toc":1098},{"title":271,"description":139},{"type":554,"children":1093},[1094],{"type":557,"tag":558,"props":1095,"children":1096},{},[1097],{"type":562,"value":139},{"title":271,"searchDepth":564,"depth":564,"links":1099},[],{"data":1101,"body":1102,"excerpt":-1,"toc":1223},{"title":271,"description":271},{"type":554,"children":1103},[1104,1110,1115,1120,1125,1130,1136,1141,1146,1151,1156,1162,1167,1182,1187,1192,1197,1203,1208,1213,1218],{"type":557,"tag":601,"props":1105,"children":1107},{"id":1106},"章節一workspace-agent-功能與運作機制",[1108],{"type":562,"value":1109},"章節一：Workspace Agent 功能與運作機制",{"type":557,"tag":558,"props":1111,"children":1112},{},[1113],{"type":562,"value":1114},"OpenAI 於 2026 年 4 月 22 日正式發布 ChatGPT Workspace Agents，定位為「GPTs 的進化版本」，核心目標是讓組織內的自動化 agent 能跨工具、跨團隊執行複雜的多步驟工作流程。",{"type":557,"tag":558,"props":1116,"children":1117},{},[1118],{"type":562,"value":1119},"Workspace Agent 具備持久記憶 (persistent memory) ，可跨 session 累積學習，效能隨使用時間持續提升。支援整合的工具涵蓋檔案系統、程式碼執行環境、連線 App、排程觸發器及自訂 MCP 伺服器，並可在使用者不在線的情況下持續排程執行。",{"type":557,"tag":558,"props":1121,"children":1122},{},[1123],{"type":562,"value":1124},"典型用例包含四類：Software Reviewer（政策合規檢查＋自動建立 IT ticket）、Product Feedback Router（多頻道監控＋週報彙整）、Weekly Metrics Reporter（自動蒐集並分發資料）、Lead Scoring（銷售線索評分與外展自動化）。",{"type":557,"tag":558,"props":1126,"children":1127},{},[1128],{"type":562,"value":1129},"值得注意的是，現有 GPT 可透過即將推出的轉換工具直接升級為 Workspace 
Agent，大幅降低既有使用者的遷移門檻。",{"type":557,"tag":601,"props":1131,"children":1133},{"id":1132},"章節二codex-引擎的雲端自動化架構",[1134],{"type":562,"value":1135},"章節二：Codex 引擎的雲端自動化架構",{"type":557,"tag":558,"props":1137,"children":1138},{},[1139],{"type":562,"value":1140},"Workspace Agent 底層由 OpenAI 的 Codex 引擎驅動，以 GPT-5.3-Codex 模型作為核心推論引擎，同時支援 GPT-5.4 與 GPT-5.4-mini 模型執行 agent 任務。",{"type":557,"tag":558,"props":1142,"children":1143},{},[1144],{"type":562,"value":1145},"雲端原生架構是這次設計的關鍵突破——agent 在雲端持續運行，不依賴使用者本地裝置或在線狀態，實現真正的非同步自動化。",{"type":557,"tag":558,"props":1147,"children":1148},{},[1149],{"type":562,"value":1150},"計費方面，免費試用期至 2026 年 5 月 6 日截止，之後改為 credit-based 計費模式。GPT-5.4 費率為輸入 62.5 credits/1M tokens、輸出 375 credits/1M tokens；GPT-5.3-Codex 則為輸入 43.75 credits/1M tokens、輸出 350 credits/1M tokens。",{"type":557,"tag":558,"props":1152,"children":1153},{},[1154],{"type":562,"value":1155},"Business 方案提供較大虛擬機以加速雲端任務，Enterprise 方案享有優先請求處理待遇。這套階梯式架構讓企業可依任務複雜度與預算選擇適合的模型版本。",{"type":557,"tag":601,"props":1157,"children":1159},{"id":1158},"章節三企業安全與權限管理設計",[1160],{"type":562,"value":1161},"章節三：企業安全與權限管理設計",{"type":557,"tag":558,"props":1163,"children":1164},{},[1165],{"type":562,"value":1166},"Workspace Agent 採用 RBAC 機制，管理員可精細控制誰能建立 agent、誰可使用哪些工具，並提供稽核日誌 (Audit Logs) 與集中管理介面。",{"type":557,"tag":623,"props":1168,"children":1169},{},[1170],{"type":557,"tag":558,"props":1171,"children":1172},{},[1173,1177,1180],{"type":557,"tag":630,"props":1174,"children":1175},{},[1176],{"type":562,"value":634},{"type":557,"tag":636,"props":1178,"children":1179},{},[],{"type":562,"value":1181},"\nRBAC（Role-Based Access Control，角色型存取控制）：依使用者角色（如管理員、一般員工）決定系統存取權限的機制，而非針對個別使用者逐一設定，是企業 IT 治理的主流授權模型。",{"type":557,"tag":558,"props":1183,"children":1184},{},[1185],{"type":562,"value":1186},"為降低企業未預期的暴露風險，Enterprise 工作區預設為停用狀態，需管理員主動開啟；使用 EKM(Enterprise Key Management) 的帳號目前暫不支援此功能。",{"type":557,"tag":558,"props":1188,"children":1189},{},[1190],{"type":562,"value":1191},"agent 可同時存取行事曆、SharePoint 
文件、網路資料與內部系統，若權限設定不當，可能導致敏感資訊誤導向或非預期操作。",{"type":557,"tag":558,"props":1193,"children":1194},{},[1195],{"type":562,"value":1196},"平台雖內建 Prompt Injection 攻擊防禦與 Compliance API 監控，但企業導入前仍需審慎規劃存取控制邊界，不可將安全責任全數委由平台承擔。",{"type":557,"tag":601,"props":1198,"children":1200},{"id":1199},"章節四對企業-ai-協作生態的影響",[1201],{"type":562,"value":1202},"章節四：對企業 AI 協作生態的影響",{"type":557,"tag":558,"props":1204,"children":1205},{},[1206],{"type":562,"value":1207},"Workspace Agent 的發布標誌著 ChatGPT 在企業場景的定位轉型——從「個人助理」升級為「跨部門數位員工」。「build once, use together, improve over time」的共用模型設計，讓各部門無需重複建置 agent，直接共享持續優化的工作流程。",{"type":557,"tag":558,"props":1209,"children":1210},{},[1211],{"type":562,"value":1212},"OpenAI 在 Slack 平台已深度布局：170 個以上的 Connect 頻道、逾 500 萬條訊息歷史（自 2018 年起）。Workspace Agent 可直接嵌入現有溝通流程，而非要求企業改變工作習慣，這是相較於競品的關鍵差異化優勢。",{"type":557,"tag":558,"props":1214,"children":1215},{},[1216],{"type":562,"value":1217},"競爭格局面臨直接衝擊。Microsoft Copilot 與 Google Workspace AI 長期深耕企業工作流程，OpenAI 此次直接切入同一領域，使三方正面交鋒更加激烈。",{"type":557,"tag":558,"props":1219,"children":1220},{},[1221],{"type":562,"value":1222},"這只是 OpenAI 2026 年 4 月密集發布節奏的一部分，同期還有 ChatGPT Images 2、Codex Chronicle 與 Mac 版 Codex 大更新，顯示 OpenAI 正在加速企業市場的整體布局。",{"title":271,"searchDepth":564,"depth":564,"links":1224},[],{"data":1226,"body":1228,"excerpt":-1,"toc":1234},{"title":271,"description":1227},"Workspace Agent 的核心架構突破在於三個層面的技術整合——持久記憶、雲端非同步執行、以及跨工具協作能力。這三者的組合讓 agent 從「對話工具」進化為「可持續運作的數位同事」。",{"type":554,"children":1229},[1230],{"type":557,"tag":558,"props":1231,"children":1232},{},[1233],{"type":562,"value":1227},{"title":271,"searchDepth":564,"depth":564,"links":1235},[],{"data":1237,"body":1239,"excerpt":-1,"toc":1250},{"title":271,"description":1238},"傳統 ChatGPT 每次對話都從空白狀態開始，而 Workspace Agent 具備持久記憶機制，能跨 session 
累積組織特定的知識——包含業務規則、偏好回應格式、常見例外處理方式。",{"type":554,"children":1240},[1241,1245],{"type":557,"tag":558,"props":1242,"children":1243},{},[1244],{"type":562,"value":1238},{"type":557,"tag":558,"props":1246,"children":1247},{},[1248],{"type":562,"value":1249},"隨著使用時間增長，agent 的輸出品質持續提升，形成良性的學習飛輪。既有 GPT 可透過即將推出的轉換工具直接升級，使組織在 GPTs 時代已沉澱的設定不至於白費。",{"title":271,"searchDepth":564,"depth":564,"links":1251},[],{"data":1253,"body":1255,"excerpt":-1,"toc":1281},{"title":271,"description":1254},"底層由 Codex 引擎（GPT-5.3-Codex 模型）驅動，採用雲端原生架構。agent 在雲端伺服器持續運行，支援排程觸發，不需使用者在線即可執行任務。",{"type":554,"children":1256},[1257,1261,1266],{"type":557,"tag":558,"props":1258,"children":1259},{},[1260],{"type":562,"value":1254},{"type":557,"tag":558,"props":1262,"children":1263},{},[1264],{"type":562,"value":1265},"這與傳統 RPA 工具「需要本地機器保持開機」的模式截然不同。典型場景如：每週一早上自動彙整銷售數據並寄送報告，或在夜間持續監控多個 Slack 頻道的客訴訊號。",{"type":557,"tag":623,"props":1267,"children":1268},{},[1269],{"type":557,"tag":558,"props":1270,"children":1271},{},[1272,1276,1279],{"type":557,"tag":630,"props":1273,"children":1274},{},[1275],{"type":562,"value":634},{"type":557,"tag":636,"props":1277,"children":1278},{},[],{"type":562,"value":1280},"\nRPA（Robotic Process Automation，機器人流程自動化）：透過軟體機器人模擬人類操作介面（如滑鼠點擊、鍵盤輸入）自動化重複性工作任務的技術，傳統上依賴本地機器保持運行狀態。",{"title":271,"searchDepth":564,"depth":564,"links":1282},[],{"data":1284,"body":1286,"excerpt":-1,"toc":1340},{"title":271,"description":1285},"Workspace Agent 可整合的工具類型涵蓋：",{"type":554,"children":1287},[1288,1292,1320,1325],{"type":557,"tag":558,"props":1289,"children":1290},{},[1291],{"type":562,"value":1285},{"type":557,"tag":824,"props":1293,"children":1294},{},[1295,1300,1305,1310,1315],{"type":557,"tag":828,"props":1296,"children":1297},{},[1298],{"type":562,"value":1299},"檔案系統（SharePoint、Google Drive）",{"type":557,"tag":828,"props":1301,"children":1302},{},[1303],{"type":562,"value":1304},"程式碼執行環境",{"type":557,"tag":828,"props":1306,"children":1307},{},[1308],{"type":562,"value":1309},"連線 
App（Slack、行事曆、CRM）",{"type":557,"tag":828,"props":1311,"children":1312},{},[1313],{"type":562,"value":1314},"排程觸發器",{"type":557,"tag":828,"props":1316,"children":1317},{},[1318],{"type":562,"value":1319},"自訂 MCP(Model Context Protocol) 伺服器",{"type":557,"tag":558,"props":1321,"children":1322},{},[1323],{"type":562,"value":1324},"MCP 協定的支援尤為關鍵——企業可將內部系統（ERP、資料庫、內部 API）包裝為 MCP 伺服器，讓 agent 以標準化方式存取，無需 OpenAI 官方提供個別整合。",{"type":557,"tag":623,"props":1326,"children":1327},{},[1328],{"type":557,"tag":558,"props":1329,"children":1330},{},[1331,1335,1338],{"type":557,"tag":630,"props":1332,"children":1333},{},[1334],{"type":562,"value":806},{"type":557,"tag":636,"props":1336,"children":1337},{},[],{"type":562,"value":1339},"\n想像 Workspace Agent 是一位永遠在線的資深員工：你只需要培訓他一次（建立 agent、設定工具權限），之後他會記住所有你教過的事，在你睡覺時繼續工作，而且越做越上手。MCP 伺服器就像公司的門禁卡——讓他只能進入被授權的系統，不會亂闖。",{"title":271,"searchDepth":564,"depth":564,"links":1341},[],{"data":1343,"body":1344,"excerpt":-1,"toc":1463},{"title":271,"description":271},{"type":554,"children":1345},[1346,1351,1356,1361,1367,1372,1384,1389,1394,1412,1417,1440,1445],{"type":557,"tag":601,"props":1347,"children":1349},{"id":1348},"環境需求",[1350],{"type":562,"value":1348},{"type":557,"tag":558,"props":1352,"children":1353},{},[1354],{"type":562,"value":1355},"目前僅限 Business、Enterprise、Edu 及 Teachers 方案使用；Enterprise 工作區需管理員主動開啟 Workspace Agent 功能。使用 EKM 的帳號暫不支援，需等待後續更新公告。",{"type":557,"tag":558,"props":1357,"children":1358},{},[1359],{"type":562,"value":1360},"建議先在非生產環境（獨立測試工作區）驗證 agent 行為與權限邊界，確認無誤後再推廣至正式部署，避免敏感工具在測試期間被誤觸。",{"type":557,"tag":601,"props":1362,"children":1364},{"id":1363},"最小-poc",[1365],{"type":562,"value":1366},"最小 PoC",{"type":557,"tag":558,"props":1368,"children":1369},{},[1370],{"type":562,"value":1371},"以「自動彙整 Slack 頻道週報」為最低風險起點：",{"type":557,"tag":1373,"props":1374,"children":1378},"pre",{"className":1375,"code":1377,"language":562,"meta":271},[1376],"language-text","1. 在 ChatGPT 建立新 Workspace Agent（命名與用途描述）\n2. 
連線目標 Slack 工作區（需管理員授權 OAuth 範圍）\n3. 指定監控頻道：如 #product-feedback、#customer-support\n4. 設定排程：每週五 17:00 觸發\n5. 定義輸出格式：markdown 摘要 + 優先議題列表\n6. 手動觸發一次，驗證輸出品質與頻道存取正確性\n7. 確認無誤後啟用自動排程\n",[1379],{"type":557,"tag":1380,"props":1381,"children":1382},"code",{"__ignoreMap":271},[1383],{"type":562,"value":1377},{"type":557,"tag":601,"props":1385,"children":1387},{"id":1386},"驗測規劃",[1388],{"type":562,"value":1386},{"type":557,"tag":558,"props":1390,"children":1391},{},[1392],{"type":562,"value":1393},"驗測重點應涵蓋三個面向：",{"type":557,"tag":824,"props":1395,"children":1396},{},[1397,1402,1407],{"type":557,"tag":828,"props":1398,"children":1399},{},[1400],{"type":562,"value":1401},"功能正確性：agent 能否正確識別並執行指定任務，輸出格式是否符合預期範本",{"type":557,"tag":828,"props":1403,"children":1404},{},[1405],{"type":562,"value":1406},"權限邊界：agent 是否只存取被授權的資料範圍，不跨越 RBAC 設定的邊界",{"type":557,"tag":828,"props":1408,"children":1409},{},[1410],{"type":562,"value":1411},"排程可靠性：定時觸發是否穩定，失敗時是否觸發通知機制（郵件告警或 Slack 推播）",{"type":557,"tag":601,"props":1413,"children":1415},{"id":1414},"常見陷阱",[1416],{"type":562,"value":1414},{"type":557,"tag":824,"props":1418,"children":1419},{},[1420,1425,1430,1435],{"type":557,"tag":828,"props":1421,"children":1422},{},[1423],{"type":562,"value":1424},"過度授權：初期為求便利給予過大存取範圍，導致 agent 可接觸不應碰觸的敏感資料",{"type":557,"tag":828,"props":1426,"children":1427},{},[1428],{"type":562,"value":1429},"Prompt Injection 風險殘留：平台雖有內建防護，但外部輸入（如使用者上傳文件、第三方 webhook 內容）仍可能含有惡意指令",{"type":557,"tag":828,"props":1431,"children":1432},{},[1433],{"type":562,"value":1434},"Credit 成本估算失準：排程 agent 的 token 消耗難以預測，上線前務必設定用量上限與超限告警",{"type":557,"tag":828,"props":1436,"children":1437},{},[1438],{"type":562,"value":1439},"MCP 伺服器認證疏漏：若自訂 MCP 伺服器未妥善實作 token 
驗證，可能成為未授權存取的入口",{"type":557,"tag":601,"props":1441,"children":1443},{"id":1442},"上線檢核清單",[1444],{"type":562,"value":1442},{"type":557,"tag":824,"props":1446,"children":1447},{},[1448,1453,1458],{"type":557,"tag":828,"props":1449,"children":1450},{},[1451],{"type":562,"value":1452},"觀測：Credit 月用量監控儀表板、任務執行成功率追蹤、輸出品質抽查頻率（前兩週建議每日抽查）",{"type":557,"tag":828,"props":1454,"children":1455},{},[1456],{"type":562,"value":1457},"成本：預估月用量 × credits 費率，設定超限告警閾值，向管理層報告預算基準",{"type":557,"tag":828,"props":1459,"children":1460},{},[1461],{"type":562,"value":1462},"風險：RBAC 權限矩陣審查（最小權限原則）、稽核日誌保留期限確認（建議 90 天以上）、財務或 HR 等敏感工具是否設有額外人工審批節點",{"title":271,"searchDepth":564,"depth":564,"links":1464},[],{"data":1466,"body":1467,"excerpt":-1,"toc":1598},{"title":271,"description":271},{"type":554,"children":1468},[1469,1473,1494,1498,1519,1523,1528,1533,1537,1560,1564,1582,1588,1593],{"type":557,"tag":601,"props":1470,"children":1471},{"id":820},[1472],{"type":562,"value":820},{"type":557,"tag":824,"props":1474,"children":1475},{},[1476,1485],{"type":557,"tag":828,"props":1477,"children":1478},{},[1479,1483],{"type":557,"tag":630,"props":1480,"children":1481},{},[1482],{"type":562,"value":835},{"type":562,"value":1484},"：Microsoft Copilot（深度整合 M365 生態，已大規模商業部署）、Google Gemini for Workspace（Google Workspace 原生整合）",{"type":557,"tag":828,"props":1486,"children":1487},{},[1488,1492],{"type":557,"tag":630,"props":1489,"children":1490},{},[1491],{"type":562,"value":845},{"type":562,"value":1493},"：Zapier AI、Make.com（工作流程自動化平台）、UiPath（傳統 RPA 轉型 AI）、Slack AI（平台原生 AI 功能，直接競爭 Slack 整合場景）",{"type":557,"tag":601,"props":1495,"children":1496},{"id":850},[1497],{"type":562,"value":850},{"type":557,"tag":824,"props":1499,"children":1500},{},[1501,1510],{"type":557,"tag":828,"props":1502,"children":1503},{},[1504,1508],{"type":557,"tag":630,"props":1505,"children":1506},{},[1507],{"type":562,"value":863},{"type":562,"value":1509},"：Codex 
模型對多步驟推理與程式碼生成的特化能力；雲端非同步執行架構與持久記憶系統，短期難以快速複製",{"type":557,"tag":828,"props":1511,"children":1512},{},[1513,1517],{"type":557,"tag":630,"props":1514,"children":1515},{},[1516],{"type":562,"value":873},{"type":562,"value":1518},"：OpenAI 在 Slack 平台 170 個以上 Connect 頻道、500 萬條訊息歷史的深度整合；GPTs 龐大既有用戶基礎可直接升級，大幅降低遷移阻力",{"type":557,"tag":601,"props":1520,"children":1521},{"id":878},[1522],{"type":562,"value":878},{"type":557,"tag":558,"props":1524,"children":1525},{},[1526],{"type":562,"value":1527},"採用 credit-based 計費模式，免費試用期至 2026 年 5 月 6 日。這種結構將 agent 用量綁定於現有 ChatGPT 訂閱，降低企業導入的決策摩擦——不需要額外預算審批新工具，只需在現有方案內增加用量。",{"type":557,"tag":558,"props":1529,"children":1530},{},[1531],{"type":562,"value":1532},"長期來看，credit 費率的不透明性（月帳單難以預測）可能成為企業財務部門的顧慮點，這恰好是 Microsoft Copilot 固定月費模式的相對優勢。",{"type":557,"tag":601,"props":1534,"children":1535},{"id":888},[1536],{"type":562,"value":888},{"type":557,"tag":824,"props":1538,"children":1539},{},[1540,1545,1550,1555],{"type":557,"tag":828,"props":1541,"children":1542},{},[1543],{"type":562,"value":1544},"安全合規審查週期長：agent 跨系統存取的權限設計，須通過 IT 與法務部門評估，一般需 1-3 個月",{"type":557,"tag":828,"props":1546,"children":1547},{},[1548],{"type":562,"value":1549},"EKM 帳號不支援：排除高安全性需求客戶（金融、政府），縮小短期可觸及市場",{"type":557,"tag":828,"props":1551,"children":1552},{},[1553],{"type":562,"value":1554},"角色分工轉變：從「使用 ChatGPT」到「設計並維護 Workspace Agent」需要新技能組合，培訓成本不可忽視",{"type":557,"tag":828,"props":1556,"children":1557},{},[1558],{"type":562,"value":1559},"既有工具替換摩擦：已部署 Copilot 或 Google Workspace AI 的組織，替換需要遷移成本與重新培訓",{"type":557,"tag":601,"props":1561,"children":1562},{"id":906},[1563],{"type":562,"value":906},{"type":557,"tag":824,"props":1565,"children":1566},{},[1567,1572,1577],{"type":557,"tag":828,"props":1568,"children":1569},{},[1570],{"type":562,"value":1571},"企業 IT 採購格局重塑：Workspace Agent 若獲大規模採用，可能取代多個單點 SaaS 工具，衝擊中小型 B2B SaaS 生態",{"type":557,"tag":828,"props":1573,"children":1574},{},[1575],{"type":562,"value":1576},"新職位出現：「AI Agent 管理員」可能成為企業 IT 部門的新標準角色，負責 agent 
設計、維護與稽核",{"type":557,"tag":828,"props":1578,"children":1579},{},[1580],{"type":562,"value":1581},"監管壓力上升：agent 自動決策行為將促使監管機構關注 AI 在企業流程中的責任歸屬，預計帶動合規工具需求",{"type":557,"tag":601,"props":1583,"children":1585},{"id":1584},"判決護城河正在成形企業應啟動沙盒評估全面部署需-3-6-個月準備",[1586],{"type":562,"value":1587},"判決：護城河正在成形，企業應啟動沙盒評估（全面部署需 3-6 個月準備）",{"type":557,"tag":558,"props":1589,"children":1590},{},[1591],{"type":562,"value":1592},"Workspace Agent 的技術整合深度與生態布局已超越一般 AI 助手定位。對 OpenAI 既有企業訂閱用戶而言，沙盒 PoC 的機會成本極低，值得立即啟動。",{"type":557,"tag":558,"props":1594,"children":1595},{},[1596],{"type":562,"value":1597},"但 EKM 限制、credit 成本不確定性與安全審查需求，使全面部署至少需要 3-6 個月準備期。Microsoft 與 Google 的反制動作值得同步追蹤。",{"title":271,"searchDepth":564,"depth":564,"links":1599},[],{"data":1601,"body":1603,"excerpt":-1,"toc":1642},{"title":271,"description":1602},"目前為 Research Preview 階段，OpenAI 尚未發布 Workspace Agent 的正式效能基準測試數據。",{"type":554,"children":1604},[1605,1609,1614,1619,1637],{"type":557,"tag":558,"props":1606,"children":1607},{},[1608],{"type":562,"value":1602},{"type":557,"tag":601,"props":1610,"children":1612},{"id":1611},"計費費率參考",[1613],{"type":562,"value":1611},{"type":557,"tag":558,"props":1615,"children":1616},{},[1617],{"type":562,"value":1618},"以下為可用於成本估算的官方 credit 費率：",{"type":557,"tag":824,"props":1620,"children":1621},{},[1622,1627,1632],{"type":557,"tag":828,"props":1623,"children":1624},{},[1625],{"type":562,"value":1626},"GPT-5.4：輸入 62.5 credits/1M tokens、輸出 375 credits/1M tokens",{"type":557,"tag":828,"props":1628,"children":1629},{},[1630],{"type":562,"value":1631},"GPT-5.3-Codex：輸入 43.75 credits/1M tokens、輸出 350 credits/1M tokens",{"type":557,"tag":828,"props":1633,"children":1634},{},[1635],{"type":562,"value":1636},"GPT-5.4-mini：費率較低（詳見 OpenAI 開發者定價頁面）",{"type":557,"tag":558,"props":1638,"children":1639},{},[1640],{"type":562,"value":1641},"實際任務成本需依工作流程的輸入輸出比例估算，排程 agent 
的長期費用難以預測，上線前建議進行壓力測試並設定用量上限告警。",{"title":271,"searchDepth":564,"depth":564,"links":1643},[],{"data":1645,"body":1646,"excerpt":-1,"toc":1667},{"title":271,"description":271},{"type":554,"children":1647},[1648],{"type":557,"tag":824,"props":1649,"children":1650},{},[1651,1655,1659,1663],{"type":557,"tag":828,"props":1652,"children":1653},{},[1654],{"type":562,"value":145},{"type":557,"tag":828,"props":1656,"children":1657},{},[1658],{"type":562,"value":146},{"type":557,"tag":828,"props":1660,"children":1661},{},[1662],{"type":562,"value":147},{"type":557,"tag":828,"props":1664,"children":1665},{},[1666],{"type":562,"value":148},{"title":271,"searchDepth":564,"depth":564,"links":1668},[],{"data":1670,"body":1671,"excerpt":-1,"toc":1692},{"title":271,"description":271},{"type":554,"children":1672},[1673],{"type":557,"tag":824,"props":1674,"children":1675},{},[1676,1680,1684,1688],{"type":557,"tag":828,"props":1677,"children":1678},{},[1679],{"type":562,"value":150},{"type":557,"tag":828,"props":1681,"children":1682},{},[1683],{"type":562,"value":151},{"type":557,"tag":828,"props":1685,"children":1686},{},[1687],{"type":562,"value":152},{"type":557,"tag":828,"props":1689,"children":1690},{},[1691],{"type":562,"value":153},{"title":271,"searchDepth":564,"depth":564,"links":1693},[],{"data":1695,"body":1696,"excerpt":-1,"toc":1702},{"title":271,"description":157},{"type":554,"children":1697},[1698],{"type":557,"tag":558,"props":1699,"children":1700},{},[1701],{"type":562,"value":157},{"title":271,"searchDepth":564,"depth":564,"links":1703},[],{"data":1705,"body":1706,"excerpt":-1,"toc":1712},{"title":271,"description":158},{"type":554,"children":1707},[1708],{"type":557,"tag":558,"props":1709,"children":1710},{},[1711],{"type":562,"value":158},{"title":271,"searchDepth":564,"depth":564,"links":1713},[],{"data":1715,"body":1716,"excerpt":-1,"toc":1722},{"title":271,"description":159},{"type":554,"children":1717},[1718],{"type":557,"tag":558,"props":1719,"children":1720},{
},[1721],{"type":562,"value":159},{"title":271,"searchDepth":564,"depth":564,"links":1723},[],{"data":1725,"body":1726,"excerpt":-1,"toc":1732},{"title":271,"description":202},{"type":554,"children":1727},[1728],{"type":557,"tag":558,"props":1729,"children":1730},{},[1731],{"type":562,"value":202},{"title":271,"searchDepth":564,"depth":564,"links":1733},[],{"data":1735,"body":1736,"excerpt":-1,"toc":1742},{"title":271,"description":205},{"type":554,"children":1737},[1738],{"type":557,"tag":558,"props":1739,"children":1740},{},[1741],{"type":562,"value":205},{"title":271,"searchDepth":564,"depth":564,"links":1743},[],{"data":1745,"body":1746,"excerpt":-1,"toc":1752},{"title":271,"description":207},{"type":554,"children":1747},[1748],{"type":557,"tag":558,"props":1749,"children":1750},{},[1751],{"type":562,"value":207},{"title":271,"searchDepth":564,"depth":564,"links":1753},[],{"data":1755,"body":1756,"excerpt":-1,"toc":1762},{"title":271,"description":209},{"type":554,"children":1757},[1758],{"type":557,"tag":558,"props":1759,"children":1760},{},[1761],{"type":562,"value":209},{"title":271,"searchDepth":564,"depth":564,"links":1763},[],{"data":1765,"body":1766,"excerpt":-1,"toc":1883},{"title":271,"description":271},{"type":554,"children":1767},[1768,1774,1779,1790,1795,1800,1805,1811,1816,1821,1836,1841,1847,1852,1857,1862,1868,1873,1878],{"type":557,"tag":601,"props":1769,"children":1771},{"id":1770},"章節一tpu-v8-雙晶片分工架構解析",[1772],{"type":562,"value":1773},"章節一：TPU v8 雙晶片分工架構解析",{"type":557,"tag":558,"props":1775,"children":1776},{},[1777],{"type":562,"value":1778},"2026 年 4 月，Google 在 Cloud Next '26 正式發表第八代 TPU，做出了標誌性的架構決策：將「一代一款」的單一路線，正式拆分為 TPU 8t（訓練）與 TPU 8i（推論）兩款專用晶片。",{"type":557,"tag":558,"props":1780,"children":1781},{},[1782,1788],{"type":557,"tag":1783,"props":1784,"children":1785},"span",{},[1786],{"type":562,"value":1787},"rss-google-ai-6a159b97",{"type":562,"value":1789}," 的官方定位開宗明義：「兩款專用晶片，為 Agentic 
時代提供動力。」這並非行銷語言，而是對底層工作負載特性的直接回應——訓練晶片追求橫向擴展的算力天花板，推論晶片追求單 token 延遲的極致壓縮。",{"type":557,"tag":558,"props":1791,"children":1792},{},[1793],{"type":562,"value":1794},"TPU 8t 單 superpod 算力達 121 ExaFlops，可整合 9,600 顆晶片，透過 Virgo Network 可將單一邏輯叢集擴展至 100 萬顆晶片，Goodput（有效計算時間佔比）超過 97%，interchip 頻寬為前代 2 倍，儲存存取速度為前代 10 倍。",{"type":557,"tag":558,"props":1796,"children":1797},{},[1798],{"type":562,"value":1799},"TPU 8i 則配備 288 GB HBM、384 MB 片上 SRAM（前代 3 倍），引入 Boardfly 拓撲架構與 Collectives Acceleration Engine，最高可降低 5 倍 MoE 集體通訊延遲。",{"type":557,"tag":558,"props":1801,"children":1802},{},[1803],{"type":562,"value":1804},"兩款晶片均採用 Google 自研 Arm 架構 Axion CPU，製造商分別為 Broadcom(8t) 與 MediaTek(8i) ，預計 2026 年底 GA。",{"type":557,"tag":601,"props":1806,"children":1808},{"id":1807},"章節二agent-工作負載的硬體特化設計",[1809],{"type":562,"value":1810},"章節二：Agent 工作負載的硬體特化設計",{"type":557,"tag":558,"props":1812,"children":1813},{},[1814],{"type":562,"value":1815},"Agent 推論與傳統批次推論的根本差異，在於 KV cache 的存取模式——多輪對話、工具呼叫、上下文保留，使記憶體容量與延遲成為決定用戶體驗的關鍵瓶頸。",{"type":557,"tag":558,"props":1817,"children":1818},{},[1819],{"type":562,"value":1820},"TPU 8i 的 384 MB 片上 SRAM 設計，讓 KV cache 可完全保存於片上，避免頻繁存取 HBM 造成的延遲抖動。Google 官方明確表示：「TPU 8i 專為讓 AI Agent 能以極快速度完成多步驟工作流程而設計。」",{"type":557,"tag":623,"props":1822,"children":1823},{},[1824],{"type":557,"tag":558,"props":1825,"children":1826},{},[1827,1831,1834],{"type":557,"tag":630,"props":1828,"children":1829},{},[1830],{"type":562,"value":634},{"type":557,"tag":636,"props":1832,"children":1833},{},[],{"type":562,"value":1835},"\nKV cache(Key-Value Cache) ：大型語言模型推論時，將每輪對話的注意力機制中間結果快取起來，避免重複計算，是影響長對話延遲的核心資料結構。",{"type":557,"tag":558,"props":1837,"children":1838},{},[1839],{"type":562,"value":1840},"Managed Lustre 儲存系統可將資料直接送入加速器記憶體，消除 Agent 長上下文場景的 I/O 瓶頸。對比 NVIDIA NVLink 域上限（最多 576 顆晶片），Google 的橫向擴展架構在大規模 Agent 
服務上具有結構性優勢。",{"type":557,"tag":601,"props":1842,"children":1844},{"id":1843},"章節三社群技術評析與未解疑問",[1845],{"type":562,"value":1846},"章節三：社群技術評析與未解疑問",{"type":557,"tag":558,"props":1848,"children":1849},{},[1850],{"type":562,"value":1851},"HN 社群對這波發表的態度分歧明顯。技術派關注 Broadcom + MediaTek 製造商組合的供應鏈含義，以及 97% Goodput 在大規模訓練中的實際意義。",{"type":557,"tag":558,"props":1853,"children":1854},{},[1855],{"type":562,"value":1856},"懷疑派則指向更核心的問題：硬體性能提升，是否真的會反映在 Gemini 模型的體驗改善，還是只降低 Google 自身的訓練成本？社群用戶 overfeed 點出實務痛點：新模型行為與前代不一致，導致 pipeline 依賴特定版本，代際升級帶來維運負擔。",{"type":557,"tag":558,"props":1858,"children":1859},{},[1860],{"type":562,"value":1861},"TechCrunch 引述分析師 Patrick Moorhead 的歷史警示：Google 早在 2016 年就聲稱 TPU 優於 Nvidia GPU，但 Nvidia 市值至今已達 5 兆美元。custom silicon 的技術優勢能否轉化為市場份額，仍是未解之問。",{"type":557,"tag":601,"props":1863,"children":1865},{"id":1864},"章節四google-自研晶片的長期戰略定位",[1866],{"type":562,"value":1867},"章節四：Google 自研晶片的長期戰略定位",{"type":557,"tag":558,"props":1869,"children":1870},{},[1871],{"type":562,"value":1872},"Google 此次明確表示不打算取代 Nvidia——2026 年稍晚仍將引入 Nvidia Vera Rubin 晶片，並與 Nvidia 在 Falcon 網路軟體上合作。這是「補充而非替代」的雙軌策略：自研 TPU 服務 Google 內部訓練與 Cloud 特定客戶，Nvidia GPU 服務更廣泛生態。",{"type":557,"tag":558,"props":1874,"children":1875},{},[1876],{"type":562,"value":1877},"The Decoder 指出：NVIDIA Rubin 單晶片峰值算力與記憶體頻寬仍高於 TPU 8t，但 NVLink 的域規模天花板（576 顆）限制了超大規模訓練的可能性。Google 的 Virgo Network 百萬晶片線性擴展，在超大規模預訓練場景具有結構性優勢。",{"type":557,"tag":558,"props":1879,"children":1880},{},[1881],{"type":562,"value":1882},"HN 社群估算，Google 內部使用 TPU 訓練模型的成本可能比 Nvidia GPU 低「一個數量級」——這個成本差距，或許才是 Google 垂直整合策略的真正護城河，而非晶片本身的效能數字。",{"title":271,"searchDepth":564,"depth":564,"links":1884},[],{"data":1886,"body":1888,"excerpt":-1,"toc":1894},{"title":271,"description":1887},"Google 
此次雙晶片架構並非簡單的產品線拆分，而是對訓練與推論兩種工作負載在記憶體存取模式、通訊拓撲、延遲敏感度上根本差異的硬體回應。",{"type":554,"children":1889},[1890],{"type":557,"tag":558,"props":1891,"children":1892},{},[1893],{"type":562,"value":1887},{"title":271,"searchDepth":564,"depth":564,"links":1895},[],{"data":1897,"body":1899,"excerpt":-1,"toc":1920},{"title":271,"description":1898},"TPU 8t 透過 Virgo Network 實現橫向線性擴展，單一邏輯叢集可達 100 萬顆晶片，Goodput 超過 97%。interchip 頻寬為前代 2 倍，儲存存取速度為前代 10 倍，確保大規模訓練的有效算力利用率。",{"type":554,"children":1900},[1901,1905],{"type":557,"tag":558,"props":1902,"children":1903},{},[1904],{"type":562,"value":1898},{"type":557,"tag":623,"props":1906,"children":1907},{},[1908],{"type":557,"tag":558,"props":1909,"children":1910},{},[1911,1915,1918],{"type":557,"tag":630,"props":1912,"children":1913},{},[1914],{"type":562,"value":634},{"type":557,"tag":636,"props":1916,"children":1917},{},[],{"type":562,"value":1919},"\nGoodput（有效計算時間佔比）：在分散式訓練中，扣除通訊等待、記憶體搬移、設備故障等非計算時間後，真正用於模型訓練計算的時間比例。97% 意味著僅 3% 時間用於開銷，是業界極高水準。",{"title":271,"searchDepth":564,"depth":564,"links":1921},[],{"data":1923,"body":1925,"excerpt":-1,"toc":1946},{"title":271,"description":1924},"TPU 8i 配備 384 MB 片上 SRAM（前代 3 倍），讓 KV cache 完全駐留片上，消除 HBM 存取造成的延遲抖動。Boardfly 拓撲架構將網路直徑縮減 50%，Collectives Acceleration Engine 專門加速 MoE 模型集體通訊，最高降低 5 倍延遲。",{"type":554,"children":1926},[1927,1931],{"type":557,"tag":558,"props":1928,"children":1929},{},[1930],{"type":562,"value":1924},{"type":557,"tag":623,"props":1932,"children":1933},{},[1934],{"type":557,"tag":558,"props":1935,"children":1936},{},[1937,1941,1944],{"type":557,"tag":630,"props":1938,"children":1939},{},[1940],{"type":562,"value":634},{"type":557,"tag":636,"props":1942,"children":1943},{},[],{"type":562,"value":1945},"\nMoE(Mixture-of-Experts) ：一種稀疏神經網路架構，每次推論只激活模型中少數「專家」子網路，可在不增加推論成本的情況下大幅擴大模型總參數量。集體通訊是 MoE 的主要延遲來源之一。",{"title":271,"searchDepth":564,"depth":564,"links":1947},[],{"data":1949,"body":1951,"excerpt":-1,"toc":1972},{"title":271,"description":1950},"Virgo Network 是 TPU 8t 突破 NVLink 
域規模上限的核心技術。NVIDIA NVLink 最大支援 576 顆晶片組成一個域，而 Virgo Network 可線性擴展至 100 萬顆晶片，在超大規模預訓練場景下提供結構性成本優勢。",{"type":554,"children":1952},[1953,1957],{"type":557,"tag":558,"props":1954,"children":1955},{},[1956],{"type":562,"value":1950},{"type":557,"tag":623,"props":1958,"children":1959},{},[1960],{"type":557,"tag":558,"props":1961,"children":1962},{},[1963,1967,1970],{"type":557,"tag":630,"props":1964,"children":1965},{},[1966],{"type":562,"value":806},{"type":557,"tag":636,"props":1968,"children":1969},{},[],{"type":562,"value":1971},"\n把 TPU 8t 想成高速公路系統：Virgo Network 是讓幾百萬輛車都能不塞車的多層立交系統；Goodput 97% 就像道路實際使用效率 97%，幾乎沒有空駛浪費。TPU 8i 則更像特種快遞：專跑短距離、高頻次的任務，每一包都要求最快到達。",{"title":271,"searchDepth":564,"depth":564,"links":1973},[],{"data":1975,"body":1976,"excerpt":-1,"toc":2097},{"title":271,"description":271},{"type":554,"children":1977},[1978,1982,2003,2007,2028,2032,2037,2042,2046,2064,2068,2086,2092],{"type":557,"tag":601,"props":1979,"children":1980},{"id":820},[1981],{"type":562,"value":820},{"type":557,"tag":824,"props":1983,"children":1984},{},[1985,1994],{"type":557,"tag":828,"props":1986,"children":1987},{},[1988,1992],{"type":557,"tag":630,"props":1989,"children":1990},{},[1991],{"type":562,"value":835},{"type":562,"value":1993},"：NVIDIA H100/H200、Blackwell B200、Vera Rubin（預計 2026 年稍晚）",{"type":557,"tag":828,"props":1995,"children":1996},{},[1997,2001],{"type":557,"tag":630,"props":1998,"children":1999},{},[2000],{"type":562,"value":845},{"type":562,"value":2002},"：AWS Trainium/Inferentia、Microsoft Azure Maia、Meta MTIA",{"type":557,"tag":601,"props":2004,"children":2005},{"id":850},[2006],{"type":562,"value":850},{"type":557,"tag":824,"props":2008,"children":2009},{},[2010,2019],{"type":557,"tag":828,"props":2011,"children":2012},{},[2013,2017],{"type":557,"tag":630,"props":2014,"children":2015},{},[2016],{"type":562,"value":863},{"type":562,"value":2018},"：Virgo Network 的百萬晶片線性擴展能力，NVIDIA NVLink 最多 576 顆晶片的架構上限難以短期突破；TPU 8i 的片上 SRAM KV cache 設計是針對 Agent 
工作負載的特化優勢",{"type":557,"tag":828,"props":2020,"children":2021},{},[2022,2026],{"type":557,"tag":630,"props":2023,"children":2024},{},[2025],{"type":562,"value":873},{"type":562,"value":2027},"：TPU 只能在 Google Cloud 上使用，與 Gemini 模型、Vertex AI、Google Workspace 深度整合，形成「用 Google 模型就用 Google 晶片」的生態鎖定",{"type":557,"tag":601,"props":2029,"children":2030},{"id":878},[2031],{"type":562,"value":878},{"type":557,"tag":558,"props":2033,"children":2034},{},[2035],{"type":562,"value":2036},"Google 官方尚未公布 TPU 8t/8i 的具體定價。HN 社群估算，Google 內部使用 TPU 訓練模型的成本可能比 Nvidia GPU 低「一個數量級」，但這是內部成本，外部客戶能否享受同等折扣仍不明確。",{"type":557,"tag":558,"props":2038,"children":2039},{},[2040],{"type":562,"value":2041},"Google 明確表示 2026 年稍晚仍將引入 Nvidia Vera Rubin，表明定價策略不試圖在所有場景取代 Nvidia，而是針對特定高價值客戶提供 TPU 選項。",{"type":557,"tag":601,"props":2043,"children":2044},{"id":888},[2045],{"type":562,"value":888},{"type":557,"tag":824,"props":2047,"children":2048},{},[2049,2054,2059],{"type":557,"tag":828,"props":2050,"children":2051},{},[2052],{"type":562,"value":2053},"JAX/TensorFlow 生態鎖定：大多數企業 ML 工作負載建立在 PyTorch 上，遷移成本高",{"type":557,"tag":828,"props":2055,"children":2056},{},[2057],{"type":562,"value":2058},"Gemini 模型行為一致性問題：新模型版本行為不穩定，pipeline 依賴特定版本，代際升級帶來維運負擔",{"type":557,"tag":828,"props":2060,"children":2061},{},[2062],{"type":562,"value":2063},"2026 年底 GA 前的早期存取限制：企業採購決策週期與 GA 時程存在落差",{"type":557,"tag":601,"props":2065,"children":2066},{"id":906},[2067],{"type":562,"value":906},{"type":557,"tag":824,"props":2069,"children":2070},{},[2071,2076,2081],{"type":557,"tag":828,"props":2072,"children":2073},{},[2074],{"type":562,"value":2075},"若 Google 成功將訓練成本降低一個數量級，Gemini 定價空間將大幅擴大，可能引發 OpenAI/Anthropic 的降價壓力",{"type":557,"tag":828,"props":2077,"children":2078},{},[2079],{"type":562,"value":2080},"MediaTek 進入 AI 推論晶片代工市場，可能影響台積電在 AI 晶片代工市場的份額分配",{"type":557,"tag":828,"props":2082,"children":2083},{},[2084],{"type":562,"value":2085},"Boardfly + Collectives Acceleration Engine 的 MoE 優化，暗示 Google 對 MoE 架構 Agent 
模型的長期押注",{"type":557,"tag":601,"props":2087,"children":2089},{"id":2088},"判決結構性成本優勢存在生態鎖定是雙面刃採購前評估遷移成本",[2090],{"type":562,"value":2091},"判決：結構性成本優勢存在，生態鎖定是雙面刃（採購前評估遷移成本）",{"type":557,"tag":558,"props":2093,"children":2094},{},[2095],{"type":562,"value":2096},"TPU 8t/8i 的技術突破是真實的——百萬晶片擴展能力與 KV cache 片上化是競爭對手短期難以複製的設計。然而 Patrick Moorhead 的歷史警示值得銘記：2016 年 Google 已做出類似宣告，但 Nvidia 市值至今達 5 兆美元。技術領先能否轉化為企業採購選擇，仍取決於定價透明度與生態相容性。",{"title":271,"searchDepth":564,"depth":564,"links":2098},[],{"data":2100,"body":2101,"excerpt":-1,"toc":2193},{"title":271,"description":271},{"type":554,"children":2102},[2103,2108,2141,2147,2170,2175],{"type":557,"tag":601,"props":2104,"children":2106},{"id":2105},"算力指標",[2107],{"type":562,"value":2105},{"type":557,"tag":824,"props":2109,"children":2110},{},[2111,2116,2121,2126,2131,2136],{"type":557,"tag":828,"props":2112,"children":2113},{},[2114],{"type":562,"value":2115},"TPU 8t 單 superpod：121 ExaFlops",{"type":557,"tag":828,"props":2117,"children":2118},{},[2119],{"type":562,"value":2120},"TPU 8t superpod 晶片數：9,600 顆",{"type":557,"tag":828,"props":2122,"children":2123},{},[2124],{"type":562,"value":2125},"TPU 8t 可擴展至：100 萬顆晶片 (Virgo Network)",{"type":557,"tag":828,"props":2127,"children":2128},{},[2129],{"type":562,"value":2130},"Goodput：> 97%",{"type":557,"tag":828,"props":2132,"children":2133},{},[2134],{"type":562,"value":2135},"interchip 頻寬：前代 2 倍",{"type":557,"tag":828,"props":2137,"children":2138},{},[2139],{"type":562,"value":2140},"儲存存取速度：前代 10 倍",{"type":557,"tag":601,"props":2142,"children":2144},{"id":2143},"tpu-8i-記憶體規格",[2145],{"type":562,"value":2146},"TPU 8i 記憶體規格",{"type":557,"tag":824,"props":2148,"children":2149},{},[2150,2155,2160,2165],{"type":557,"tag":828,"props":2151,"children":2152},{},[2153],{"type":562,"value":2154},"HBM：288 GB",{"type":557,"tag":828,"props":2156,"children":2157},{},[2158],{"type":562,"value":2159},"片上 SRAM：384 MB（前代 3 
倍）",{"type":557,"tag":828,"props":2161,"children":2162},{},[2163],{"type":562,"value":2164},"網路直徑縮減：50%（Boardfly 拓撲）",{"type":557,"tag":828,"props":2166,"children":2167},{},[2168],{"type":562,"value":2169},"MoE 集體通訊延遲降低：最高 5 倍",{"type":557,"tag":601,"props":2171,"children":2173},{"id":2172},"效率對比",[2174],{"type":562,"value":2172},{"type":557,"tag":824,"props":2176,"children":2177},{},[2178,2183,2188],{"type":557,"tag":828,"props":2179,"children":2180},{},[2181],{"type":562,"value":2182},"TPU 8t 性能／瓦特：較 Ironwood 提升 2 倍 (124% performance per watt)",{"type":557,"tag":828,"props":2184,"children":2185},{},[2186],{"type":562,"value":2187},"TPU 8i 性能／瓦特：較前代提升 117%",{"type":557,"tag":828,"props":2189,"children":2190},{},[2191],{"type":562,"value":2192},"NVIDIA NVLink 域上限：576 顆晶片（對比 Virgo Network 的 100 萬）",{"title":271,"searchDepth":564,"depth":564,"links":2194},[],{"data":2196,"body":2197,"excerpt":-1,"toc":2218},{"title":271,"description":271},{"type":554,"children":2198},[2199],{"type":557,"tag":824,"props":2200,"children":2201},{},[2202,2206,2210,2214],{"type":557,"tag":828,"props":2203,"children":2204},{},[2205],{"type":562,"value":215},{"type":557,"tag":828,"props":2207,"children":2208},{},[2209],{"type":562,"value":216},{"type":557,"tag":828,"props":2211,"children":2212},{},[2213],{"type":562,"value":217},{"type":557,"tag":828,"props":2215,"children":2216},{},[2217],{"type":562,"value":218},{"title":271,"searchDepth":564,"depth":564,"links":2219},[],{"data":2221,"body":2222,"excerpt":-1,"toc":2239},{"title":271,"description":271},{"type":554,"children":2223},[2224],{"type":557,"tag":824,"props":2225,"children":2226},{},[2227,2231,2235],{"type":557,"tag":828,"props":2228,"children":2229},{},[2230],{"type":562,"value":220},{"type":557,"tag":828,"props":2232,"children":2233},{},[2234],{"type":562,"value":221},{"type":557,"tag":828,"props":2236,"children":2237},{},[2238],{"type":562,"value":222},{"title":271,"searchDepth":564,"depth":564,"links":2240},[],{"data":2242,"bod
y":2243,"excerpt":-1,"toc":2249},{"title":271,"description":226},{"type":554,"children":2244},[2245],{"type":557,"tag":558,"props":2246,"children":2247},{},[2248],{"type":562,"value":226},{"title":271,"searchDepth":564,"depth":564,"links":2250},[],{"data":2252,"body":2253,"excerpt":-1,"toc":2259},{"title":271,"description":227},{"type":554,"children":2254},[2255],{"type":557,"tag":558,"props":2256,"children":2257},{},[2258],{"type":562,"value":227},{"title":271,"searchDepth":564,"depth":564,"links":2260},[],{"data":2262,"body":2263,"excerpt":-1,"toc":2269},{"title":271,"description":228},{"type":554,"children":2264},[2265],{"type":557,"tag":558,"props":2266,"children":2267},{},[2268],{"type":562,"value":228},{"title":271,"searchDepth":564,"depth":564,"links":2270},[],{"data":2272,"body":2273,"excerpt":-1,"toc":2384},{"title":271,"description":271},{"type":554,"children":2274},[2275,2281,2302,2317,2322,2327,2332,2379],{"type":557,"tag":601,"props":2276,"children":2278},{"id":2277},"一行指令讓-ai-代理學會新技能",[2279],{"type":562,"value":2280},"一行指令，讓 AI 代理學會新技能",{"type":557,"tag":558,"props":2282,"children":2283},{},[2284,2286,2292,2294,2300],{"type":562,"value":2285},"Vercel Labs 推出開源工具 ",{"type":557,"tag":1380,"props":2287,"children":2289},{"className":2288},[],[2290],{"type":562,"value":2291},"skills",{"type":562,"value":2293},"，讓開發者透過 ",{"type":557,"tag":1380,"props":2295,"children":2297},{"className":2296},[],[2298],{"type":562,"value":2299},"npx skills add \u003Cpackage>",{"type":562,"value":2301}," 一行指令，為 AI 代理安裝「技能包」——封裝好的指令集，描述特定任務的執行方式，例如「按照團隊規範生成 PR」或「撰寫 release notes」。",{"type":557,"tag":623,"props":2303,"children":2304},{},[2305],{"type":557,"tag":558,"props":2306,"children":2307},{},[2308,2312,2315],{"type":557,"tag":630,"props":2309,"children":2310},{},[2311],{"type":562,"value":634},{"type":557,"tag":636,"props":2313,"children":2314},{},[],{"type":562,"value":2316},"\n技能包 (skill package) ：封裝特定任務執行邏輯的指令集合，AI 代理讀取後即可按規範執行，概念類似 npm 套件之於 Node.js 
生態。",{"type":557,"tag":601,"props":2318,"children":2320},{"id":2319},"生態規模與技術細節",[2321],{"type":562,"value":2319},{"type":557,"tag":558,"props":2323,"children":2324},{},[2325],{"type":562,"value":2326},"截至 2026 年 4 月，專案已累積 15,500+ Stars，支援 45+ agent 平台，涵蓋 Claude Code、Cursor、GitHub Copilot、Gemini 等主流工具。",{"type":557,"tag":558,"props":2328,"children":2329},{},[2330],{"type":562,"value":2331},"核心命令：",{"type":557,"tag":824,"props":2333,"children":2334},{},[2335,2346,2357,2368],{"type":557,"tag":828,"props":2336,"children":2337},{},[2338,2344],{"type":557,"tag":1380,"props":2339,"children":2341},{"className":2340},[],[2342],{"type":562,"value":2343},"npx skills add",{"type":562,"value":2345},"：安裝技能包",{"type":557,"tag":828,"props":2347,"children":2348},{},[2349,2355],{"type":557,"tag":1380,"props":2350,"children":2352},{"className":2351},[],[2353],{"type":562,"value":2354},"npx skills find",{"type":562,"value":2356},"：互動探索目錄",{"type":557,"tag":828,"props":2358,"children":2359},{},[2360,2366],{"type":557,"tag":1380,"props":2361,"children":2363},{"className":2362},[],[2364],{"type":562,"value":2365},"npx skills update",{"type":562,"value":2367},"：更新已安裝技能",{"type":557,"tag":828,"props":2369,"children":2370},{},[2371,2377],{"type":557,"tag":1380,"props":2372,"children":2374},{"className":2373},[],[2375],{"type":562,"value":2376},"npx skills remove",{"type":562,"value":2378},"：移除技能",{"type":557,"tag":558,"props":2380,"children":2381},{},[2382],{"type":562,"value":2383},"安裝支援 GitHub shorthand、完整 URL、本地路徑，預設 symlink 方式讓更新立即生效。",{"title":271,"searchDepth":564,"depth":564,"links":2385},[],{"data":2387,"body":2389,"excerpt":-1,"toc":2414},{"title":271,"description":2388},"skills 相容 45+ agent 平台，遷移成本極低——只需 npx skills add owner/repo 
即可將團隊規範固化為可分享的技能包。",{"type":554,"children":2390},[2391,2409],{"type":557,"tag":558,"props":2392,"children":2393},{},[2394,2399,2401,2407],{"type":557,"tag":1380,"props":2395,"children":2397},{"className":2396},[],[2398],{"type":562,"value":2291},{"type":562,"value":2400}," 相容 45+ agent 平台，遷移成本極低——只需 ",{"type":557,"tag":1380,"props":2402,"children":2404},{"className":2403},[],[2405],{"type":562,"value":2406},"npx skills add owner/repo",{"type":562,"value":2408}," 即可將團隊規範固化為可分享的技能包。",{"type":557,"tag":558,"props":2410,"children":2411},{},[2412],{"type":562,"value":2413},"本地路徑安裝讓私有技能不必公開發布，symlink 預設使更新立即生效，無需重新安裝。技能包本質是純文字指令集，無執行時相依，與 CLAUDE.md 等 agent-specific 設定互補，適合在多工具混用環境下標準化協作規範。",{"title":271,"searchDepth":564,"depth":564,"links":2415},[],{"data":2417,"body":2419,"excerpt":-1,"toc":2436},{"title":271,"description":2418},"skills 試圖建立 AI 代理能力的「npm 生態」——若成功，將使 agent 工作流最佳實踐跨組織、跨工具標準化。",{"type":554,"children":2420},[2421,2431],{"type":557,"tag":558,"props":2422,"children":2423},{},[2424,2429],{"type":557,"tag":1380,"props":2425,"children":2427},{"className":2426},[],[2428],{"type":562,"value":2291},{"type":562,"value":2430}," 試圖建立 AI 代理能力的「npm 生態」——若成功，將使 agent 工作流最佳實踐跨組織、跨工具標準化。",{"type":557,"tag":558,"props":2432,"children":2433},{},[2434],{"type":562,"value":2435},"15,500+ Stars 的早期採用速度顯示需求明確；Vercel 主導的開源策略搭配 skills.sh 技能目錄平台，有望形成中心化的技能分發節點，進一步鞏固 Vercel 在 AI 開發基礎設施的市場地位。",{"title":271,"searchDepth":564,"depth":564,"links":2437},[],{"data":2439,"body":2440,"excerpt":-1,"toc":2499},{"title":271,"description":271},{"type":554,"children":2441},[2442,2448,2453,2468,2473,2488,2494],{"type":557,"tag":601,"props":2443,"children":2445},{"id":2444},"ai-代筆作者上線語音匹配-全自動-x-內容生成",[2446],{"type":562,"value":2447},"AI 代筆作者上線：語音匹配 + 全自動 X 內容生成",{"type":557,"tag":558,"props":2449,"children":2450},{},[2451],{"type":562,"value":2452},"Stanley For X 於 2026 年 4 月 22 日在 Product Hunt 上線，定位為「全球首款 AI 內容總監」，專為 X（前 Twitter）平台打造。功能模組涵蓋 Niche 
研究、即時趨勢分析、推文與主題串撰寫、贈品活動規劃，以及內容發布一致性追蹤。",{"type":557,"tag":623,"props":2454,"children":2455},{},[2456],{"type":557,"tag":558,"props":2457,"children":2458},{},[2459,2463,2466],{"type":557,"tag":630,"props":2460,"children":2461},{},[2462],{"type":562,"value":806},{"type":557,"tag":636,"props":2464,"children":2465},{},[],{"type":562,"value":2467},"\n想像你雇了一位懂你說話風格的文案顧問，他不只幫你寫推文，還替你分析趨勢、規劃內容主題串——這就是 Stanley 試圖做到的事。",{"type":557,"tag":558,"props":2469,"children":2470},{},[2471],{"type":562,"value":2472},"核心差異在於語音匹配 (Voice Matching) ：從用戶既有發文歷史學習寫作風格，而非套用通用模板。",{"type":557,"tag":623,"props":2474,"children":2475},{},[2476],{"type":557,"tag":558,"props":2477,"children":2478},{},[2479,2483,2486],{"type":557,"tag":630,"props":2480,"children":2481},{},[2482],{"type":562,"value":634},{"type":557,"tag":636,"props":2484,"children":2485},{},[],{"type":562,"value":2487},"\nVoice Matching：AI 分析歷史發文，提取語氣與詞彙習慣，讓生成內容聽起來像本人而非機器人。",{"type":557,"tag":601,"props":2489,"children":2491},{"id":2490},"背景stan-平台與-10-天開發紀錄",[2492],{"type":562,"value":2493},"背景：Stan 平台與 10 天開發紀錄",{"type":557,"tag":558,"props":2495,"children":2496},{},[2497],{"type":562,"value":2498},"開發者 Vitalii Dodonov（Stan 共同創辦人）使用同一框架，3 個月內將 X 帳號從 0 成長至 9,600+ 追蹤者。Stan 平台服務超過 60,000 名創作者，ARR 達 $30M。工具由 Vitalii 與知名代筆作者 Pascio 共同開發，從零到上線僅花 10 天。",{"title":271,"searchDepth":564,"depth":564,"links":2500},[],{"data":2502,"body":2504,"excerpt":-1,"toc":2536},{"title":271,"description":2503},"技術棧由 Claude(Anthropic)、Composio、Cloudflare 三層組成：LLM 作為推理核心，Composio 提供工具呼叫與多平台 API 整合層，Cloudflare 處理邊緣部署與即時同步。",{"type":554,"children":2505},[2506,2531],{"type":557,"tag":558,"props":2507,"children":2508},{},[2509,2511,2516,2518,2523,2524,2529],{"type":562,"value":2510},"技術棧由 
",{"type":557,"tag":630,"props":2512,"children":2513},{},[2514],{"type":562,"value":2515},"Claude(Anthropic)",{"type":562,"value":2517},"、",{"type":557,"tag":630,"props":2519,"children":2520},{},[2521],{"type":562,"value":2522},"Composio",{"type":562,"value":2517},{"type":557,"tag":630,"props":2525,"children":2526},{},[2527],{"type":562,"value":2528},"Cloudflare",{"type":562,"value":2530}," 三層組成：LLM 作為推理核心，Composio 提供工具呼叫與多平台 API 整合層，Cloudflare 處理邊緣部署與即時同步。",{"type":557,"tag":558,"props":2532,"children":2533},{},[2534],{"type":562,"value":2535},"支援 Web、iMessage、SMS、Telegram 多端存取，顯示 AI 工作流程工具正往即時通訊深度整合。10 天開發週期驗證了 Claude + Composio 的快速原型可行性，但 Voice Matching 品質與高併發穩定性仍待大規模驗證。",{"title":271,"searchDepth":564,"depth":564,"links":2537},[],{"data":2539,"body":2541,"excerpt":-1,"toc":2552},{"title":271,"description":2540},"Stan 平台 $30M ARR 的既有創作者用戶池，讓 Stanley 規避冷啟動問題，直接切入已付費的高價值族群。",{"type":554,"children":2542},[2543,2547],{"type":557,"tag":558,"props":2544,"children":2545},{},[2546],{"type":562,"value":2540},{"type":557,"tag":558,"props":2548,"children":2549},{},[2550],{"type":562,"value":2551},"相較於通用 AI 寫作工具，Stanley 嵌入了真實代筆作者 (Pascio) 的專業框架，試圖建立差異化護城河。定價採協商制，LinkedIn 版約 $149／月，若 X 版落在相近區間，月均 ROI 需對應帶來明確的粉絲成長或商業轉換才划算。",{"title":271,"searchDepth":564,"depth":564,"links":2553},[],{"data":2555,"body":2556,"excerpt":-1,"toc":2605},{"title":271,"description":271},{"type":554,"children":2557},[2558,2563,2575,2580,2595,2600],{"type":557,"tag":601,"props":2559,"children":2561},{"id":2560},"臨床輔助三大場景",[2562],{"type":562,"value":2560},{"type":557,"tag":558,"props":2564,"children":2565},{},[2566,2568,2573],{"type":562,"value":2567},"ChatGPT for Clinicians 向已驗證身份的美國執業醫師、執業護理師及藥劑師",{"type":557,"tag":630,"props":2569,"children":2570},{},[2571],{"type":562,"value":2572},"免費開放",{"type":562,"value":2574},"，涵蓋三大應用場景：臨床照護輔助、醫療文件撰寫，以及醫學研究查詢。",{"type":557,"tag":558,"props":2576,"children":2577},{},[2578],{"type":562,"value":2579},"系統可合成醫學證據，引用百萬篇同儕審閱研究，附帶期刊名稱與發表日期等可追溯來源。企業版另支援 HIPAA 
合規運作，提供資料隔離儲存、客戶自管加密金鑰及稽核日誌，且輸入內容不用於模型訓練。",{"type":557,"tag":623,"props":2581,"children":2582},{},[2583],{"type":557,"tag":558,"props":2584,"children":2585},{},[2586,2590,2593],{"type":557,"tag":630,"props":2587,"children":2588},{},[2589],{"type":562,"value":634},{"type":557,"tag":636,"props":2591,"children":2592},{},[],{"type":562,"value":2594},"\nHealthBench：OpenAI 開發的醫療 AI 評估框架，依據臨床標準評核模型回應的安全性、清晰度與個別情境適當性。",{"type":557,"tag":601,"props":2596,"children":2598},{"id":2597},"兩年訓練成果",[2599],{"type":562,"value":2597},{"type":557,"tag":558,"props":2601,"children":2602},{},[2603],{"type":562,"value":2604},"OpenAI 歷時兩年與來自 60 個國家、涵蓋數十個專科的 260 多位醫師合作，累計提供超過 60 萬次評分回饋，涵蓋 30 個健康領域。所有輸出均以 HealthBench 框架評估，評核重點含安全性、清晰度與適當轉介建議。",{"title":271,"searchDepth":564,"depth":564,"links":2606},[],{"data":2608,"body":2610,"excerpt":-1,"toc":2629},{"title":271,"description":2609},"技術亮點在於可追溯引用：每則回應附帶期刊名稱與發表日期，而非僅輸出結論，這對臨床決策的可信賴性至關重要。",{"type":554,"children":2611},[2612,2624],{"type":557,"tag":558,"props":2613,"children":2614},{},[2615,2617,2622],{"type":562,"value":2616},"技術亮點在於",{"type":557,"tag":630,"props":2618,"children":2619},{},[2620],{"type":562,"value":2621},"可追溯引用",{"type":562,"value":2623},"：每則回應附帶期刊名稱與發表日期，而非僅輸出結論，這對臨床決策的可信賴性至關重要。",{"type":557,"tag":558,"props":2625,"children":2626},{},[2627],{"type":562,"value":2628},"企業整合若需對接 EHR 系統，應關注 HIPAA 合規架構三項基礎：資料隔離儲存、客戶自管加密金鑰 (CMEK) 、稽核日誌。HealthBench 作為醫療 AI 評估方法論，也是自行開發臨床輔助系統時值得參考的基準框架。",{"title":271,"searchDepth":564,"depth":564,"links":2630},[],{"data":2632,"body":2634,"excerpt":-1,"toc":2645},{"title":271,"description":2633},"免費開放是 OpenAI 的醫療市場卡位策略——先讓個人從業者養成依賴習慣，再以企業版 ChatGPT for Healthcare 承接機構採購。",{"type":554,"children":2635},[2636,2640],{"type":557,"tag":558,"props":2637,"children":2638},{},[2639],{"type":562,"value":2633},{"type":557,"tag":558,"props":2641,"children":2642},{},[2643],{"type":562,"value":2644},"OpenAI 以兩年訓練、260 位醫師、60 萬次回饋為技術背書，試圖在「可信賴性」上建立差異化。醫療 AI 
監管門檻高、院方採購周期長，個人從業者直接可及的策略是繞過機構採購瓶頸的關鍵一步。",{"title":271,"searchDepth":564,"depth":564,"links":2646},[],{"data":2648,"body":2649,"excerpt":-1,"toc":2707},{"title":271,"description":271},{"type":554,"children":2650},[2651,2656,2661,2676,2681,2702],{"type":557,"tag":601,"props":2652,"children":2654},{"id":2653},"工具定位",[2655],{"type":562,"value":2653},{"type":557,"tag":558,"props":2657,"children":2658},{},[2659],{"type":562,"value":2660},"OpenAI 以 Apache 2.0 授權開源 Privacy Filter，專為文字 PII 偵測與遮蔽設計。架構採 sparse MoE 設計，1.5B 總參數、50M 激活參數，可在筆電或瀏覽器 (WebGPU) 本地運行，F1 分數達 96%，128K tokens 上下文可處理完整法律文件。",{"type":557,"tag":623,"props":2662,"children":2663},{},[2664],{"type":557,"tag":558,"props":2665,"children":2666},{},[2667,2671,2674],{"type":557,"tag":630,"props":2668,"children":2669},{},[2670],{"type":562,"value":634},{"type":557,"tag":636,"props":2672,"children":2673},{},[],{"type":562,"value":2675},"\nSparse MoE（稀疏混合專家）：模型雖有大量參數，每次推論只激活少數子網路，達到高容量、低運算的效果。",{"type":557,"tag":601,"props":2677,"children":2679},{"id":2678},"整合與限制",[2680],{"type":562,"value":2678},{"type":557,"tag":558,"props":2682,"children":2683},{},[2684,2686,2692,2694,2700],{"type":562,"value":2685},"支援 8 種 PII 類別（姓名、信箱、電話、地址、URL、日期、帳號、密碼），可透過 ",{"type":557,"tag":1380,"props":2687,"children":2689},{"className":2688},[],[2690],{"type":562,"value":2691},"pip install",{"type":562,"value":2693}," 部署，CLI 工具 ",{"type":557,"tag":1380,"props":2695,"children":2697},{"className":2696},[],[2698],{"type":562,"value":2699},"opf",{"type":562,"value":2701}," 提供遮蔽、評估、微調三種模式，也支援 Hugging Face Transformers pipeline 與 Transformers.js（含 
WebGPU）。",{"type":557,"tag":558,"props":2703,"children":2704},{},[2705],{"type":562,"value":2706},"官方聲明這是「遮蔽輔助工具，並非合規或安全保證」，非英語文字與混合格式文件的效能有已知限制，建議在生產部署前進行領域評估。",{"title":271,"searchDepth":564,"depth":564,"links":2708},[],{"data":2710,"body":2711,"excerpt":-1,"toc":2717},{"title":271,"description":352},{"type":554,"children":2712},[2713],{"type":557,"tag":558,"props":2714,"children":2715},{},[2716],{"type":562,"value":352},{"title":271,"searchDepth":564,"depth":564,"links":2718},[],{"data":2720,"body":2721,"excerpt":-1,"toc":2727},{"title":271,"description":353},{"type":554,"children":2722},[2723],{"type":557,"tag":558,"props":2724,"children":2725},{},[2726],{"type":562,"value":353},{"title":271,"searchDepth":564,"depth":564,"links":2728},[],{"data":2730,"body":2731,"excerpt":-1,"toc":2756},{"title":271,"description":271},{"type":554,"children":2732},[2733,2738],{"type":557,"tag":601,"props":2734,"children":2736},{"id":2735},"效能基準",[2737],{"type":562,"value":2735},{"type":557,"tag":824,"props":2739,"children":2740},{},[2741,2746,2751],{"type":557,"tag":828,"props":2742,"children":2743},{},[2744],{"type":562,"value":2745},"F1 分數：96%",{"type":557,"tag":828,"props":2747,"children":2748},{},[2749],{"type":562,"value":2750},"激活參數：50M（總參數 1.5B）",{"type":557,"tag":828,"props":2752,"children":2753},{},[2754],{"type":562,"value":2755},"上下文視窗：128K tokens",{"title":271,"searchDepth":564,"depth":564,"links":2757},[],{"data":2759,"body":2760,"excerpt":-1,"toc":2807},{"title":271,"description":271},{"type":554,"children":2761},[2762,2767,2772,2787,2792,2797,2802],{"type":557,"tag":601,"props":2763,"children":2765},{"id":2764},"訂閱結構的裂縫",[2766],{"type":562,"value":2764},{"type":557,"tag":558,"props":2768,"children":2769},{},[2770],{"type":562,"value":2771},"2026 年 4 月 21-22 日，Anthropic 悄悄將 Claude Code 從 $20／月的 Pro 訂閱定價頁面移除，改以「✗」標示。用戶強烈反彈後數日內即恢復。Anthropic 成長負責人 Amol Avasare 說明，這屬於「針對約 2% 新用戶的小規模 A/B 
測試」，並非正式下架。",{"type":557,"tag":623,"props":2773,"children":2774},{},[2775],{"type":557,"tag":558,"props":2776,"children":2777},{},[2778,2782,2785],{"type":557,"tag":630,"props":2779,"children":2780},{},[2781],{"type":562,"value":634},{"type":557,"tag":636,"props":2783,"children":2784},{},[],{"type":562,"value":2786},"\nA/B 測試：同時向不同用戶群展示不同版本，比較兩組反應以輔助決策的實驗方法。",{"type":557,"tag":558,"props":2788,"children":2789},{},[2790],{"type":562,"value":2791},"事件期間，各平台文件出現嚴重不一致：定價頁已移除、Claude Code 產品頁仍列於 Pro、客服文件改為僅提及 Max，甚至 claude.ai 聊天機器人仍告知用戶 Pro 包含 Claude Code。",{"type":557,"tag":601,"props":2793,"children":2795},{"id":2794},"算力壓力浮出檯面",[2796],{"type":562,"value":2794},{"type":557,"tag":558,"props":2798,"children":2799},{},[2800],{"type":562,"value":2801},"Avasare 坦言，Max 方案在 Claude Code 與 Cowork 問世前即已推出，「從未針對長期執行的 agent 工作流重新設計」。算力壓力有數據佐證：Anthropic API 90 天可用率僅 98.95%，低於雲端服務 99.99% 業界標準；GPU 租用成本同期上漲 48%。",{"type":557,"tag":558,"props":2803,"children":2804},{},[2805],{"type":562,"value":2806},"OpenAI 暫停部分 Sora 服務、GitHub Copilot 暫停新 Pro 
方案報名，顯示業界普遍面臨算力瓶頸。",{"title":271,"searchDepth":564,"depth":564,"links":2808},[],{"data":2810,"body":2811,"excerpt":-1,"toc":2817},{"title":271,"description":380},{"type":554,"children":2812},[2813],{"type":557,"tag":558,"props":2814,"children":2815},{},[2816],{"type":562,"value":380},{"title":271,"searchDepth":564,"depth":564,"links":2818},[],{"data":2820,"body":2821,"excerpt":-1,"toc":2827},{"title":271,"description":381},{"type":554,"children":2822},[2823],{"type":557,"tag":558,"props":2824,"children":2825},{},[2826],{"type":562,"value":381},{"title":271,"searchDepth":564,"depth":564,"links":2828},[],{"data":2830,"body":2831,"excerpt":-1,"toc":2850},{"title":271,"description":271},{"type":554,"children":2832},[2833,2837],{"type":557,"tag":601,"props":2834,"children":2835},{"id":2105},[2836],{"type":562,"value":2105},{"type":557,"tag":824,"props":2838,"children":2839},{},[2840,2845],{"type":557,"tag":828,"props":2841,"children":2842},{},[2843],{"type":562,"value":2844},"Anthropic API 90 天可用率：98.95%（業界標準：99.99%）",{"type":557,"tag":828,"props":2846,"children":2847},{},[2848],{"type":562,"value":2849},"GPU 租用成本漲幅：+48%(Ornn Compute Price Index)",{"title":271,"searchDepth":564,"depth":564,"links":2851},[],{"data":2853,"body":2854,"excerpt":-1,"toc":2877},{"title":271,"description":271},{"type":554,"children":2855},[2856,2862,2867,2872],{"type":557,"tag":601,"props":2857,"children":2859},{"id":2858},"合約背景本月第三大前沿-ai-實驗室協議",[2860],{"type":562,"value":2861},"合約背景：本月第三大前沿 AI 實驗室協議",{"type":557,"tag":558,"props":2863,"children":2864},{},[2865],{"type":562,"value":2866},"2026 年 4 月 22 日，Google Cloud 與 Thinking Machines Lab 宣布簽署數十億美元雲端計算協議，於 Google Cloud Next '26（拉斯維加斯）正式公開。這是繼 Anthropic 與 Meta 之後，Google Cloud 本月第三個達成類似規模合作的前沿 AI 實驗室。協議採非獨家設計，Thinking Machines 可同時使用多家雲端服務商。",{"type":557,"tag":601,"props":2868,"children":2870},{"id":2869},"公司現況與挑戰",[2871],{"type":562,"value":2869},{"type":557,"tag":558,"props":2873,"children":2874},{},[2875],{"type":562,"value":2876},"Thinking 
Machines 由前 OpenAI 首席技術官 Mira Murati 於 2025 年 2 月創辦，以 120 億美元估值完成 20 億美元種子輪融資，旗下首款產品「Tinker」可自動化建立客製化前沿 AI 模型。員工數超過 130 人，但 Meta 已挖角 7 名創始成員（含 Tinker 首席工程師 Joshua Gross），人才流失壓力不容忽視。",{"title":271,"searchDepth":564,"depth":564,"links":2878},[],{"data":2880,"body":2881,"excerpt":-1,"toc":2887},{"title":271,"description":413},{"type":554,"children":2882},[2883],{"type":557,"tag":558,"props":2884,"children":2885},{},[2886],{"type":562,"value":413},{"title":271,"searchDepth":564,"depth":564,"links":2888},[],{"data":2890,"body":2891,"excerpt":-1,"toc":2897},{"title":271,"description":414},{"type":554,"children":2892},[2893],{"type":557,"tag":558,"props":2894,"children":2895},{},[2896],{"type":562,"value":414},{"title":271,"searchDepth":564,"depth":564,"links":2898},[],{"data":2900,"body":2901,"excerpt":-1,"toc":2923},{"title":271,"description":271},{"type":554,"children":2902},[2903,2908],{"type":557,"tag":601,"props":2904,"children":2906},{"id":2905},"效能指標",[2907],{"type":562,"value":2905},{"type":557,"tag":824,"props":2909,"children":2910},{},[2911],{"type":557,"tag":828,"props":2912,"children":2913},{},[2914,2916,2921],{"type":562,"value":2915},"A4X Max 虛擬機相比前一代 GPU：訓練與推理速度提升 ",{"type":557,"tag":630,"props":2917,"children":2918},{},[2919],{"type":562,"value":2920},"2 倍",{"type":562,"value":2922},"（Google Cloud 官方數據）",{"title":271,"searchDepth":564,"depth":564,"links":2924},[],{"data":2926,"body":2927,"excerpt":-1,"toc":2981},{"title":271,"description":271},{"type":554,"children":2928},[2929,2935,2948,2963,2968],{"type":557,"tag":601,"props":2930,"children":2932},{"id":2931},"websocket-模式核心機制",[2933],{"type":562,"value":2934},"WebSocket 模式核心機制",{"type":557,"tag":558,"props":2936,"children":2937},{},[2938,2940,2946],{"type":562,"value":2939},"OpenAI Responses API 新增 WebSocket 模式，透過持久連線與「連線本地快取 (connection-scoped caching) 」，大幅降低 agentic 工作流的延遲。每條連線在記憶體中僅保留最近一次 response 狀態，後續每輪只需傳送新的 input items 與 
",{"type":557,"tag":1380,"props":2941,"children":2943},{"className":2942},[],[2944],{"type":562,"value":2945},"previous_response_id",{"type":562,"value":2947},"，省去重傳完整對話歷史的冗餘開銷。",{"type":557,"tag":623,"props":2949,"children":2950},{},[2951],{"type":557,"tag":558,"props":2952,"children":2953},{},[2954,2958,2961],{"type":557,"tag":630,"props":2955,"children":2956},{},[2957],{"type":562,"value":634},{"type":557,"tag":636,"props":2959,"children":2960},{},[],{"type":562,"value":2962},"\nconnection-scoped caching：快取範圍僅限單一 WebSocket 連線存活期間，連線關閉後快取隨之釋放，無磁碟 I/O 依賴。",{"type":557,"tag":601,"props":2964,"children":2966},{"id":2965},"實測效能",[2967],{"type":562,"value":2965},{"type":557,"tag":558,"props":2969,"children":2970},{},[2971,2973,2979],{"type":562,"value":2972},"針對 20 次以上工具呼叫的 agentic 工作流（如 Codex agent loop），端對端執行速度可提升最高約 40%。連線限時 60 分鐘，超時後需重新連線，可接續已持久化的 response 或以壓縮上下文重起。支援 ",{"type":557,"tag":1380,"props":2974,"children":2976},{"className":2975},[],[2977],{"type":562,"value":2978},"store=false",{"type":562,"value":2980}," 與 Zero Data Retention(ZDR) ，符合高安全合規需求。",{"title":271,"searchDepth":564,"depth":564,"links":2982},[],{"data":2984,"body":2986,"excerpt":-1,"toc":3035},{"title":271,"description":2985},"整合 WebSocket 模式的關鍵在於改用事件驅動循環：每輪發送 response.create 事件，帶入 previous_response_id 及工具輸出。",{"type":554,"children":2987},[2988,3008],{"type":557,"tag":558,"props":2989,"children":2990},{},[2991,2993,2999,3001,3006],{"type":562,"value":2992},"整合 WebSocket 模式的關鍵在於改用事件驅動循環：每輪發送 ",{"type":557,"tag":1380,"props":2994,"children":2996},{"className":2995},[],[2997],{"type":562,"value":2998},"response.create",{"type":562,"value":3000}," 事件，帶入 ",{"type":557,"tag":1380,"props":3002,"children":3004},{"className":3003},[],[3005],{"type":562,"value":2945},{"type":562,"value":3007}," 及工具輸出。",{"type":557,"tag":558,"props":3009,"children":3010},{},[3011,3013,3018,3020,3025,3027,3033],{"type":562,"value":3012},"需注意若 
",{"type":557,"tag":1380,"props":3014,"children":3016},{"className":3015},[],[3017],{"type":562,"value":2945},{"type":562,"value":3019}," 不在快取且 ",{"type":557,"tag":1380,"props":3021,"children":3023},{"className":3022},[],[3024],{"type":562,"value":2978},{"type":562,"value":3026},"，會返回 ",{"type":557,"tag":1380,"props":3028,"children":3030},{"className":3029},[],[3031],{"type":562,"value":3032},"previous_response_not_found",{"type":562,"value":3034}," 錯誤，需在 agent loop 中加入重連 fallback 邏輯。連線 60 分鐘上限也需納入設計，建議搭配 warm 連線池管理。",{"title":271,"searchDepth":564,"depth":564,"links":3036},[],{"data":3038,"body":3040,"excerpt":-1,"toc":3051},{"title":271,"description":3039},"對於依賴 Codex 或自建 coding agent 的企業，40% 的延遲降幅直接轉化為開發者體驗提升與 API 成本最佳化，每輪少傳 context token 可降低費用。",{"type":554,"children":3041},[3042,3046],{"type":557,"tag":558,"props":3043,"children":3044},{},[3045],{"type":562,"value":3039},{"type":557,"tag":558,"props":3047,"children":3048},{},[3049],{"type":562,"value":3050},"ZDR 支援讓金融、醫療等高合規場景可安心採用。vLLM v0.10.0 已初步跟進 Responses API，顯示此模式正成為 agent 工作流新標準，宜及早評估遷移路徑。",{"title":271,"searchDepth":564,"depth":564,"links":3052},[],{"data":3054,"body":3055,"excerpt":-1,"toc":3075},{"title":271,"description":271},{"type":554,"children":3056},[3057,3062],{"type":557,"tag":601,"props":3058,"children":3060},{"id":3059},"效能數據",[3061],{"type":562,"value":3059},{"type":557,"tag":824,"props":3063,"children":3064},{},[3065,3070],{"type":557,"tag":828,"props":3066,"children":3067},{},[3068],{"type":562,"value":3069},"20+ 工具呼叫場景：端對端速度提升最高約 40%",{"type":557,"tag":828,"props":3071,"children":3072},{},[3073],{"type":562,"value":3074},"連線存活上限：60 分鐘",{"title":271,"searchDepth":564,"depth":564,"links":3076},[],{"data":3078,"body":3079,"excerpt":-1,"toc":3144},{"title":271,"description":271},{"type":554,"children":3080},[3081,3087,3092,3098,3110,3125],{"type":557,"tag":601,"props":3082,"children":3084},{"id":3083},"_2026-年-1-月舊聞近期社群討論再度升溫",[3085],{"type":562,"value":3086},"2026 年 1 
月舊聞，近期社群討論再度升溫",{"type":557,"tag":558,"props":3088,"children":3089},{},[3090],{"type":562,"value":3091},"Core Automation 由前 OpenAI 副總裁 Jerry Tworek 於 2026 年 1 月創立，成立數週後即啟動種子輪募資，目標融資 5 億至 10 億美元。這一已成立逾兩個月的專案，近期因 Business Insider 深度報導及社群對其技術路線的廣泛討論而再度引發關注。",{"type":557,"tag":601,"props":3093,"children":3095},{"id":3094},"技術主張持續學習取代一次性預訓練",[3096],{"type":562,"value":3097},"技術主張：持續學習取代一次性預訓練",{"type":557,"tag":558,"props":3099,"children":3100},{},[3101,3103,3108],{"type":562,"value":3102},"Tworek 認為當前大模型架構「從根本上就有缺陷」——模型透過海量資料一次性預訓練後，遇到新資訊便出現",{"type":557,"tag":630,"props":3104,"children":3105},{},[3106],{"type":562,"value":3107},"災難性遺忘",{"type":562,"value":3109},"，無法即時吸收新知識。",{"type":557,"tag":623,"props":3111,"children":3112},{},[3113],{"type":557,"tag":558,"props":3114,"children":3115},{},[3116,3120,3123],{"type":557,"tag":630,"props":3117,"children":3118},{},[3119],{"type":562,"value":634},{"type":557,"tag":636,"props":3121,"children":3122},{},[],{"type":562,"value":3124},"\n災難性遺忘 (Catastrophic Forgetting) ：神經網路在學習新任務時，往往會覆蓋並忘記舊任務的知識，是持續學習的核心技術挑戰。",{"type":557,"tag":558,"props":3126,"children":3127},{},[3128,3130,3135,3137,3142],{"type":562,"value":3129},"旗艦模型 ",{"type":557,"tag":630,"props":3131,"children":3132},{},[3133],{"type":562,"value":3134},"Ceres",{"type":562,"value":3136}," 採用持續學習架構，可在生產環境中即時更新模型權重，無需完整重新訓練，並宣稱比主流大模型節省 ",{"type":557,"tag":630,"props":3138,"children":3139},{},[3140],{"type":562,"value":3141},"100 倍",{"type":562,"value":3143},"訓練資料與算力。研究方向涵蓋新型學習演算法與超越 Transformer 
的高效架構。",{"title":271,"searchDepth":564,"depth":564,"links":3145},[],{"data":3147,"body":3148,"excerpt":-1,"toc":3154},{"title":271,"description":477},{"type":554,"children":3149},[3150],{"type":557,"tag":558,"props":3151,"children":3152},{},[3153],{"type":562,"value":477},{"title":271,"searchDepth":564,"depth":564,"links":3155},[],{"data":3157,"body":3158,"excerpt":-1,"toc":3164},{"title":271,"description":478},{"type":554,"children":3159},[3160],{"type":557,"tag":558,"props":3161,"children":3162},{},[3163],{"type":562,"value":478},{"title":271,"searchDepth":564,"depth":564,"links":3165},[],{"data":3167,"body":3168,"excerpt":-1,"toc":3242},{"title":271,"description":271},{"type":554,"children":3169},[3170,3175,3180,3195,3201,3206,3237],{"type":557,"tag":601,"props":3171,"children":3173},{"id":3172},"悄悄上線的遙測功能",[3174],{"type":562,"value":3172},{"type":557,"tag":558,"props":3176,"children":3177},{},[3178],{"type":562,"value":3179},"2026 年 4 月 22 日，GitHub CLI v2.91.0 隨版本更新靜默啟用假名遙測功能，預設為 opt-out——使用者必須主動關閉才能停止資料傳送。官方說明以「agentic adoption 成長，需要了解功能實際使用狀況」為由，但未在事前發出顯著公告。",{"type":557,"tag":623,"props":3181,"children":3182},{},[3183],{"type":557,"tag":558,"props":3184,"children":3185},{},[3186,3190,3193],{"type":557,"tag":630,"props":3187,"children":3188},{},[3189],{"type":562,"value":634},{"type":557,"tag":636,"props":3191,"children":3192},{},[],{"type":562,"value":3194},"\n假名遙測 (pseudoanonymous telemetry) ：以隨機生成的設備 UUID 取代真實身份，但仍可追蹤同一裝置的跨指令行為模式。",{"type":557,"tag":601,"props":3196,"children":3198},{"id":3197},"實際收集什麼",[3199],{"type":562,"value":3200},"實際收集什麼？",{"type":557,"tag":558,"props":3202,"children":3203},{},[3204],{"type":562,"value":3205},"CLI 會傳送指令名稱、使用的 flags、作業系統、CPU 架構、CLI 版本、時間戳記、設備 UUID，以及是否在 CI 環境執行、使用中的 AI agent 
身份。關閉方式有三種：",{"type":557,"tag":3207,"props":3208,"children":3209},"ol",{},[3210,3219,3228],{"type":557,"tag":828,"props":3211,"children":3212},{},[3213],{"type":557,"tag":1380,"props":3214,"children":3216},{"className":3215},[],[3217],{"type":562,"value":3218},"export GH_TELEMETRY=false",{"type":557,"tag":828,"props":3220,"children":3221},{},[3222],{"type":557,"tag":1380,"props":3223,"children":3225},{"className":3224},[],[3226],{"type":562,"value":3227},"export DO_NOT_TRACK=true",{"type":557,"tag":828,"props":3229,"children":3230},{},[3231],{"type":557,"tag":1380,"props":3232,"children":3234},{"className":3233},[],[3235],{"type":562,"value":3236},"gh config set telemetry disabled",{"type":557,"tag":558,"props":3238,"children":3239},{},[3240],{"type":562,"value":3241},"注意：獨立安裝的 extension 與 agent 可能自行收集資料，不受上述 opt-out 機制約束。",{"title":271,"searchDepth":564,"depth":564,"links":3243},[],{"data":3245,"body":3247,"excerpt":-1,"toc":3282},{"title":271,"description":3246},"遙測本身不罕見，但 opt-out 而非 opt-in 的設計讓工程師警覺。社群已指出，gh 每條指令本就是 GitHub API 呼叫，伺服器端早已有完整 log，額外客戶端遙測的必要性存疑。",{"type":554,"children":3248},[3249,3262],{"type":557,"tag":558,"props":3250,"children":3251},{},[3252,3254,3260],{"type":562,"value":3253},"遙測本身不罕見，但 opt-out 而非 opt-in 的設計讓工程師警覺。社群已指出，",{"type":557,"tag":1380,"props":3255,"children":3257},{"className":3256},[],[3258],{"type":562,"value":3259},"gh",{"type":562,"value":3261}," 每條指令本就是 GitHub API 呼叫，伺服器端早已有完整 log，額外客戶端遙測的必要性存疑。",{"type":557,"tag":558,"props":3263,"children":3264},{},[3265,3267,3272,3274,3280],{"type":562,"value":3266},"建議立即執行 ",{"type":557,"tag":1380,"props":3268,"children":3270},{"className":3269},[],[3271],{"type":562,"value":3236},{"type":562,"value":3273},"，並在 CI/CD pipeline 環境變數中加入 ",{"type":557,"tag":1380,"props":3275,"children":3277},{"className":3276},[],[3278],{"type":562,"value":3279},"GH_TELEMETRY=false",{"type":562,"value":3281},"，避免 agent 行為資料在自動化流程中被靜默收集。各 extension 是否自行蒐集資料需個別確認，opt-out 
機制不一定涵蓋。",{"title":271,"searchDepth":564,"depth":564,"links":3283},[],{"data":3285,"body":3287,"excerpt":-1,"toc":3298},{"title":271,"description":3286},"此次事件折射出更大趨勢：隨著 AI agent 深入開發工作流程，工具供應商對使用行為資料的渴求急速升溫。GitHub 的措辭 (agentic adoption) 暗示，遙測設計的主要目標是追蹤 AI agent 使用模式，而非傳統 CLI 操作。",{"type":554,"children":3288},[3289,3293],{"type":557,"tag":558,"props":3290,"children":3291},{},[3292],{"type":562,"value":3286},{"type":557,"tag":558,"props":3294,"children":3295},{},[3296],{"type":562,"value":3297},"企業需評估開發工具的資料收集範圍是否符合內部安全政策，CI/CD 自動化場景中 repo 可見性與 owner 資訊均在收集列表，屬敏感資料，應及早確認並關閉，並追蹤 GitHub 是否在社群壓力下改為 opt-in 設計。",{"title":271,"searchDepth":564,"depth":564,"links":3299},[],{"data":3301,"body":3302,"excerpt":-1,"toc":3383},{"title":271,"description":271},{"type":554,"children":3303},[3304,3310,3315,3320,3325,3330,3336,3341,3346,3352,3357,3362,3367,3373,3378],{"type":557,"tag":601,"props":3305,"children":3307},{"id":3306},"段落-1社群熱議排行",[3308],{"type":562,"value":3309},"段落 1：社群熱議排行",{"type":557,"tag":558,"props":3311,"children":3312},{},[3313],{"type":562,"value":3314},"今日最高熱度：Qwen3.6-27B 開源釋出席捲 Reddit r/LocalLLaMA，timkellogg.me（Bluesky，56 upvotes）直呼「他們把這個小模型拿去跟 Opus 4.5 比，表現相當不錯，讓我震驚了。」",{"type":557,"tag":558,"props":3316,"children":3317},{},[3318],{"type":562,"value":3319},"排名第二為 Anthropic Pro/Max 方案風波：Ed Zitron（Bluesky，322 upvotes）率先爆料 Claude Code 疑似從 $20／月方案移除，260 upvotes 的後續貼文繼續追問官方撤回的真實原因。",{"type":557,"tag":558,"props":3321,"children":3322},{},[3323],{"type":562,"value":3324},"GitHub CLI 遙測事件緊追其後，kat cosgrove（Bluesky，81 upvotes）公開呼籲「應設計為 opt-in 而非 opt-out」，HN 多則討論串持續延燒。",{"type":557,"tag":558,"props":3326,"children":3327},{},[3328],{"type":562,"value":3329},"OpenAI WebSocket Responses API（sasipi247，HN）和 ChatGPT 醫療人員免費開放（@Jonas_Vollmer，X）分列第四、五，均獲大量實戰回報。",{"type":557,"tag":601,"props":3331,"children":3333},{"id":3332},"段落-2技術爭議與分歧",[3334],{"type":562,"value":3335},"段落 2：技術爭議與分歧",{"type":557,"tag":558,"props":3337,"children":3338},{},[3339],{"type":562,"value":3340},"本地部署派 
vs. 雲端 API 派：u/_raydeStar(Reddit r/LocalLLaMA) 直指「Opus 收緊限制並調漲定價——這對本地部署來說是完美的風暴。」@kylehessling1(X) 以前端設計測試與 agentic 基準實測，驗證 Qwen3.6-27B 遠超預期。",{"type":557,"tag":558,"props":3342,"children":3343},{},[3344],{"type":562,"value":3345},"隱私權立場對立：GitHub CLI 遙測事件中，user3939382(HN) 諷刺「在未經你同意的情況下監視你……是為了你好」；caymanjim(HN) 對官方「我們需要可見性」措辭直言「你們根本不需要」。爭論焦點在於預設行為是否應以使用者知情為前提。",{"type":557,"tag":601,"props":3347,"children":3349},{"id":3348},"段落-3實戰經驗最高價值",[3350],{"type":562,"value":3351},"段落 3：實戰經驗（最高價值）",{"type":557,"tag":558,"props":3353,"children":3354},{},[3355],{"type":562,"value":3356},"sasipi247(HN) 實測 OpenAI Responses API WebSocket 模式逾一個月，描述「在 workers 上保持 WebSocket session 熱啟動，透過 NATS JetStream 做命令路由」，整體效能感受是「一次飛躍」。",{"type":557,"tag":558,"props":3358,"children":3359},{},[3360],{"type":562,"value":3361},"Qwen3.6 量化版選擇方面，syntaxing(HN) 建議「Q8 或 Q6_UD，且不要進行 KV cache 量化」，指出 MoE 模型激活參數少時此差異更顯著。",{"type":557,"tag":558,"props":3363,"children":3364},{},[3365],{"type":562,"value":3366},"醫療 AI 快速滲透現實：@Jonas_Vollmer(X) 引述急救室醫生友人說法：「大多數醫生每天都在用 ChatGPT……習慣性地把完整的匿名患者病歷貼進個人帳號，目前的採用程度幾乎毫無阻力。」",{"type":557,"tag":601,"props":3368,"children":3370},{"id":3369},"段落-4未解問題與社群預期",[3371],{"type":562,"value":3372},"段落 4：未解問題與社群預期",{"type":557,"tag":558,"props":3374,"children":3375},{},[3376],{"type":562,"value":3377},"Anthropic 訂閱架構走向未明：@koltregaskes(X) 已從 Claude Max 切換至 ChatGPT Pro，vessenes(HN) 直指「今天 Opus 的 API 費用高達 $250，已把 openclaw 指向 Codex」，但官方仍未公告正式架構調整時程。",{"type":557,"tag":558,"props":3379,"children":3380},{},[3381],{"type":562,"value":3382},"GitHub 遙測機制是否改為 opt-in，以及 Core Automation 持續學習技術路線能否落地，均是社群關切但未獲回應的問題。overfeed(HN) 點出 Agentic pipeline 
規模化的核心障礙：「新模型在每個任務上的行為與前代不同。如果這是決定性因素，那自架才是唯一解。」",{"title":271,"searchDepth":564,"depth":564,"links":3384},[],{"data":3386,"body":3387,"excerpt":-1,"toc":3393},{"title":271,"description":547},{"type":554,"children":3388},[3389],{"type":557,"tag":558,"props":3390,"children":3391},{},[3392],{"type":562,"value":547},{"title":271,"searchDepth":564,"depth":564,"links":3394},[],{"data":3396,"body":3397,"excerpt":-1,"toc":4155},{"title":271,"description":271},{"type":554,"children":3398},[3399,3403,3408,3412,4096,4100,4105,4109,4127,4131,4149],{"type":557,"tag":601,"props":3400,"children":3401},{"id":1348},[3402],{"type":562,"value":1348},{"type":557,"tag":558,"props":3404,"children":3405},{},[3406],{"type":562,"value":3407},"完整 F16 精度需 55.6GB VRAM（單張 A100-80G 或雙張 RTX 4090）；Q4_K_M 量化版約 16.8GB，可在單張 RTX 4090 或 RX 7900 XTX 上運行；Q8 量化約 30GB，適合具備全頻寬雙 PCIe 插槽的雙 16GB GPU 配置。建議 Python 3.10+、transformers >= 4.51.0。",{"type":557,"tag":601,"props":3409,"children":3410},{"id":1363},[3411],{"type":562,"value":1366},{"type":557,"tag":1373,"props":3413,"children":3417},{"className":3414,"code":3415,"language":3416,"meta":271,"style":271},"language-python shiki shiki-themes vitesse-dark","from transformers import AutoModelForCausalLM, AutoTokenizer\n\nmodel_name = \"Qwen/Qwen3.6-27B\"\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForCausalLM.from_pretrained(\n    model_name,\n    torch_dtype=\"auto\",\n    device_map=\"auto\"\n)\n\nmessages = [{\"role\": \"user\", \"content\": \"撰寫一個 binary search tree 的插入函式\"}]\ntext = tokenizer.apply_chat_template(\n    messages,\n    tokenize=False,\n    add_generation_prompt=True,\n    enable_thinking=True\n)\nmodel_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\noutputs = model.generate(**model_inputs, 
max_new_tokens=8192)\nprint(tokenizer.decode(outputs[0][len(model_inputs.input_ids[0]):]))\n","python",[3418],{"type":557,"tag":1380,"props":3419,"children":3420},{"__ignoreMap":271},[3421,3459,3468,3499,3541,3570,3584,3616,3641,3649,3657,3745,3776,3789,3811,3833,3851,3859,3944,4009],{"type":557,"tag":1783,"props":3422,"children":3425},{"class":3423,"line":3424},"line",1,[3426,3432,3438,3443,3448,3454],{"type":557,"tag":1783,"props":3427,"children":3429},{"style":3428},"--shiki-default:#4D9375",[3430],{"type":562,"value":3431},"from",{"type":557,"tag":1783,"props":3433,"children":3435},{"style":3434},"--shiki-default:#DBD7CAEE",[3436],{"type":562,"value":3437}," transformers ",{"type":557,"tag":1783,"props":3439,"children":3440},{"style":3428},[3441],{"type":562,"value":3442},"import",{"type":557,"tag":1783,"props":3444,"children":3445},{"style":3434},[3446],{"type":562,"value":3447}," AutoModelForCausalLM",{"type":557,"tag":1783,"props":3449,"children":3451},{"style":3450},"--shiki-default:#666666",[3452],{"type":562,"value":3453},",",{"type":557,"tag":1783,"props":3455,"children":3456},{"style":3434},[3457],{"type":562,"value":3458}," AutoTokenizer\n",{"type":557,"tag":1783,"props":3460,"children":3461},{"class":3423,"line":564},[3462],{"type":557,"tag":1783,"props":3463,"children":3465},{"emptyLinePlaceholder":3464},true,[3466],{"type":562,"value":3467},"\n",{"type":557,"tag":1783,"props":3469,"children":3471},{"class":3423,"line":3470},3,[3472,3477,3482,3488,3494],{"type":557,"tag":1783,"props":3473,"children":3474},{"style":3434},[3475],{"type":562,"value":3476},"model_name ",{"type":557,"tag":1783,"props":3478,"children":3479},{"style":3450},[3480],{"type":562,"value":3481},"=",{"type":557,"tag":1783,"props":3483,"children":3485},{"style":3484},"--shiki-default:#C98A7D77",[3486],{"type":562,"value":3487}," 
\"",{"type":557,"tag":1783,"props":3489,"children":3491},{"style":3490},"--shiki-default:#C98A7D",[3492],{"type":562,"value":3493},"Qwen/Qwen3.6-27B",{"type":557,"tag":1783,"props":3495,"children":3496},{"style":3484},[3497],{"type":562,"value":3498},"\"\n",{"type":557,"tag":1783,"props":3500,"children":3501},{"class":3423,"line":95},[3502,3507,3511,3516,3521,3526,3531,3536],{"type":557,"tag":1783,"props":3503,"children":3504},{"style":3434},[3505],{"type":562,"value":3506},"tokenizer ",{"type":557,"tag":1783,"props":3508,"children":3509},{"style":3450},[3510],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3512,"children":3513},{"style":3434},[3514],{"type":562,"value":3515}," AutoTokenizer",{"type":557,"tag":1783,"props":3517,"children":3518},{"style":3450},[3519],{"type":562,"value":3520},".",{"type":557,"tag":1783,"props":3522,"children":3523},{"style":3434},[3524],{"type":562,"value":3525},"from_pretrained",{"type":557,"tag":1783,"props":3527,"children":3528},{"style":3450},[3529],{"type":562,"value":3530},"(",{"type":557,"tag":1783,"props":3532,"children":3533},{"style":3434},[3534],{"type":562,"value":3535},"model_name",{"type":557,"tag":1783,"props":3537,"children":3538},{"style":3450},[3539],{"type":562,"value":3540},")\n",{"type":557,"tag":1783,"props":3542,"children":3543},{"class":3423,"line":96},[3544,3549,3553,3557,3561,3565],{"type":557,"tag":1783,"props":3545,"children":3546},{"style":3434},[3547],{"type":562,"value":3548},"model 
",{"type":557,"tag":1783,"props":3550,"children":3551},{"style":3450},[3552],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3554,"children":3555},{"style":3434},[3556],{"type":562,"value":3447},{"type":557,"tag":1783,"props":3558,"children":3559},{"style":3450},[3560],{"type":562,"value":3520},{"type":557,"tag":1783,"props":3562,"children":3563},{"style":3434},[3564],{"type":562,"value":3525},{"type":557,"tag":1783,"props":3566,"children":3567},{"style":3450},[3568],{"type":562,"value":3569},"(\n",{"type":557,"tag":1783,"props":3571,"children":3573},{"class":3423,"line":3572},6,[3574,3579],{"type":557,"tag":1783,"props":3575,"children":3576},{"style":3434},[3577],{"type":562,"value":3578},"    model_name",{"type":557,"tag":1783,"props":3580,"children":3581},{"style":3450},[3582],{"type":562,"value":3583},",\n",{"type":557,"tag":1783,"props":3585,"children":3587},{"class":3423,"line":3586},7,[3588,3594,3598,3603,3608,3612],{"type":557,"tag":1783,"props":3589,"children":3591},{"style":3590},"--shiki-default:#BD976A",[3592],{"type":562,"value":3593},"    torch_dtype",{"type":557,"tag":1783,"props":3595,"children":3596},{"style":3450},[3597],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3599,"children":3600},{"style":3484},[3601],{"type":562,"value":3602},"\"",{"type":557,"tag":1783,"props":3604,"children":3605},{"style":3490},[3606],{"type":562,"value":3607},"auto",{"type":557,"tag":1783,"props":3609,"children":3610},{"style":3484},[3611],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3613,"children":3614},{"style":3450},[3615],{"type":562,"value":3583},{"type":557,"tag":1783,"props":3617,"children":3619},{"class":3423,"line":3618},8,[3620,3625,3629,3633,3637],{"type":557,"tag":1783,"props":3621,"children":3622},{"style":3590},[3623],{"type":562,"value":3624},"    
device_map",{"type":557,"tag":1783,"props":3626,"children":3627},{"style":3450},[3628],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3630,"children":3631},{"style":3484},[3632],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3634,"children":3635},{"style":3490},[3636],{"type":562,"value":3607},{"type":557,"tag":1783,"props":3638,"children":3639},{"style":3484},[3640],{"type":562,"value":3498},{"type":557,"tag":1783,"props":3642,"children":3644},{"class":3423,"line":3643},9,[3645],{"type":557,"tag":1783,"props":3646,"children":3647},{"style":3450},[3648],{"type":562,"value":3540},{"type":557,"tag":1783,"props":3650,"children":3652},{"class":3423,"line":3651},10,[3653],{"type":557,"tag":1783,"props":3654,"children":3655},{"emptyLinePlaceholder":3464},[3656],{"type":562,"value":3467},{"type":557,"tag":1783,"props":3658,"children":3660},{"class":3423,"line":3659},11,[3661,3666,3670,3675,3679,3684,3688,3693,3697,3702,3706,3710,3714,3719,3723,3727,3731,3736,3740],{"type":557,"tag":1783,"props":3662,"children":3663},{"style":3434},[3664],{"type":562,"value":3665},"messages ",{"type":557,"tag":1783,"props":3667,"children":3668},{"style":3450},[3669],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3671,"children":3672},{"style":3450},[3673],{"type":562,"value":3674}," 
[{",{"type":557,"tag":1783,"props":3676,"children":3677},{"style":3484},[3678],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3680,"children":3681},{"style":3490},[3682],{"type":562,"value":3683},"role",{"type":557,"tag":1783,"props":3685,"children":3686},{"style":3484},[3687],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3689,"children":3690},{"style":3450},[3691],{"type":562,"value":3692},":",{"type":557,"tag":1783,"props":3694,"children":3695},{"style":3484},[3696],{"type":562,"value":3487},{"type":557,"tag":1783,"props":3698,"children":3699},{"style":3490},[3700],{"type":562,"value":3701},"user",{"type":557,"tag":1783,"props":3703,"children":3704},{"style":3484},[3705],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3707,"children":3708},{"style":3450},[3709],{"type":562,"value":3453},{"type":557,"tag":1783,"props":3711,"children":3712},{"style":3484},[3713],{"type":562,"value":3487},{"type":557,"tag":1783,"props":3715,"children":3716},{"style":3490},[3717],{"type":562,"value":3718},"content",{"type":557,"tag":1783,"props":3720,"children":3721},{"style":3484},[3722],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3724,"children":3725},{"style":3450},[3726],{"type":562,"value":3692},{"type":557,"tag":1783,"props":3728,"children":3729},{"style":3484},[3730],{"type":562,"value":3487},{"type":557,"tag":1783,"props":3732,"children":3733},{"style":3490},[3734],{"type":562,"value":3735},"撰寫一個 binary search tree 的插入函式",{"type":557,"tag":1783,"props":3737,"children":3738},{"style":3484},[3739],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3741,"children":3742},{"style":3450},[3743],{"type":562,"value":3744},"}]\n",{"type":557,"tag":1783,"props":3746,"children":3748},{"class":3423,"line":3747},12,[3749,3754,3758,3763,3767,3772],{"type":557,"tag":1783,"props":3750,"children":3751},{"style":3434},[3752],{"type":562,"value":3753},"text 
",{"type":557,"tag":1783,"props":3755,"children":3756},{"style":3450},[3757],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3759,"children":3760},{"style":3434},[3761],{"type":562,"value":3762}," tokenizer",{"type":557,"tag":1783,"props":3764,"children":3765},{"style":3450},[3766],{"type":562,"value":3520},{"type":557,"tag":1783,"props":3768,"children":3769},{"style":3434},[3770],{"type":562,"value":3771},"apply_chat_template",{"type":557,"tag":1783,"props":3773,"children":3774},{"style":3450},[3775],{"type":562,"value":3569},{"type":557,"tag":1783,"props":3777,"children":3779},{"class":3423,"line":3778},13,[3780,3785],{"type":557,"tag":1783,"props":3781,"children":3782},{"style":3434},[3783],{"type":562,"value":3784},"    messages",{"type":557,"tag":1783,"props":3786,"children":3787},{"style":3450},[3788],{"type":562,"value":3583},{"type":557,"tag":1783,"props":3790,"children":3792},{"class":3423,"line":3791},14,[3793,3798,3802,3807],{"type":557,"tag":1783,"props":3794,"children":3795},{"style":3590},[3796],{"type":562,"value":3797},"    tokenize",{"type":557,"tag":1783,"props":3799,"children":3800},{"style":3450},[3801],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3803,"children":3804},{"style":3428},[3805],{"type":562,"value":3806},"False",{"type":557,"tag":1783,"props":3808,"children":3809},{"style":3450},[3810],{"type":562,"value":3583},{"type":557,"tag":1783,"props":3812,"children":3814},{"class":3423,"line":3813},15,[3815,3820,3824,3829],{"type":557,"tag":1783,"props":3816,"children":3817},{"style":3590},[3818],{"type":562,"value":3819},"    
add_generation_prompt",{"type":557,"tag":1783,"props":3821,"children":3822},{"style":3450},[3823],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3825,"children":3826},{"style":3428},[3827],{"type":562,"value":3828},"True",{"type":557,"tag":1783,"props":3830,"children":3831},{"style":3450},[3832],{"type":562,"value":3583},{"type":557,"tag":1783,"props":3834,"children":3836},{"class":3423,"line":3835},16,[3837,3842,3846],{"type":557,"tag":1783,"props":3838,"children":3839},{"style":3590},[3840],{"type":562,"value":3841},"    enable_thinking",{"type":557,"tag":1783,"props":3843,"children":3844},{"style":3450},[3845],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3847,"children":3848},{"style":3428},[3849],{"type":562,"value":3850},"True\n",{"type":557,"tag":1783,"props":3852,"children":3854},{"class":3423,"line":3853},17,[3855],{"type":557,"tag":1783,"props":3856,"children":3857},{"style":3450},[3858],{"type":562,"value":3540},{"type":557,"tag":1783,"props":3860,"children":3862},{"class":3423,"line":3861},18,[3863,3868,3872,3876,3881,3885,3890,3895,3899,3903,3908,3912,3917,3922,3926,3931,3935,3940],{"type":557,"tag":1783,"props":3864,"children":3865},{"style":3434},[3866],{"type":562,"value":3867},"model_inputs ",{"type":557,"tag":1783,"props":3869,"children":3870},{"style":3450},[3871],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3873,"children":3874},{"style":3434},[3875],{"type":562,"value":3762},{"type":557,"tag":1783,"props":3877,"children":3878},{"style":3450},[3879],{"type":562,"value":3880},"([",{"type":557,"tag":1783,"props":3882,"children":3883},{"style":3434},[3884],{"type":562,"value":562},{"type":557,"tag":1783,"props":3886,"children":3887},{"style":3450},[3888],{"type":562,"value":3889},"],",{"type":557,"tag":1783,"props":3891,"children":3892},{"style":3590},[3893],{"type":562,"value":3894}," 
return_tensors",{"type":557,"tag":1783,"props":3896,"children":3897},{"style":3450},[3898],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3900,"children":3901},{"style":3484},[3902],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3904,"children":3905},{"style":3490},[3906],{"type":562,"value":3907},"pt",{"type":557,"tag":1783,"props":3909,"children":3910},{"style":3484},[3911],{"type":562,"value":3602},{"type":557,"tag":1783,"props":3913,"children":3914},{"style":3450},[3915],{"type":562,"value":3916},").",{"type":557,"tag":1783,"props":3918,"children":3919},{"style":3434},[3920],{"type":562,"value":3921},"to",{"type":557,"tag":1783,"props":3923,"children":3924},{"style":3450},[3925],{"type":562,"value":3530},{"type":557,"tag":1783,"props":3927,"children":3928},{"style":3434},[3929],{"type":562,"value":3930},"model",{"type":557,"tag":1783,"props":3932,"children":3933},{"style":3450},[3934],{"type":562,"value":3520},{"type":557,"tag":1783,"props":3936,"children":3937},{"style":3434},[3938],{"type":562,"value":3939},"device",{"type":557,"tag":1783,"props":3941,"children":3942},{"style":3450},[3943],{"type":562,"value":3540},{"type":557,"tag":1783,"props":3945,"children":3947},{"class":3423,"line":3946},19,[3948,3953,3957,3962,3966,3971,3975,3981,3986,3990,3995,3999,4005],{"type":557,"tag":1783,"props":3949,"children":3950},{"style":3434},[3951],{"type":562,"value":3952},"outputs ",{"type":557,"tag":1783,"props":3954,"children":3955},{"style":3450},[3956],{"type":562,"value":3481},{"type":557,"tag":1783,"props":3958,"children":3959},{"style":3434},[3960],{"type":562,"value":3961}," 
model",{"type":557,"tag":1783,"props":3963,"children":3964},{"style":3450},[3965],{"type":562,"value":3520},{"type":557,"tag":1783,"props":3967,"children":3968},{"style":3434},[3969],{"type":562,"value":3970},"generate",{"type":557,"tag":1783,"props":3972,"children":3973},{"style":3450},[3974],{"type":562,"value":3530},{"type":557,"tag":1783,"props":3976,"children":3978},{"style":3977},"--shiki-default:#CB7676",[3979],{"type":562,"value":3980},"**",{"type":557,"tag":1783,"props":3982,"children":3983},{"style":3434},[3984],{"type":562,"value":3985},"model_inputs",{"type":557,"tag":1783,"props":3987,"children":3988},{"style":3450},[3989],{"type":562,"value":3453},{"type":557,"tag":1783,"props":3991,"children":3992},{"style":3590},[3993],{"type":562,"value":3994}," max_new_tokens",{"type":557,"tag":1783,"props":3996,"children":3997},{"style":3450},[3998],{"type":562,"value":3481},{"type":557,"tag":1783,"props":4000,"children":4002},{"style":4001},"--shiki-default:#4C9A91",[4003],{"type":562,"value":4004},"8192",{"type":557,"tag":1783,"props":4006,"children":4007},{"style":3450},[4008],{"type":562,"value":3540},{"type":557,"tag":1783,"props":4010,"children":4012},{"class":3423,"line":4011},20,[4013,4019,4023,4028,4032,4037,4041,4046,4051,4056,4061,4066,4070,4074,4078,4083,4087,4091],{"type":557,"tag":1783,"props":4014,"children":4016},{"style":4015},"--shiki-default:#B8A965",[4017],{"type":562,"value":4018},"print",{"type":557,"tag":1783,"props":4020,"children":4021},{"style":3450},[4022],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4024,"children":4025},{"style":3434},[4026],{"type":562,"value":4027},"tokenizer",{"type":557,"tag":1783,"props":4029,"children":4030},{"style":3450},[4031],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4033,"children":4034},{"style":3434},[4035],{"type":562,"value":4036},"decode",{"type":557,"tag":1783,"props":4038,"children":4039},{"style":3450},[4040],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4042,"ch
ildren":4043},{"style":3434},[4044],{"type":562,"value":4045},"outputs",{"type":557,"tag":1783,"props":4047,"children":4048},{"style":3450},[4049],{"type":562,"value":4050},"[",{"type":557,"tag":1783,"props":4052,"children":4053},{"style":4001},[4054],{"type":562,"value":4055},"0",{"type":557,"tag":1783,"props":4057,"children":4058},{"style":3450},[4059],{"type":562,"value":4060},"][",{"type":557,"tag":1783,"props":4062,"children":4063},{"style":4015},[4064],{"type":562,"value":4065},"len",{"type":557,"tag":1783,"props":4067,"children":4068},{"style":3450},[4069],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4071,"children":4072},{"style":3434},[4073],{"type":562,"value":3985},{"type":557,"tag":1783,"props":4075,"children":4076},{"style":3450},[4077],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4079,"children":4080},{"style":3434},[4081],{"type":562,"value":4082},"input_ids",{"type":557,"tag":1783,"props":4084,"children":4085},{"style":3450},[4086],{"type":562,"value":4050},{"type":557,"tag":1783,"props":4088,"children":4089},{"style":4001},[4090],{"type":562,"value":4055},{"type":557,"tag":1783,"props":4092,"children":4093},{"style":3450},[4094],{"type":562,"value":4095},"]):]))\n",{"type":557,"tag":601,"props":4097,"children":4098},{"id":1386},[4099],{"type":562,"value":1386},{"type":557,"tag":558,"props":4101,"children":4102},{},[4103],{"type":562,"value":4104},"建議以 SWE-bench Verified 公開測試集取樣 20 題進行本地基準驗測，比對官方公布的 77.2% 通過率。量化版本 (Q4 vs Q8) 的品質差異可透過 SkillsBench 子集快速量化，SkillsBench 的 48.2 vs 30.0 差距顯著，是最敏感的退化偵測器。",{"type":557,"tag":601,"props":4106,"children":4107},{"id":1414},[4108],{"type":562,"value":1414},{"type":557,"tag":824,"props":4110,"children":4111},{},[4112,4117,4122],{"type":557,"tag":828,"props":4113,"children":4114},{},[4115],{"type":562,"value":4116},"thinking 模式在簡單問題上可能消耗 2000+ tokens，生產環境建議設定 max_tokens 上限或依問題複雜度動態切換模式",{"type":557,"tag":828,"props":4118,"children":4119},{},[4120],{"type":562,"value":4121},"視覺多模態的 mmproj 
視覺編碼器在 llama.cpp 後端目前有載入相容性問題（社群 Day 1 實測回報），純文字任務可先略去視覺組件",{"type":557,"tag":828,"props":4123,"children":4124},{},[4125],{"type":562,"value":4126},"Q4 量化版在開啟 KV cache 量化時品質下降明顯，MoE 模型尤甚——建議關閉 KV cache 量化",{"type":557,"tag":601,"props":4128,"children":4129},{"id":1442},[4130],{"type":562,"value":1442},{"type":557,"tag":824,"props":4132,"children":4133},{},[4134,4139,4144],{"type":557,"tag":828,"props":4135,"children":4136},{},[4137],{"type":562,"value":4138},"觀測：tokens/s 推理速度、VRAM 峰值用量、thinking token 比例",{"type":557,"tag":828,"props":4140,"children":4141},{},[4142],{"type":562,"value":4143},"成本：量化等級 (Q4/Q6/Q8) 的品質-速度折衷點確認、多 GPU 分片方案的頻寬瓶頸評估",{"type":557,"tag":828,"props":4145,"children":4146},{},[4147],{"type":562,"value":4148},"風險：上下文超過 262K tokens 時 YaRN 延伸的品質穩定性、視覺模態功能的生產就緒狀態",{"type":557,"tag":4150,"props":4151,"children":4152},"style",{},[4153],{"type":562,"value":4154},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":271,"searchDepth":564,"depth":564,"links":4156},[],{"data":4158,"body":4159,"excerpt":-1,"toc":4739},{"title":271,"description":271},{"type":554,"children":4160},[4161,4165,4170,4174,4677,4681,4686,4690,4713,4717,4735],{"type":557,"tag":601,"props":4162,"children":4163},{"id":1348},[4164],{"type":562,"value":1348},{"type":557,"tag":558,"props":4166,"children":4167},{},[4168],{"type":562,"value":4169},"TPU 8t 與 TPU 8i 均透過 Google Cloud TPU v8 服務存取，需要 Google Cloud 帳戶與 Cloud TPU API 授權。兩款晶片預計 2026 年底 GA，目前仍在 Preview 階段，需申請早期存取資格。建議使用 JAX 框架或 TensorFlow 2.x + XLA 後端；Python 3.10+ 
環境。",{"type":557,"tag":601,"props":4171,"children":4172},{"id":1363},[4173],{"type":562,"value":1366},{"type":557,"tag":1373,"props":4175,"children":4177},{"className":3414,"code":4176,"language":3416,"meta":271,"style":271},"import jax\nimport jax.numpy as jnp\nimport time\n\n# 確認 TPU 8i 設備可見\ndevices = jax.devices('tpu')\nprint(f\"Available TPU devices: {len(devices)}\")\n\n# 測試片上 SRAM KV cache 的低延遲推論\n@jax.jit\ndef inference_step(model_params, input_ids, kv_cache):\n    return model.forward(input_ids, kv_cache=kv_cache)\n\n# 量測 TTFT（Time to First Token）基準\nstart = time.perf_counter()\nresult = inference_step(params, test_input, cache)\nttft = time.perf_counter() - start\nprint(f\"TTFT: {ttft * 1000:.2f}ms\")\n",[4178],{"type":557,"tag":1380,"props":4179,"children":4180},{"__ignoreMap":271},[4181,4193,4224,4236,4243,4252,4299,4356,4363,4371,4394,4439,4489,4496,4504,4535,4582,4621],{"type":557,"tag":1783,"props":4182,"children":4183},{"class":3423,"line":3424},[4184,4188],{"type":557,"tag":1783,"props":4185,"children":4186},{"style":3428},[4187],{"type":562,"value":3442},{"type":557,"tag":1783,"props":4189,"children":4190},{"style":3434},[4191],{"type":562,"value":4192}," jax\n",{"type":557,"tag":1783,"props":4194,"children":4195},{"class":3423,"line":564},[4196,4200,4205,4209,4214,4219],{"type":557,"tag":1783,"props":4197,"children":4198},{"style":3428},[4199],{"type":562,"value":3442},{"type":557,"tag":1783,"props":4201,"children":4202},{"style":3434},[4203],{"type":562,"value":4204}," jax",{"type":557,"tag":1783,"props":4206,"children":4207},{"style":3450},[4208],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4210,"children":4211},{"style":3434},[4212],{"type":562,"value":4213},"numpy ",{"type":557,"tag":1783,"props":4215,"children":4216},{"style":3428},[4217],{"type":562,"value":4218},"as",{"type":557,"tag":1783,"props":4220,"children":4221},{"style":3434},[4222],{"type":562,"value":4223}," 
jnp\n",{"type":557,"tag":1783,"props":4225,"children":4226},{"class":3423,"line":3470},[4227,4231],{"type":557,"tag":1783,"props":4228,"children":4229},{"style":3428},[4230],{"type":562,"value":3442},{"type":557,"tag":1783,"props":4232,"children":4233},{"style":3434},[4234],{"type":562,"value":4235}," time\n",{"type":557,"tag":1783,"props":4237,"children":4238},{"class":3423,"line":95},[4239],{"type":557,"tag":1783,"props":4240,"children":4241},{"emptyLinePlaceholder":3464},[4242],{"type":562,"value":3467},{"type":557,"tag":1783,"props":4244,"children":4245},{"class":3423,"line":96},[4246],{"type":557,"tag":1783,"props":4247,"children":4249},{"style":4248},"--shiki-default:#758575DD",[4250],{"type":562,"value":4251},"# 確認 TPU 8i 設備可見\n",{"type":557,"tag":1783,"props":4253,"children":4254},{"class":3423,"line":3572},[4255,4260,4264,4268,4272,4277,4281,4286,4291,4295],{"type":557,"tag":1783,"props":4256,"children":4257},{"style":3434},[4258],{"type":562,"value":4259},"devices ",{"type":557,"tag":1783,"props":4261,"children":4262},{"style":3450},[4263],{"type":562,"value":3481},{"type":557,"tag":1783,"props":4265,"children":4266},{"style":3434},[4267],{"type":562,"value":4204},{"type":557,"tag":1783,"props":4269,"children":4270},{"style":3450},[4271],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4273,"children":4274},{"style":3434},[4275],{"type":562,"value":4276},"devices",{"type":557,"tag":1783,"props":4278,"children":4279},{"style":3450},[4280],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4282,"children":4283},{"style":3484},[4284],{"type":562,"value":4285},"'",{"type":557,"tag":1783,"props":4287,"children":4288},{"style":3490},[4289],{"type":562,"value":4290},"tpu",{"type":557,"tag":1783,"props":4292,"children":4293},{"style":3484},[4294],{"type":562,"value":4285},{"type":557,"tag":1783,"props":4296,"children":4297},{"style":3450},[4298],{"type":562,"value":3540},{"type":557,"tag":1783,"props":4300,"children":4301},{"class":3423,"line":3586},
[4302,4306,4310,4315,4320,4326,4330,4334,4338,4343,4348,4352],{"type":557,"tag":1783,"props":4303,"children":4304},{"style":4015},[4305],{"type":562,"value":4018},{"type":557,"tag":1783,"props":4307,"children":4308},{"style":3450},[4309],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4311,"children":4312},{"style":3977},[4313],{"type":562,"value":4314},"f",{"type":557,"tag":1783,"props":4316,"children":4317},{"style":3490},[4318],{"type":562,"value":4319},"\"Available TPU devices: ",{"type":557,"tag":1783,"props":4321,"children":4323},{"style":4322},"--shiki-default:#C99076",[4324],{"type":562,"value":4325},"{",{"type":557,"tag":1783,"props":4327,"children":4328},{"style":4015},[4329],{"type":562,"value":4065},{"type":557,"tag":1783,"props":4331,"children":4332},{"style":3450},[4333],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4335,"children":4336},{"style":3434},[4337],{"type":562,"value":4276},{"type":557,"tag":1783,"props":4339,"children":4340},{"style":3450},[4341],{"type":562,"value":4342},")",{"type":557,"tag":1783,"props":4344,"children":4345},{"style":4322},[4346],{"type":562,"value":4347},"}",{"type":557,"tag":1783,"props":4349,"children":4350},{"style":3490},[4351],{"type":562,"value":3602},{"type":557,"tag":1783,"props":4353,"children":4354},{"style":3450},[4355],{"type":562,"value":3540},{"type":557,"tag":1783,"props":4357,"children":4358},{"class":3423,"line":3618},[4359],{"type":557,"tag":1783,"props":4360,"children":4361},{"emptyLinePlaceholder":3464},[4362],{"type":562,"value":3467},{"type":557,"tag":1783,"props":4364,"children":4365},{"class":3423,"line":3643},[4366],{"type":557,"tag":1783,"props":4367,"children":4368},{"style":4248},[4369],{"type":562,"value":4370},"# 測試片上 SRAM KV cache 
的低延遲推論\n",{"type":557,"tag":1783,"props":4372,"children":4373},{"class":3423,"line":3651},[4374,4379,4385,4389],{"type":557,"tag":1783,"props":4375,"children":4376},{"style":3450},[4377],{"type":562,"value":4378},"@",{"type":557,"tag":1783,"props":4380,"children":4382},{"style":4381},"--shiki-default:#80A665",[4383],{"type":562,"value":4384},"jax",{"type":557,"tag":1783,"props":4386,"children":4387},{"style":3450},[4388],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4390,"children":4391},{"style":4381},[4392],{"type":562,"value":4393},"jit\n",{"type":557,"tag":1783,"props":4395,"children":4396},{"class":3423,"line":3659},[4397,4402,4407,4411,4416,4420,4425,4429,4434],{"type":557,"tag":1783,"props":4398,"children":4399},{"style":3977},[4400],{"type":562,"value":4401},"def",{"type":557,"tag":1783,"props":4403,"children":4404},{"style":4381},[4405],{"type":562,"value":4406}," inference_step",{"type":557,"tag":1783,"props":4408,"children":4409},{"style":3450},[4410],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4412,"children":4413},{"style":3434},[4414],{"type":562,"value":4415},"model_params",{"type":557,"tag":1783,"props":4417,"children":4418},{"style":3450},[4419],{"type":562,"value":3453},{"type":557,"tag":1783,"props":4421,"children":4422},{"style":3434},[4423],{"type":562,"value":4424}," input_ids",{"type":557,"tag":1783,"props":4426,"children":4427},{"style":3450},[4428],{"type":562,"value":3453},{"type":557,"tag":1783,"props":4430,"children":4431},{"style":3434},[4432],{"type":562,"value":4433}," kv_cache",{"type":557,"tag":1783,"props":4435,"children":4436},{"style":3450},[4437],{"type":562,"value":4438},"):\n",{"type":557,"tag":1783,"props":4440,"children":4441},{"class":3423,"line":3747},[4442,4447,4451,4455,4460,4464,4468,4472,4476,4480,4485],{"type":557,"tag":1783,"props":4443,"children":4444},{"style":3428},[4445],{"type":562,"value":4446},"    
return",{"type":557,"tag":1783,"props":4448,"children":4449},{"style":3434},[4450],{"type":562,"value":3961},{"type":557,"tag":1783,"props":4452,"children":4453},{"style":3450},[4454],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4456,"children":4457},{"style":3434},[4458],{"type":562,"value":4459},"forward",{"type":557,"tag":1783,"props":4461,"children":4462},{"style":3450},[4463],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4465,"children":4466},{"style":3434},[4467],{"type":562,"value":4082},{"type":557,"tag":1783,"props":4469,"children":4470},{"style":3450},[4471],{"type":562,"value":3453},{"type":557,"tag":1783,"props":4473,"children":4474},{"style":3590},[4475],{"type":562,"value":4433},{"type":557,"tag":1783,"props":4477,"children":4478},{"style":3450},[4479],{"type":562,"value":3481},{"type":557,"tag":1783,"props":4481,"children":4482},{"style":3434},[4483],{"type":562,"value":4484},"kv_cache",{"type":557,"tag":1783,"props":4486,"children":4487},{"style":3450},[4488],{"type":562,"value":3540},{"type":557,"tag":1783,"props":4490,"children":4491},{"class":3423,"line":3778},[4492],{"type":557,"tag":1783,"props":4493,"children":4494},{"emptyLinePlaceholder":3464},[4495],{"type":562,"value":3467},{"type":557,"tag":1783,"props":4497,"children":4498},{"class":3423,"line":3791},[4499],{"type":557,"tag":1783,"props":4500,"children":4501},{"style":4248},[4502],{"type":562,"value":4503},"# 量測 TTFT（Time to First Token）基準\n",{"type":557,"tag":1783,"props":4505,"children":4506},{"class":3423,"line":3813},[4507,4512,4516,4521,4525,4530],{"type":557,"tag":1783,"props":4508,"children":4509},{"style":3434},[4510],{"type":562,"value":4511},"start ",{"type":557,"tag":1783,"props":4513,"children":4514},{"style":3450},[4515],{"type":562,"value":3481},{"type":557,"tag":1783,"props":4517,"children":4518},{"style":3434},[4519],{"type":562,"value":4520}," 
time",{"type":557,"tag":1783,"props":4522,"children":4523},{"style":3450},[4524],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4526,"children":4527},{"style":3434},[4528],{"type":562,"value":4529},"perf_counter",{"type":557,"tag":1783,"props":4531,"children":4532},{"style":3450},[4533],{"type":562,"value":4534},"()\n",{"type":557,"tag":1783,"props":4536,"children":4537},{"class":3423,"line":3835},[4538,4543,4547,4551,4555,4560,4564,4569,4573,4578],{"type":557,"tag":1783,"props":4539,"children":4540},{"style":3434},[4541],{"type":562,"value":4542},"result ",{"type":557,"tag":1783,"props":4544,"children":4545},{"style":3450},[4546],{"type":562,"value":3481},{"type":557,"tag":1783,"props":4548,"children":4549},{"style":3434},[4550],{"type":562,"value":4406},{"type":557,"tag":1783,"props":4552,"children":4553},{"style":3450},[4554],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4556,"children":4557},{"style":3434},[4558],{"type":562,"value":4559},"params",{"type":557,"tag":1783,"props":4561,"children":4562},{"style":3450},[4563],{"type":562,"value":3453},{"type":557,"tag":1783,"props":4565,"children":4566},{"style":3434},[4567],{"type":562,"value":4568}," test_input",{"type":557,"tag":1783,"props":4570,"children":4571},{"style":3450},[4572],{"type":562,"value":3453},{"type":557,"tag":1783,"props":4574,"children":4575},{"style":3434},[4576],{"type":562,"value":4577}," cache",{"type":557,"tag":1783,"props":4579,"children":4580},{"style":3450},[4581],{"type":562,"value":3540},{"type":557,"tag":1783,"props":4583,"children":4584},{"class":3423,"line":3853},[4585,4590,4594,4598,4602,4606,4611,4616],{"type":557,"tag":1783,"props":4586,"children":4587},{"style":3434},[4588],{"type":562,"value":4589},"ttft 
",{"type":557,"tag":1783,"props":4591,"children":4592},{"style":3450},[4593],{"type":562,"value":3481},{"type":557,"tag":1783,"props":4595,"children":4596},{"style":3434},[4597],{"type":562,"value":4520},{"type":557,"tag":1783,"props":4599,"children":4600},{"style":3450},[4601],{"type":562,"value":3520},{"type":557,"tag":1783,"props":4603,"children":4604},{"style":3434},[4605],{"type":562,"value":4529},{"type":557,"tag":1783,"props":4607,"children":4608},{"style":3450},[4609],{"type":562,"value":4610},"()",{"type":557,"tag":1783,"props":4612,"children":4613},{"style":3977},[4614],{"type":562,"value":4615}," -",{"type":557,"tag":1783,"props":4617,"children":4618},{"style":3434},[4619],{"type":562,"value":4620}," start\n",{"type":557,"tag":1783,"props":4622,"children":4623},{"class":3423,"line":3861},[4624,4628,4632,4636,4641,4645,4649,4654,4659,4664,4668,4673],{"type":557,"tag":1783,"props":4625,"children":4626},{"style":4015},[4627],{"type":562,"value":4018},{"type":557,"tag":1783,"props":4629,"children":4630},{"style":3450},[4631],{"type":562,"value":3530},{"type":557,"tag":1783,"props":4633,"children":4634},{"style":3977},[4635],{"type":562,"value":4314},{"type":557,"tag":1783,"props":4637,"children":4638},{"style":3490},[4639],{"type":562,"value":4640},"\"TTFT: ",{"type":557,"tag":1783,"props":4642,"children":4643},{"style":4322},[4644],{"type":562,"value":4325},{"type":557,"tag":1783,"props":4646,"children":4647},{"style":3434},[4648],{"type":562,"value":4589},{"type":557,"tag":1783,"props":4650,"children":4651},{"style":3977},[4652],{"type":562,"value":4653},"*",{"type":557,"tag":1783,"props":4655,"children":4656},{"style":4001},[4657],{"type":562,"value":4658}," 
1000",{"type":557,"tag":1783,"props":4660,"children":4661},{"style":3977},[4662],{"type":562,"value":4663},":.2f",{"type":557,"tag":1783,"props":4665,"children":4666},{"style":4322},[4667],{"type":562,"value":4347},{"type":557,"tag":1783,"props":4669,"children":4670},{"style":3490},[4671],{"type":562,"value":4672},"ms\"",{"type":557,"tag":1783,"props":4674,"children":4675},{"style":3450},[4676],{"type":562,"value":3540},{"type":557,"tag":601,"props":4678,"children":4679},{"id":1386},[4680],{"type":562,"value":1386},{"type":557,"tag":558,"props":4682,"children":4683},{},[4684],{"type":562,"value":4685},"測試 TPU 8i 的 Agent 低延遲性能時，重點量測多輪對話場景的 P99 延遲，比較啟用片上 SRAM KV cache 前後的差異。建議設計長上下文 (32K+ tokens) 多輪對話場景，分別在 TPU 8i 與 A100/H100 上量測 TTFT 與 TPOT(Time Per Output Token) ，並記錄 MoE 架構模型下的集體通訊開銷。",{"type":557,"tag":601,"props":4687,"children":4688},{"id":1414},[4689],{"type":562,"value":1414},{"type":557,"tag":824,"props":4691,"children":4692},{},[4693,4698,4703,4708],{"type":557,"tag":828,"props":4694,"children":4695},{},[4696],{"type":562,"value":4697},"TPU 8t 的 Virgo Network 擴展需要正確的 JAX pjit 並行策略配置，錯誤的 mesh 設定會顯著降低 Goodput",{"type":557,"tag":828,"props":4699,"children":4700},{},[4701],{"type":562,"value":4702},"TPU 8i 的片上 SRAM KV cache 優勢只在模型能完整配置於片上時才能發揮，過大的模型反而增加 HBM 碎片化風險",{"type":557,"tag":828,"props":4704,"children":4705},{},[4706],{"type":562,"value":4707},"MediaTek 製造的 TPU 8i 與 Broadcom 製造的 TPU 8t 使用不同驅動版本，混合部署時需注意版本相容性",{"type":557,"tag":828,"props":4709,"children":4710},{},[4711],{"type":562,"value":4712},"Preview 階段的 API 介面可能在 GA 時變動，避免在此階段硬編碼特定 TPU 拓撲參數",{"type":557,"tag":601,"props":4714,"children":4715},{"id":1442},[4716],{"type":562,"value":1442},{"type":557,"tag":824,"props":4718,"children":4719},{},[4720,4725,4730],{"type":557,"tag":828,"props":4721,"children":4722},{},[4723],{"type":562,"value":4724},"觀測：TTFT P50/P99、KV cache 命中率、片上 SRAM 使用率、Goodput 百分比",{"type":557,"tag":828,"props":4726,"children":4727},{},[4728],{"type":562,"value":4729},"成本：比較 TPU 8i 與 
Nvidia H100 的每百萬 token 成本；Agent 多步驟工作流程總費用",{"type":557,"tag":828,"props":4731,"children":4732},{},[4733],{"type":562,"value":4734},"風險：2026 年底 GA 前的 SLA 保障範圍、Gemini 模型版本行為一致性、JAX 生態系鎖定風險",{"type":557,"tag":4150,"props":4736,"children":4737},{},[4738],{"type":562,"value":4154},{"title":271,"searchDepth":564,"depth":564,"links":4740},[]]