[{"@context":"https:\/\/schema.org\/","@type":"BlogPosting","@id":"https:\/\/blog.terabox.com\/insights\/sholto-douglas-trenton-bricken-rl-ai-agents#BlogPosting","mainEntityOfPage":"https:\/\/blog.terabox.com\/insights\/sholto-douglas-trenton-bricken-rl-ai-agents","headline":"RL and AI Agents: Sholto Douglas & Trenton Bricken","name":"RL and AI Agents: Sholto Douglas & Trenton Bricken","description":"\ud83d\udcfa Today\u2019s recommended deep-dive video: https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M From Chatbots to Agents: Inside the Reinforcement Learning RevolutionThe Great Unhobbling: RL and Verifiable RewardsPeering into the Alien Brain: Mechanistic InterpretabilityThe Economic Aftermath: Moravec\u2019s ParadoxKey TakeawaysQ&A From Chatbots to Agents: Inside the Reinforcement Learning... ","datePublished":"2026-06-26","dateModified":"2026-06-26","author":{"@type":"Person","@id":"https:\/\/blog.terabox.com\/author\/flextech-admin\/#Person","name":"flextech-admin","url":"https:\/\/blog.terabox.com\/author\/flextech-admin\/","image":{"@type":"ImageObject","@id":"https:\/\/secure.gravatar.com\/avatar\/ad516503a11cd5ca435acc9bb6523536?s=150&d=mm&r=gforcedefault=1","url":"https:\/\/secure.gravatar.com\/avatar\/ad516503a11cd5ca435acc9bb6523536?s=150&d=mm&r=gforcedefault=1","height":96,"width":96}},"publisher":{"@type":"Organization","name":"terabox","logo":{"@type":"ImageObject","@id":"http:\/\/blog.terabox.com\/wp-content\/uploads\/2021\/11\/logo\u4ea7\u54c1\u540d-\u7ad6\u7248.png","url":"http:\/\/blog.terabox.com\/wp-content\/uploads\/2021\/11\/logo\u4ea7\u54c1\u540d-\u7ad6\u7248.png","width":900,"height":900}},"image":{"@type":"ImageObject","@id":"https:\/\/img.youtube.com\/vi\/64lXQP6cs5M\/maxresdefault.jpg","url":"https:\/\/img.youtube.com\/vi\/64lXQP6cs5M\/maxresdefault.jpg"},"url":"https:\/\/blog.terabox.com\/insights\/sholto-douglas-trenton-bricken-rl-ai-agents","video":{"@context":"http:\/\/schema.org
\/","@type":"VideoObject","@id":"https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject","contentUrl":"https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M","name":"Is RL + LLMs enough for AGI? \u2014 Sholto Douglas & Trenton Bricken","description":"New episode with my good friends Sholto Douglas & Trenton Bricken. Sholto focuses on scaling RL and Trenton researches mechanistic interpretability, both at Anthropic. We talk through what\u2019s changed in the last year of AI research; the new RL regime and how far it can scale; how to trace a model\u2019s thoughts; and how countries, workers, and students should prepare for AGI. See you next year for v3. Enjoy!\n\n\ud835\udc04\ud835\udc0f\ud835\udc08\ud835\udc12\ud835\udc0e\ud835\udc03\ud835\udc04 \ud835\udc0b\ud835\udc08\ud835\udc0d\ud835\udc0a\ud835\udc12\n* Transcript: https:\/\/www.dwarkesh.com\/p\/sholto-trenton-2\n* Apple Podcasts: https:\/\/podcasts.apple.com\/us\/podcast\/dwarkesh-podcast\/id1516093381\n* Spotify: https:\/\/open.spotify.com\/episode\/3H46XEWBlUeTY1c1mHolqh?si=b645971b1af546fa\n* Last year's episode: https:\/\/www.youtube.com\/watch?v=UTuuTTnjxMQ\n\n\ud835\udc12\ud835\udc0f\ud835\udc0e\ud835\udc0d\ud835\udc12\ud835\udc0e\ud835\udc11\ud835\udc12\n* WorkOS ensures that AI companies like OpenAI and Anthropic don't have to spend engineering time building enterprise features like access controls or SSO. It\u2019s not that they don't need these features; it's just that WorkOS gives them battle-tested APIs that they can use for auth, provisioning, and more. Start building today at https:\/\/workos.com.\n\n* Scale is building the infrastructure for safer, smarter AI. Scale\u2019s Data Foundry gives major AI labs access to high-quality data to fuel post-training, while their public leaderboards help assess model capabilities. They also just released Scale Evaluation, a new tool that diagnoses model limitations. 
If you\u2019re an AI researcher or engineer, learn how Scale can help you push the frontier at https:\/\/scale.com\/dwarkesh.\n\n* Lighthouse is THE fastest immigration solution for the technology industry. They specialize in expert visas like the O-1A and EB-1A, and they\u2019ve already helped companies like Cursor, Notion, and Replit navigate U.S. immigration. Explore which visa is right for you at https:\/\/lighthousehq.com\/ref\/Dwarkesh.\n\nTo sponsor a future episode, visit https:\/\/dwarkesh.com\/advertise.\n\n\ud835\udc13\ud835\udc08\ud835\udc0c\ud835\udc04\ud835\udc12\ud835\udc13\ud835\udc00\ud835\udc0c\ud835\udc0f\ud835\udc12\n00:00:00 \u2013 How far can RL scale?\n00:16:27 \u2013 Is continual learning a key bottleneck?\n00:31:59 \u2013 Model self-awareness\n00:50:32 \u2013 Taste and slop\n01:00:51 \u2013 How soon to fully autonomous agents?\n01:15:17 \u2013 Neuralese\n01:18:55 \u2013 Inference compute will bottleneck AGI\n01:23:01 \u2013 DeepSeek algorithmic improvements\n01:37:42 \u2013 Why are LLMs \u2018baby AGI\u2019 but not AlphaZero?\n01:45:38 \u2013 Mech interp\n01:56:15 \u2013 How countries should prepare for AGI\n02:10:26 \u2013 Automating white collar work\n02:15:35 \u2013 Advice for students","thumbnailUrl":["https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/default.jpg","https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/mqdefault.jpg","https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/hqdefault.jpg","https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/sddefault.jpg","https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/maxresdefault.jpg"],"uploadDate":"2025-05-22T21:06:29+00:00","duration":"PT2H24M2S","embedUrl":"https:\/\/www.youtube.com\/embed\/64lXQP6cs5M","publisher":{"@type":"Organization","@id":"https:\/\/www.youtube.com\/channel\/UCXl4i9dYBrFOabk0xGmbkRA#Organization","url":"https:\/\/www.youtube.com\/channel\/UCXl4i9dYBrFOabk0xGmbkRA","name":"Dwarkesh Patel","description":"Deeply researched 
interviews\n","logo":{"url":"https:\/\/yt3.ggpht.com\/lG-z7sTfhFIW2Ne1oXMHvXMXyZSaA02_I17gUel0GAEj7OypsSHQ7PE91Vp4bTbpm3PTIAWJdko=s800-c-k-c0x00ffffff-no-rj","width":800,"height":800,"@type":"ImageObject","@id":"https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_publisher_logo_ImageObject"}},"potentialAction":{"@type":"SeekToAction","@id":"https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_potentialAction","target":"https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M&t={seek_to_second_number}","startOffset-input":"required name=seek_to_second_number"},"interactionStatistic":[{"@type":"InteractionCounter","@id":"https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_interactionStatistic_WatchAction","interactionType":{"@type":"WatchAction"},"userInteractionCount":201579},{"@type":"InteractionCounter","@id":"https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_interactionStatistic_LikeAction","interactionType":{"@type":"LikeAction"},"userInteractionCount":3967}]},"about":["Insights","\u300eEnglish\u300f"],"wordCount":1710},{"@context":"https:\/\/schema.org\/","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","position":1,"name":"Insights","item":"https:\/\/blog.terabox.com\/insights\/#breadcrumbitem"},{"@type":"ListItem","position":2,"name":"RL and AI Agents: Sholto Douglas & Trenton Bricken","item":"https:\/\/blog.terabox.com\/insights\/sholto-douglas-trenton-bricken-rl-ai-agents#breadcrumbitem"}]}]