[
  {
    "@context": "https:\/\/schema.org\/",
    "@type": "BlogPosting",
    "@id": "https:\/\/blog.terabox.com\/insights\/rl-y-futuro-agentes-software-anthropic#BlogPosting",
    "mainEntityOfPage": "https:\/\/blog.terabox.com\/insights\/rl-y-futuro-agentes-software-anthropic",
    "headline": "RL y el futuro de los Agentes de Software con Anthropic",
    "name": "RL y el futuro de los Agentes de Software con Anthropic",
    "description": "\ud83d\udcfa V\u00eddeo de estudio recomendado hoy: https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M La era de los agentes expertos: RL, interpretabilidad y el futuro del trabajo intelectualEl fin de la \u201ctorpeza\u201d de los modelos: RL y recompensas verificablesInterpretabilidad: Leyendo la \u201cmente\u201d de la IAEl impacto econ\u00f3mico:... ",
    "datePublished": "2026-06-26",
    "dateModified": "2026-06-26",
    "author": {
      "@type": "Person",
      "@id": "https:\/\/blog.terabox.com\/author\/flextech-admin\/#Person",
      "name": "flextech-admin",
      "url": "https:\/\/blog.terabox.com\/author\/flextech-admin\/",
      "image": {
        "@type": "ImageObject",
        "@id": "https:\/\/secure.gravatar.com\/avatar\/ad516503a11cd5ca435acc9bb6523536?s=150&d=mm&r=gforcedefault=1",
        "url": "https:\/\/secure.gravatar.com\/avatar\/ad516503a11cd5ca435acc9bb6523536?s=150&d=mm&r=gforcedefault=1",
        "height": 96,
        "width": 96
      }
    },
    "publisher": {
      "@type": "Organization",
      "name": "terabox",
      "logo": {
        "@type": "ImageObject",
        "@id": "http:\/\/blog.terabox.com\/wp-content\/uploads\/2021\/11\/logo\u4ea7\u54c1\u540d-\u7ad6\u7248.png",
        "url": "http:\/\/blog.terabox.com\/wp-content\/uploads\/2021\/11\/logo\u4ea7\u54c1\u540d-\u7ad6\u7248.png",
        "width": 900,
        "height": 900
      }
    },
    "image": {
      "@type": "ImageObject",
      "@id": "https:\/\/img.youtube.com\/vi\/64lXQP6cs5M\/maxresdefault.jpg",
      "url": "https:\/\/img.youtube.com\/vi\/64lXQP6cs5M\/maxresdefault.jpg"
    },
    "url": "https:\/\/blog.terabox.com\/insights\/rl-y-futuro-agentes-software-anthropic",
    "video": {
      "@context": "http:\/\/schema.org\/",
      "@type": "VideoObject",
      "@id": "https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject",
      "contentUrl": "https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M",
      "name": "Is RL + LLMs enough for AGI? \u2014 Sholto Douglas & Trenton Bricken",
      "description": "New episode with my good friends Sholto Douglas & Trenton Bricken. Sholto focuses on scaling RL and Trenton researches mechanistic interpretability, both at Anthropic. We talk through what\u2019s changed in the last year of AI research; the new RL regime and how far it can scale; how to trace a model\u2019s thoughts; and how countries, workers, and students should prepare for AGI. See you next year for v3. Enjoy!\n\n\ud835\udc04\ud835\udc0f\ud835\udc08\ud835\udc12\ud835\udc0e\ud835\udc03\ud835\udc04 \ud835\udc0b\ud835\udc08\ud835\udc0d\ud835\udc0a\ud835\udc12\n* Transcript: https:\/\/www.dwarkesh.com\/p\/sholto-trenton-2\n* Apple Podcasts: https:\/\/podcasts.apple.com\/us\/podcast\/dwarkesh-podcast\/id1516093381\n* Spotify: https:\/\/open.spotify.com\/episode\/3H46XEWBlUeTY1c1mHolqh?si=b645971b1af546fa\n* Last year's episode: https:\/\/www.youtube.com\/watch?v=UTuuTTnjxMQ\n\n\ud835\udc12\ud835\udc0f\ud835\udc0e\ud835\udc0d\ud835\udc12\ud835\udc0e\ud835\udc11\ud835\udc12\n* WorkOS ensures that AI companies like OpenAI and Anthropic don't have to spend engineering time building enterprise features like access controls or SSO. It\u2019s not that they don't need these features; it's just that WorkOS gives them battle-tested APIs that they can use for auth, provisioning, and more. Start building today at https:\/\/workos.com.\n\n* Scale is building the infrastructure for safer, smarter AI. Scale\u2019s Data Foundry gives major AI labs access to high-quality data to fuel post-training, while their public leaderboards help assess model capabilities. They also just released Scale Evaluation, a new tool that diagnoses model limitations. If you\u2019re an AI researcher or engineer, learn how Scale can help you push the frontier at https:\/\/scale.com\/dwarkesh.\n\n* Lighthouse is THE fastest immigration solution for the technology industry. They specialize in expert visas like the O-1A and EB-1A, and they\u2019ve already helped companies like Cursor, Notion, and Replit navigate U.S. immigration. Explore which visa is right for you at https:\/\/lighthousehq.com\/ref\/Dwarkesh.\n\nTo sponsor a future episode, visit https:\/\/dwarkesh.com\/advertise.\n\n\ud835\udc13\ud835\udc08\ud835\udc0c\ud835\udc04\ud835\udc12\ud835\udc13\ud835\udc00\ud835\udc0c\ud835\udc0f\ud835\udc12\n00:00:00 \u2013 How far can RL scale?\n00:16:27 \u2013 Is continual learning a key bottleneck?\n00:31:59 \u2013 Model self-awareness\n00:50:32 \u2013 Taste and slop\n01:00:51 \u2013 How soon to fully autonomous agents?\n01:15:17 \u2013 Neuralese\n01:18:55 \u2013 Inference compute will bottleneck AGI\n01:23:01 \u2013 DeepSeek algorithmic improvements\n01:37:42 \u2013 Why are LLMs \u2018baby AGI\u2019 but not AlphaZero?\n01:45:38 \u2013 Mech interp\n01:56:15 \u2013 How countries should prepare for AGI\n02:10:26 \u2013 Automating white collar work\n02:15:35 \u2013 Advice for students",
      "thumbnailUrl": [
        "https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/default.jpg",
        "https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/mqdefault.jpg",
        "https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/hqdefault.jpg",
        "https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/sddefault.jpg",
        "https:\/\/i.ytimg.com\/vi\/64lXQP6cs5M\/maxresdefault.jpg"
      ],
      "uploadDate": "2025-05-22T21:06:29+00:00",
      "duration": "PT2H24M2S",
      "embedUrl": "https:\/\/www.youtube.com\/embed\/64lXQP6cs5M",
      "publisher": {
        "@type": "Organization",
        "@id": "https:\/\/www.youtube.com\/channel\/UCXl4i9dYBrFOabk0xGmbkRA#Organization",
        "url": "https:\/\/www.youtube.com\/channel\/UCXl4i9dYBrFOabk0xGmbkRA",
        "name": "Dwarkesh Patel",
        "description": "Deeply researched interviews\n",
        "logo": {
          "url": "https:\/\/yt3.ggpht.com\/lG-z7sTfhFIW2Ne1oXMHvXMXyZSaA02_I17gUel0GAEj7OypsSHQ7PE91Vp4bTbpm3PTIAWJdko=s800-c-k-c0x00ffffff-no-rj",
          "width": 800,
          "height": 800,
          "@type": "ImageObject",
          "@id": "https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_publisher_logo_ImageObject"
        }
      },
      "potentialAction": {
        "@type": "SeekToAction",
        "@id": "https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_potentialAction",
        "target": "https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M&t={seek_to_second_number}",
        "startOffset-input": "required name=seek_to_second_number"
      },
      "interactionStatistic": [
        {
          "@type": "InteractionCounter",
          "@id": "https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_interactionStatistic_WatchAction",
          "interactionType": {
            "@type": "WatchAction"
          },
          "userInteractionCount": 201579
        },
        {
          "@type": "InteractionCounter",
          "@id": "https:\/\/www.youtube.com\/watch?v=64lXQP6cs5M#VideoObject_interactionStatistic_LikeAction",
          "interactionType": {
            "@type": "LikeAction"
          },
          "userInteractionCount": 3967
        }
      ]
    },
    "about": [
      "Language",
      "\u300eSpanish\u300f",
      "Insights"
    ],
    "wordCount": 1351
  },
  {
    "@context": "https:\/\/schema.org\/",
    "@type": "BreadcrumbList",
    "itemListElement": [
      {
        "@type": "ListItem",
        "position": 1,
        "name": "Insights",
        "item": "https:\/\/blog.terabox.com\/insights\/#breadcrumbitem"
      },
      {
        "@type": "ListItem",
        "position": 2,
        "name": "RL y el futuro de los Agentes de Software con Anthropic",
        "item": "https:\/\/blog.terabox.com\/insights\/rl-y-futuro-agentes-software-anthropic#breadcrumbitem"
      }
    ]
  }
]